PHYLIPNEW-3.69.650/config.sub

#! /bin/sh
# Configuration validation subroutine script.
#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
#   2011, 2012 Free Software Foundation, Inc.

timestamp='2012-04-18'

# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
# can handle that machine.  It does not imply ALL GNU software can.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.

# Please send patches to <config-patches@gnu.org>.  Submit a context
# diff and a properly formatted GNU ChangeLog entry.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed.

# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD

# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software.
# Each package is responsible for reporting which valid configurations
# it does not support.  The user should be able to distinguish
# a failure to support a valid configuration from a meaningless
# configuration.

# The goal of this file is to map all the various variations of a given
# machine specification into a single specification in the form:
#       CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
# or in some cases, the newer four-part form:
#       CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
# It is wrong to echo any other type of specification.

me=`echo "$0" | sed -e 's,.*/,,'`

usage="\
Usage: $0 [OPTION] CPU-MFR-OPSYS
       $0 [OPTION] ALIAS

Canonicalize a configuration name.

Operation modes:
  -h, --help         print this help, then exit
  -t, --time-stamp   print date of last modification, then exit
  -v, --version      print version number, then exit

Report bugs and patches to <config-patches@gnu.org>."

version="\
GNU config.sub ($timestamp)

Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
2012 Free Software Foundation, Inc.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

help="
Try \`$me --help' for more information."
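# Worked example (an illustrative sketch added here, not part of the original
# script). The expected outputs below are what the alias and default-OS rules
# later in this file should yield for these inputs; they are derived from the
# rules, not recorded program output:
#
#   sh config.sub amd64-linux          # -> x86_64-pc-linux-gnu
#   sh config.sub sun4                 # -> sparc-sun-sunos4.1.1
#   sh config.sub i686-pc-linux-gnu    # -> i686-pc-linux-gnu  (already canonical)
#
# An unrecognized alias prints an error on stderr and exits with status 1.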
# Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" exit 1 ;; *local*) # First pass through any local machine types. echo $1 exit ;; * ) break ;; esac done case $# in 0) echo "$me: missing argument$help" >&2 exit 1;; 1) ;; *) echo "$me: too many arguments$help" >&2 exit 1;; esac # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). # Here we must recognize all the valid KERNEL-OS combinations. maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | \ kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; android-linux) os=-linux-android basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] then os=`echo $1 | sed 's/.*-/-/'` else os=; fi ;; esac ### Let's recognize common machines as not being operating systems so ### that things like config.sub decstation-3100 work. We also ### recognize some manufacturers as not being operating systems, so we ### can provide default operating systems below. case $os in -sun*os*) # Prevent following clause from handling this invalid input. ;; -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ -apple | -axis | -knuth | -cray | -microblaze) os= basic_machine=$1 ;; -bluegene*) os=-cnk ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 ;; -scout) ;; -wrs) os=-vxworks basic_machine=$1 ;; -chorusos*) os=-chorusos basic_machine=$1 ;; -chorusrdb) os=-chorusrdb basic_machine=$1 ;; -hiux*) os=-hiuxwe2 ;; -sco6) os=-sco5v6 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco4) os=-sco3.2v4 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco3.2.[4-9]*) os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco3.2v[4-9]*) # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5v6*) # Don't forget version if it is 3.2v4 or newer. 
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -udk*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -isc) os=-isc2.2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -clix*) basic_machine=clipper-intergraph ;; -isc*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -lynx*178) os=-lynxos178 ;; -lynx*5) os=-lynxos5 ;; -lynx*) os=-lynxos ;; -ptx*) basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` ;; -windowsnt*) os=`echo $os | sed -e 's/windowsnt/winnt/'` ;; -psos*) os=-psos ;; -mint | -mint[0-9]*) basic_machine=m68k-atari os=-mint ;; esac # Decode aliases for certain CPU-COMPANY combinations. case $basic_machine in # Recognize the basic CPU types without company name. # Some are omitted here because they have special meanings below. 1750a | 580 \ | a29k \ | aarch64 | aarch64_be \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ | be32 | be64 \ | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ | epiphany \ | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ | maxq | mb | microblaze | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ | mips64octeon | mips64octeonel \ | mips64orion | mips64orionel \ | mips64r5900 | mips64r5900el \ | mips64vr | mips64vrel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ | nds32 | nds32le | nds32be \ | nios | nios2 \ | ns16k | ns32k \ | open8 \ | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ | rl78 | rx \ | score \ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ | spu \ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) basic_machine=$basic_machine-unknown ;; c54x) basic_machine=tic54x-unknown ;; c55x) basic_machine=tic55x-unknown ;; c6x) basic_machine=tic6x-unknown ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) basic_machine=$basic_machine-unknown os=-none ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; ms1) basic_machine=mt-unknown ;; strongarm | thumb | xscale) basic_machine=arm-unknown ;; xgate) basic_machine=$basic_machine-unknown os=-none ;; xscaleeb) basic_machine=armeb-unknown ;; xscaleel) basic_machine=armel-unknown ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. i*86 | x86_64) basic_machine=$basic_machine-pc ;; # Object if more than one company name word. 
*-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; # Recognize the basic CPU types with company name. 580-* \ | a29k-* \ | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ | mips64octeon-* | mips64octeonel-* \ | mips64orion-* | mips64orionel-* \ | mips64r5900-* | mips64r5900el-* \ | mips64vr-* | mips64vrel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nds32-* | nds32le-* | nds32be-* \ | nios-* | nios2-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ | pyramid-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ | tron-* \ | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ | ymp-* \ | z8k-* | z80-*) ;; # Recognize the basic CPU types without company name, with glob match. xtensa*) basic_machine=$basic_machine-unknown ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 
386bsd) basic_machine=i386-unknown os=-bsd ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) basic_machine=m68000-att ;; 3b*) basic_machine=we32k-att ;; a29khif) basic_machine=a29k-amd os=-udi ;; abacus) basic_machine=abacus-unknown ;; adobe68k) basic_machine=m68010-adobe os=-scout ;; alliant | fx80) basic_machine=fx80-alliant ;; altos | altos3068) basic_machine=m68k-altos ;; am29k) basic_machine=a29k-none os=-bsd ;; amd64) basic_machine=x86_64-pc ;; amd64-*) basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; amdahl) basic_machine=580-amdahl os=-sysv ;; amiga | amiga-*) basic_machine=m68k-unknown ;; amigaos | amigados) basic_machine=m68k-unknown os=-amigaos ;; amigaunix | amix) basic_machine=m68k-unknown os=-sysv4 ;; apollo68) basic_machine=m68k-apollo os=-sysv ;; apollo68bsd) basic_machine=m68k-apollo os=-bsd ;; aros) basic_machine=i386-pc os=-aros ;; aux) basic_machine=m68k-apple os=-aux ;; balance) basic_machine=ns32k-sequent os=-dynix ;; blackfin) basic_machine=bfin-unknown os=-linux ;; blackfin-*) basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; bluegene*) basic_machine=powerpc-ibm os=-cnk ;; c54x-*) basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c55x-*) basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c6x-*) basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c90) basic_machine=c90-cray os=-unicos ;; cegcc) basic_machine=arm-unknown os=-cegcc ;; convex-c1) basic_machine=c1-convex os=-bsd ;; convex-c2) basic_machine=c2-convex os=-bsd ;; convex-c32) basic_machine=c32-convex os=-bsd ;; convex-c34) basic_machine=c34-convex os=-bsd ;; convex-c38) basic_machine=c38-convex os=-bsd ;; cray | j90) basic_machine=j90-cray os=-unicos ;; craynv) basic_machine=craynv-cray os=-unicosmp ;; cr16 | cr16-*) basic_machine=cr16-unknown os=-elf ;; crds | unos) basic_machine=m68k-crds ;; crisv32 | crisv32-* | etraxfs*) basic_machine=crisv32-axis ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; crx) basic_machine=crx-unknown os=-elf ;; da30 | da30-*) basic_machine=m68k-da30 ;; decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) basic_machine=mips-dec ;; decsystem10* | dec10*) basic_machine=pdp10-dec os=-tops10 ;; decsystem20* | dec20*) basic_machine=pdp10-dec os=-tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) basic_machine=m68k-motorola ;; delta88) basic_machine=m88k-motorola os=-sysv3 ;; dicos) basic_machine=i686-pc os=-dicos ;; djgpp) basic_machine=i586-pc os=-msdosdjgpp ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx ;; dpx2* | dpx2*-bull) basic_machine=m68k-bull os=-sysv3 ;; ebmon29k) basic_machine=a29k-amd os=-ebmon ;; elxsi) basic_machine=elxsi-elxsi os=-bsd ;; encore | umax | mmax) basic_machine=ns32k-encore ;; es1800 | OSE68k | ose68k | ose | OSE) basic_machine=m68k-ericsson os=-ose ;; fx2800) basic_machine=i860-alliant ;; genix) basic_machine=ns32k-ns ;; gmicro) basic_machine=tron-gmicro os=-sysv ;; go32) basic_machine=i386-pc os=-go32 ;; h3050r* | hiux*) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; h8300hms) basic_machine=h8300-hitachi os=-hms ;; h8300xray) basic_machine=h8300-hitachi os=-xray ;; h8500hms) basic_machine=h8500-hitachi os=-hms ;; harris) basic_machine=m88k-harris os=-sysv3 ;; hp300-*) basic_machine=m68k-hp ;; hp300bsd) basic_machine=m68k-hp os=-bsd ;; hp300hpux) basic_machine=m68k-hp os=-hpux ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) 
basic_machine=m68000-hp ;; hp9k3[2-9][0-9]) basic_machine=m68k-hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) basic_machine=hppa1.1-hp ;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) basic_machine=hppa1.1-hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) basic_machine=hppa1.0-hp ;; hppa-next) os=-nextstep3 ;; hppaosf) basic_machine=hppa1.1-hp os=-osf ;; hppro) basic_machine=hppa1.1-hp os=-proelf ;; i370-ibm* | ibm*) basic_machine=i370-ibm ;; i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 ;; i*86v4*) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv4 ;; i*86v) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv ;; i*86sol2) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-solaris2 ;; i386mach) basic_machine=i386-mach os=-mach ;; i386-vsta | vsta) basic_machine=i386-unknown os=-vsta ;; iris | iris4d) basic_machine=mips-sgi case $os in -irix*) ;; *) os=-irix4 ;; esac ;; isi68 | isi) basic_machine=m68k-isi os=-sysv ;; m68knommu) basic_machine=m68k-unknown os=-linux ;; m68knommu-*) basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; m88k-omron*) basic_machine=m88k-omron ;; magnum | m3230) basic_machine=mips-mips os=-sysv ;; merlin) basic_machine=ns32k-utek os=-sysv ;; microblaze) basic_machine=microblaze-xilinx ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; mingw32ce) basic_machine=arm-unknown os=-mingw32ce ;; miniframe) basic_machine=m68000-convergent ;; *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) basic_machine=m68k-atari os=-mint ;; mips3*-*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` ;; mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; monitor) basic_machine=m68k-rom68k os=-coff ;; morphos) basic_machine=powerpc-unknown os=-morphos ;; msdos) basic_machine=i386-pc os=-msdos ;; ms1-*) basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` ;; msys) basic_machine=i386-pc os=-msys ;; mvs) basic_machine=i370-ibm os=-mvs ;; nacl) basic_machine=le32-unknown os=-nacl ;; ncr3000) basic_machine=i486-ncr os=-sysv4 ;; netbsd386) basic_machine=i386-unknown os=-netbsd ;; netwinder) basic_machine=armv4l-rebel os=-linux ;; news | news700 | news800 | news900) basic_machine=m68k-sony os=-newsos ;; news1000) basic_machine=m68030-sony os=-newsos ;; news-3600 | risc-news) basic_machine=mips-sony os=-newsos ;; necv70) basic_machine=v70-nec os=-sysv ;; next | m*-next ) basic_machine=m68k-next case $os in -nextstep* ) ;; -ns2*) os=-nextstep2 ;; *) os=-nextstep3 ;; esac ;; nh3000) basic_machine=m68k-harris os=-cxux ;; nh[45]000) basic_machine=m88k-harris os=-cxux ;; nindy960) basic_machine=i960-intel os=-nindy ;; mon960) basic_machine=i960-intel os=-mon960 ;; nonstopux) basic_machine=mips-compaq os=-nonstopux ;; np1) basic_machine=np1-gould ;; neo-tandem) basic_machine=neo-tandem ;; nse-tandem) basic_machine=nse-tandem ;; nsr-tandem) basic_machine=nsr-tandem ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf ;; openrisc | openrisc-*) basic_machine=or32-unknown ;; os400) basic_machine=powerpc-ibm os=-os400 ;; OSE68000 | ose68000) basic_machine=m68000-ericsson os=-ose ;; os68k) basic_machine=m68k-none os=-os68k ;; pa-hitachi) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; paragon) basic_machine=i860-intel os=-osf ;; parisc) 
basic_machine=hppa-unknown os=-linux ;; parisc-*) basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; pbd) basic_machine=sparc-tti ;; pbb) basic_machine=m68k-tti ;; pc532 | pc532-*) basic_machine=ns32k-pc532 ;; pc98) basic_machine=i386-pc ;; pc98-*) basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; pentiumpro | p6 | 6x86 | athlon | athlon_*) basic_machine=i686-pc ;; pentiumii | pentium2 | pentiumiii | pentium3) basic_machine=i686-pc ;; pentium4) basic_machine=i786-pc ;; pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumpro-* | p6-* | 6x86-* | athlon-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium4-*) basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pn) basic_machine=pn-gould ;; power) basic_machine=power-ibm ;; ppc | ppcbe) basic_machine=powerpc-unknown ;; ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64) basic_machine=powerpc64-unknown ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ps2) basic_machine=i386-ibm ;; pw32) basic_machine=i586-unknown os=-pw32 ;; rdos) basic_machine=i386-pc os=-rdos ;; rom68k) basic_machine=m68k-rom68k os=-coff ;; rm[46]00) basic_machine=mips-siemens ;; rtpc | rtpc-*) basic_machine=romp-ibm ;; s390 | s390-*) basic_machine=s390-ibm ;; s390x | s390x-*) basic_machine=s390x-ibm ;; sa29200) basic_machine=a29k-amd os=-udi ;; sb1) basic_machine=mipsisa64sb1-unknown ;; sb1el) basic_machine=mipsisa64sb1el-unknown ;; sde) basic_machine=mipsisa32-sde os=-elf ;; sei) basic_machine=mips-sei os=-seiux ;; sequent) basic_machine=i386-sequent ;; sh) basic_machine=sh-hitachi os=-hms ;; sh5el) basic_machine=sh5le-unknown ;; sh64) basic_machine=sh64-unknown ;; sparclite-wrs | simso-wrs) basic_machine=sparclite-wrs os=-vxworks ;; sps7) basic_machine=m68k-bull os=-sysv2 ;; spur) basic_machine=spur-unknown ;; st2000) basic_machine=m68k-tandem ;; stratus) basic_machine=i860-stratus os=-sysv4 ;; strongarm-* | thumb-*) basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` ;; sun2) basic_machine=m68000-sun ;; sun2os3) basic_machine=m68000-sun os=-sunos3 ;; sun2os4) basic_machine=m68000-sun os=-sunos4 ;; sun3os3) basic_machine=m68k-sun os=-sunos3 ;; sun3os4) basic_machine=m68k-sun os=-sunos4 ;; sun4os3) basic_machine=sparc-sun os=-sunos3 ;; sun4os4) basic_machine=sparc-sun os=-sunos4 ;; sun4sol2) basic_machine=sparc-sun os=-solaris2 ;; sun3 | sun3-*) basic_machine=m68k-sun ;; sun4) basic_machine=sparc-sun ;; sun386 | sun386i | roadrunner) basic_machine=i386-sun ;; sv1) basic_machine=sv1-cray os=-unicos ;; symmetry) basic_machine=i386-sequent os=-dynix ;; t3e) basic_machine=alphaev5-cray os=-unicos ;; t90) basic_machine=t90-cray os=-unicos ;; tile*) basic_machine=$basic_machine-unknown os=-linux-gnu ;; tx39) basic_machine=mipstx39-unknown ;; tx39el) basic_machine=mipstx39el-unknown ;; toad1) 
basic_machine=pdp10-xkl os=-tops20 ;; tower | tower-32) basic_machine=m68k-ncr ;; tpf) basic_machine=s390x-ibm os=-tpf ;; udi29k) basic_machine=a29k-amd os=-udi ;; ultra3) basic_machine=a29k-nyu os=-sym1 ;; v810 | necv810) basic_machine=v810-nec os=-none ;; vaxv) basic_machine=vax-dec os=-sysv ;; vms) basic_machine=vax-dec os=-vms ;; vpp*|vx|vx-*) basic_machine=f301-fujitsu ;; vxworks960) basic_machine=i960-wrs os=-vxworks ;; vxworks68) basic_machine=m68k-wrs os=-vxworks ;; vxworks29k) basic_machine=a29k-wrs os=-vxworks ;; w65*) basic_machine=w65-wdc os=-none ;; w89k-*) basic_machine=hppa1.1-winbond os=-proelf ;; xbox) basic_machine=i686-pc os=-mingw32 ;; xps | xps100) basic_machine=xps100-honeywell ;; xscale-* | xscalee[bl]-*) basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` ;; ymp) basic_machine=ymp-cray os=-unicos ;; z8k-*-coff) basic_machine=z8k-unknown os=-sim ;; z80-*-coff) basic_machine=z80-unknown os=-sim ;; none) basic_machine=none-none os=-none ;; # Here we handle the default manufacturer of certain CPU types. It is in # some cases the only manufacturer, in others, it is the most popular. w89k) basic_machine=hppa1.1-winbond ;; op50n) basic_machine=hppa1.1-oki ;; op60c) basic_machine=hppa1.1-oki ;; romp) basic_machine=romp-ibm ;; mmix) basic_machine=mmix-knuth ;; rs6000) basic_machine=rs6000-ibm ;; vax) basic_machine=vax-dec ;; pdp10) # there are many clones, so DEC is not a safe bet basic_machine=pdp10-unknown ;; pdp11) basic_machine=pdp11-dec ;; we32k) basic_machine=we32k-att ;; sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) basic_machine=cydra-cydrome ;; orion) basic_machine=orion-highlevel ;; orion105) basic_machine=clipper-highlevel ;; mac | mpw | mac-mpw) basic_machine=m68k-apple ;; pmac | pmac-mpw) basic_machine=powerpc-apple ;; *-unknown) # Make sure to match an already-canonicalized machine name. ;; *) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; esac # Here we canonicalize certain aliases for manufacturers. case $basic_machine in *-digital*) basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` ;; *-commodore*) basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` ;; *) ;; esac # Decode manufacturer-specific aliases for certain operating systems. if [ x"$os" != x"" ] then case $os in # First match some system type aliases # that might get confused with valid system types. # -solaris* is a basic system type, with this one exception. -auroraux) os=-auroraux ;; -solaris1 | -solaris1.*) os=`echo $os | sed -e 's|solaris1|sunos4|'` ;; -solaris) os=-solaris2 ;; -svr4*) os=-sysv4 ;; -unixware*) os=-sysv4.2uw ;; -gnu/linux*) os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` ;; # First accept the basic system types. # The portable systems comes first. # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. 
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ | -mingw32* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) case $basic_machine in x86-* | i*86-*) ;; *) os=-nto$os ;; esac ;; -nto-qnx*) ;; -nto*) os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; -linux-dietlibc) os=-linux-dietlibc ;; -linux*) os=`echo $os | sed -e 's|linux|linux-gnu|'` ;; -sunos5*) os=`echo $os | sed -e 's|sunos5|solaris2|'` ;; -sunos6*) os=`echo $os | sed -e 's|sunos6|solaris3|'` ;; -opened*) os=-openedition ;; -os400*) os=-os400 ;; -wince*) os=-wince ;; -osfrose*) os=-osfrose ;; -osf*) os=-osf ;; -utek*) os=-bsd ;; -dynix*) os=-bsd ;; -acis*) os=-aos ;; -atheos*) os=-atheos ;; -syllable*) os=-syllable ;; -386bsd) os=-bsd ;; -ctix* | -uts*) os=-sysv ;; -nova*) os=-rtmk-nova ;; -ns2 ) os=-nextstep2 ;; -nsk*) os=-nsk ;; # Preserve the version number of sinix5. -sinix5.*) os=`echo $os | sed -e 's|sinix|sysv|'` ;; -sinix*) os=-sysv4 ;; -tpf*) os=-tpf ;; -triton*) os=-sysv3 ;; -oss*) os=-sysv3 ;; -svr4) os=-sysv4 ;; -svr3) os=-sysv3 ;; -sysvr4) os=-sysv4 ;; # This must come after -sysvr4. -sysv*) ;; -ose*) os=-ose ;; -es1800*) os=-ose ;; -xenix) os=-xenix ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) os=-mint ;; -aros*) os=-aros ;; -kaos*) os=-kaos ;; -zvmoe) os=-zvmoe ;; -dicos*) os=-dicos ;; -nacl*) ;; -none) ;; *) # Get rid of the `-' at the beginning of $os. os=`echo $os | sed 's/[^-]*-//'` echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 exit 1 ;; esac else # Here we handle the default operating systems that come with various machines. # The value should be what the vendor currently ships out the door with their # machine or put another way, the most popular os provided with the machine. # Note that if you're going to try to match "-MANUFACTURER" here (say, # "-sun"), then you have to tell the case statement up towards the top # that MANUFACTURER isn't an operating system. 
# Otherwise, code above
# will signal an error saying that MANUFACTURER isn't an operating
# system, and we'll never get to this point.

case $basic_machine in
	score-*)	os=-elf ;;
	spu-*)		os=-elf ;;
	*-acorn)	os=-riscix1.2 ;;
	arm*-rebel)	os=-linux ;;
	arm*-semi)	os=-aout ;;
	c4x-* | tic4x-*)	os=-coff ;;
	hexagon-*)	os=-elf ;;
	tic54x-*)	os=-coff ;;
	tic55x-*)	os=-coff ;;
	tic6x-*)	os=-coff ;;
	# This must come before the *-dec entry.
	pdp10-*)	os=-tops20 ;;
	pdp11-*)	os=-none ;;
	*-dec | vax-*)	os=-ultrix4.2 ;;
	m68*-apollo)	os=-domain ;;
	i386-sun)	os=-sunos4.0.2 ;;
	m68000-sun)	os=-sunos3 ;;
	m68*-cisco)	os=-aout ;;
	mep-*)		os=-elf ;;
	mips*-cisco)	os=-elf ;;
	mips*-*)	os=-elf ;;
	or32-*)		os=-coff ;;
	*-tti)	# must be before sparc entry or we get the wrong os.
		os=-sysv3 ;;
	sparc-* | *-sun)	os=-sunos4.1.1 ;;
	*-be)		os=-beos ;;
	*-haiku)	os=-haiku ;;
	*-ibm)		os=-aix ;;
	*-knuth)	os=-mmixware ;;
	*-wec)		os=-proelf ;;
	*-winbond)	os=-proelf ;;
	*-oki)		os=-proelf ;;
	*-hp)		os=-hpux ;;
	*-hitachi)	os=-hiux ;;
	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)	os=-sysv ;;
	*-cbm)		os=-amigaos ;;
	*-dg)		os=-dgux ;;
	*-dolphin)	os=-sysv3 ;;
	m68k-ccur)	os=-rtu ;;
	m88k-omron*)	os=-luna ;;
	*-next )	os=-nextstep ;;
	*-sequent)	os=-ptx ;;
	*-crds)		os=-unos ;;
	*-ns)		os=-genix ;;
	i370-*)		os=-mvs ;;
	*-next)		os=-nextstep3 ;;
	*-gould)	os=-sysv ;;
	*-highlevel)	os=-bsd ;;
	*-encore)	os=-bsd ;;
	*-sgi)		os=-irix ;;
	*-siemens)	os=-sysv4 ;;
	*-masscomp)	os=-rtu ;;
	f30[01]-fujitsu | f700-fujitsu)	os=-uxpv ;;
	*-rom68k)	os=-coff ;;
	*-*bug)		os=-coff ;;
	*-apple)	os=-macos ;;
	*-atari*)	os=-mint ;;
	*)		os=-none ;;
esac
fi

# Here we handle the case where we know the os, and the CPU type, but not the
# manufacturer.  We pick the logical manufacturer.
vendor=unknown
case $basic_machine in
	*-unknown)
		case $os in
			-riscix*)	vendor=acorn ;;
			-sunos*)	vendor=sun ;;
			-cnk*|-aix*)	vendor=ibm ;;
			-beos*)		vendor=be ;;
			-hpux*)		vendor=hp ;;
			-mpeix*)	vendor=hp ;;
			-hiux*)		vendor=hitachi ;;
			-unos*)		vendor=crds ;;
			-dgux*)		vendor=dg ;;
			-luna*)		vendor=omron ;;
			-genix*)	vendor=ns ;;
			-mvs* | -opened*)	vendor=ibm ;;
			-os400*)	vendor=ibm ;;
			-ptx*)		vendor=sequent ;;
			-tpf*)		vendor=ibm ;;
			-vxsim* | -vxworks* | -windiss*)	vendor=wrs ;;
			-aux*)		vendor=apple ;;
			-hms*)		vendor=hitachi ;;
			-mpw* | -macos*)	vendor=apple ;;
			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)	vendor=atari ;;
			-vos*)		vendor=stratus ;;
		esac
		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
		;;
esac

echo $basic_machine$os
exit

# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
# End:

PHYLIPNEW-3.69.650/src/retree.c

#include "phylip.h"
#include "moves.h"

/* version 3.6. (c) Copyright 1993-2008 by the University of Washington.
   Written by Joseph Felsenstein and Andrew Keeffe.
   Permission is granted to copy and use this program provided no fee is
   charged for it and provided that this copyright notice is not removed. */

/* maximum number of species */
#define maxsp           5000

/* size of pointer array.
>= 2*maxsp - 1 */ /* (this can be large without eating memory */ #define maxsz 9999 #define overr 4 #define which 1 AjPPhyloTree* phylotrees = NULL; typedef enum {valid, remoov, quit} reslttype; typedef enum { horiz, vert, up, updel, ch_over, upcorner, midcorner, downcorner, aa, cc, gg, tt, deleted } chartype; typedef struct treeset_t { node *root; pointarray nodep; long nonodes; boolean waswritten, hasmult, haslengths, nolengths, initialized; } treeset_t; treeset_t treesets[2]; treeset_t simplifiedtree; typedef enum { arb, use, spec } howtree; typedef enum {beforenode, atnode} movet; movet fromtype; #ifndef OLDC /* function prototypes */ void initretreenode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void gdispose(node *); void maketriad(node **, long); void maketip(node **, long); void copynode(node *, node *); node *copytrav(node *); void copytree(void); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void configure(void); void prefix(chartype); void postfix(chartype); void ltrav(node *, boolean *); boolean ifhaslengths(void); void add_at(node *, node *, node *); void add_before(node *, node *); void add_child(node *, node *); void re_move(node **, node **); void reroot(node *); void ltrav_(node *, double, double, double *, long *, long *); void precoord(node *, boolean *, double *, long *); void coordinates(node *, double, long *, long *, double *); void flatcoordinates(node *, long *); void grwrite(chartype, long, long *); void drawline(long, node *, boolean *); void printree(void); void togglelengths(void); void arbitree(void); void yourtree(void); void buildtree(void); void unbuildtree(void); void retree_help(void); void consolidatetree(long); void rearrange(void); boolean any_deleted(node *); void fliptrav(node *, boolean); void flip(long); void transpose(long); void ifdeltrav(node *, boolean *); double oltrav(node *); void outlength(void); void midpoint(void); void deltrav(node *, boolean ); void reg_del(node *, boolean); boolean isdeleted(long); void deletebranch(void); void restorebranch(void); void del_or_restore(void); void undo(void); void treetrav(node *); void simcopynode(node *, node *); node *simcopytrav(node *); void simcopytree(void); void writebranchlength(double); void treeout(node *, boolean, double, long); void maketemptriad(node **, long); void roottreeout(boolean *); void notrootedtorooted(void); void rootedtonotrooted(void); void treewrite(boolean *); void retree_window(adjwindow); void getlength(double *, reslttype *, boolean *); void changelength(void); void changename(void); void clade(void); void changeoutgroup(void); void redisplay(void); void treeconstruct(void); void fill_del(node*p); /* function prototypes */ #endif node *root, *garbage; long nonodes, outgrno, screenwidth, vscreenwidth, screenlines, col, treenumber, leftedge, topedge, treelines, hscroll, vscroll, scrollinc, whichtree, othertree, numtrees, treesread; double trweight; boolean waswritten, onfirsttree, hasmult, haslengths, nolengths, nexus, xmltree; node **treeone, **treetwo; pointarray nodep; /* pointers to all nodes in current tree */ node *grbg; boolean reversed[14]; boolean graphic[14]; unsigned char cch[14]; howtree how; char intreename[FNMLNGTH]; const char* outtreename; AjPFile embossouttree; boolean subtree, written, readnext; node *nuroot; Char ch; boolean delarray[maxsz]; void initretreenode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops 
whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ long i; boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; (*p)->deleted=false; (*p)->deadend=false; (*p)->onebranch=false; (*p)->onebranchhaslength=false; for (i=0;inayme[i] = '\0'; nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); (*p)->index = nodei; break; case hslength: if ((*p)->back) { (*p)->back->back = *p; (*p)->haslength = (*p)->back->haslength; if ((*p)->haslength) (*p)->length = (*p)->back->length; } break; case tip: (*ntips)++; gnu(grbg, p); nodep[(*ntips) - 1] = *p; (*p)->index = *ntips; (*p)->tip = true; (*p)->hasname = true; strncpy ((*p)->nayme, str, MAXNCH); break; case length: (*p)->haslength = true; if ((*p)->back != NULL) (*p)->back->haslength = (*p)->haslength; processlength(&valyew, &divisor, ch, &minusread, treestr, parens); if (!minusread) (*p)->length = valyew / divisor; else (*p)->length = 0.0; (*p)->back = q; if (haslengths && q != NULL) { (*p)->back->haslength = (*p)->haslength; (*p)->back->length = (*p)->length; } break; case hsnolength: haslengths = (haslengths && q == NULL); (*p)->haslength = false; (*p)->back = q; break; default: /*cases iter, treewt, unttrwt */ break; /*should not occur */ } } /* initretreenode */ void gdispose(node *p) { /* go through tree throwing away nodes */ node *q, *r; if (p->tip) return; q = p->next; while (q != p) { gdispose(q->back); q->tip = false; q->hasname = false; q->haslength = false; r = q; q = q->next; chuck(&grbg, r); } q->tip = false; q->hasname = false; q->haslength = false; chuck(&grbg, q); } /* gdispose */ void maketriad(node **p, long index) { /* Initiate an internal node with stubs for two children */ long i, j; node *q; q = NULL; for (i = 1; i <= 3; i++) { gnu(&grbg, p); (*p)->index = index; (*p)->hasname = false; (*p)->haslength = false; (*p)->deleted=false; (*p)->deadend=false; (*p)->onebranch=false; (*p)->onebranchhaslength=false; for (j=0;jnayme[j] = '\0'; (*p)->next = q; q = *p; } (*p)->next->next->next = *p; q = (*p)->next; while (*p != q) { (*p)->back = NULL; (*p)->tip = false; *p = (*p)->next; } nodep[index - 1] = *p; } /* maketriad */ void maketip(node **p, long index) { /* Initiate a tip node */ gnu(&grbg, p); (*p)->index = index; (*p)->tip = true; (*p)->hasname = false; (*p)->haslength = false; nodep[index - 1] = *p; } /* maketip */ void copynode(node *fromnode, node *tonode) { /* Copy the contents of a node from fromnode to tonode. 
*/ int i; tonode->index = fromnode->index; tonode->deleted = fromnode->deleted; tonode->tip = fromnode->tip; tonode->hasname = fromnode->hasname; if (fromnode->hasname) for (i=0;inayme[i] = fromnode->nayme[i]; tonode->haslength = fromnode->haslength; if (fromnode->haslength) tonode->length = fromnode->length; } /* copynode */ node *copytrav(node *p) { /* Traverse the tree from p on down, copying nodes to the other tree */ node *q, *newnode, *newnextnode, *temp; gnu(&grbg, &newnode); copynode(p,newnode); if (nodep[p->index-1] == p) treesets[othertree].nodep[p->index-1] = newnode; /* if this is a tip, return now */ if (p->tip) return newnode; /* go around the ring, copying as we go */ q = p->next; gnu(&grbg, &newnextnode); copynode(q, newnextnode); newnode->next = newnextnode; do { newnextnode->back = copytrav(q->back); newnextnode->back->back = newnextnode; q = q->next; if (q == p) newnextnode->next = newnode; else { temp = newnextnode; gnu(&grbg, &newnextnode); copynode(q, newnextnode); temp->next = newnextnode; } } while (q != p); return newnode; } /* copytrav */ void copytree() { /* Make a complete copy of the current tree for undo purposes */ if (whichtree == 1) othertree = 0; else othertree = 1; treesets[othertree].root = copytrav(root); treesets[othertree].nonodes = nonodes; treesets[othertree].waswritten = waswritten; treesets[othertree].hasmult = hasmult; treesets[othertree].haslengths = haslengths; treesets[othertree].nolengths = nolengths; treesets[othertree].initialized = true; } /* copytree */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr initialtree = NULL; AjPStr format = NULL; how = use; outgrno = 1; onfirsttree = true; screenlines = 24; screenwidth = 80; vscreenwidth = 80; nexus = false; xmltree = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylotrees = ajAcdGetTree("intreefile"); if(ajStrMatchC(initialtree, "a")) how = arb; else if(ajStrMatchC(initialtree, "u")) how = use; else if(ajStrMatchC(initialtree, "s")) how = spec; format = ajAcdGetListSingle("format"); if(ajStrMatchC(format, "n")) nexus = true; else if(ajStrMatchC(format, "x")) xmltree = true; screenwidth = ajAcdGetInt("screenwidth"); vscreenwidth = ajAcdGetInt("vscreenwidth"); screenlines = ajAcdGetInt("screenlines"); if (scrollinc < screenwidth / 2.0) hscroll = scrollinc; else hscroll = screenwidth / 2; if (scrollinc < screenlines / 2.0) vscroll = scrollinc; else vscroll = screenlines / 2; embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } /* emboss_getoptions */ void configure() { /* configure to machine -- set up special characters */ chartype a; for (a = horiz; (long)a <= (long)deleted; a = (chartype)((long)a + 1)) reversed[(long)a] = false; for (a = horiz; (long)a <= (long)deleted; a = (chartype)((long)a + 1)) graphic[(long)a] = false; cch[(long)deleted] = '.'; cch[(long)updel] = ':'; if (ibmpc) { cch[(long)horiz] = '>'; cch[(long)vert] = 186; graphic[(long)vert] = true; cch[(long)up] = 186; graphic[(long)up] = true; cch[(long)ch_over] = 205; graphic[(long)ch_over] = true; cch[(long)upcorner] = 200; graphic[(long)upcorner] = true; cch[(long)midcorner] = 204; graphic[(long)midcorner] = true; cch[(long)downcorner] = 201; graphic[(long)downcorner] = true; return; } if (ansi) { cch[(long)horiz] = '>'; cch[(long)vert] = cch[(long)horiz]; reversed[(long)vert] = true; cch[(long)up] = 'x'; graphic[(long)up] = true; cch[(long)ch_over] = 'q'; graphic[(long)ch_over] = true; cch[(long)upcorner] = 'm'; graphic[(long)upcorner] = true; 
cch[(long)midcorner] = 't'; graphic[(long)midcorner] = true; cch[(long)downcorner] = 'l'; graphic[(long)downcorner] = true; return; } cch[(long)horiz] = '>'; cch[(long)vert] = ' '; cch[(long)up] = '!'; cch[(long)upcorner] = '`'; cch[(long)midcorner] = '+'; cch[(long)downcorner] = ','; cch[(long)ch_over] = '-'; } /* configure */ void prefix(chartype a) { /* give prefix appropriate for this character */ if (reversed[(long)a]) prereverse(ansi); if (graphic[(long)a]) pregraph2(ansi); } /* prefix */ void postfix(chartype a) { /* give postfix appropriate for this character */ if (reversed[(long)a]) postreverse(ansi); if (graphic[(long)a]) postgraph2(ansi); } /* postfix */ void ltrav(node *p, boolean *localhl) { /* Traversal function for ifhaslengths() */ node *q; if (p->tip) { (*localhl) = ((*localhl) && p->haslength); return; } q = p->next; do { (*localhl) = ((*localhl) && q->haslength); if ((*localhl)) ltrav(q->back, localhl); q = q->next; } while (p != q); } /* ltrav */ boolean ifhaslengths() { /* return true if every branch in tree has a length */ boolean localhl; localhl = true; ltrav(root, &localhl); return localhl; } /* ifhaslengths */ void add_at(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ node *leftdesc, *rtdesc; double length; if (below != nodep[below->index - 1]) below = nodep[below->index - 1]; if (newfork == NULL) { nonodes++; maketriad (&newfork, nonodes); if (haslengths) { newfork->haslength = true; newfork->next->haslength = true; newfork->next->next->haslength = true; } } if (below->back != NULL) { below->back->back = newfork; } newfork->back = below->back; leftdesc = newtip; rtdesc = below; rtdesc->back = newfork->next->next; newfork->next->next->back = rtdesc; newfork->next->back = leftdesc; leftdesc->back = newfork->next; if (root == below) root = newfork; root->back = NULL; if (!haslengths) return; if (newfork->back != NULL) { length = newfork->back->length / 2.0; newfork->length = length; newfork->back->length = length; below->length = length; below->back->length = length; } else { length = newtip->length / 2.0; newtip->length = length; newtip->back->length = length; below->length = length; below->back->length = length; below->haslength = true; } newtip->back->length = newtip->length; } /* add_at */ void add_before(node *atnode, node *newtip) { /* inserts the node newtip together with its ancestral fork into the tree next to the node atnode. */ /*xx ?? debug what to do if no ancestral node -- have to create one */ /*xx this case is handled by add_at. 
However, add_at does not account for when there is more than one sibling for the relocated newtip */ node *q; if (atnode != nodep[atnode->index - 1]) atnode = nodep[atnode->index - 1]; q = nodep[newtip->index-1]->back; if (q != NULL) { q = nodep[q->index-1]; if (newtip == q->next->next->back) { q->next->back = newtip; newtip->back = q->next; q->next->next->back = NULL; } } if (newtip->back != NULL) { add_at(atnode, newtip, nodep[newtip->back->index-1]); } else { add_at(atnode, newtip, NULL); } } /* add_before */ void add_child(node *parent, node *newchild) { /* adds the node newchild into the tree as the last child of parent */ int i; node *newnode, *q; if (parent != nodep[parent->index - 1]) parent = nodep[parent->index - 1]; gnu(&grbg, &newnode); newnode->tip = false; newnode->deleted=false; newnode->deadend=false; newnode->onebranch=false; newnode->onebranchhaslength=false; for (i=0;inayme[i] = '\0'; newnode->index = parent->index; q = parent; do { q = q->next; } while (q->next != parent); newnode->next = parent; q->next = newnode; newnode->back = newchild; newchild->back = newnode; if (newchild->haslength) { newnode->length = newchild->length; newnode->haslength = true; } else newnode->haslength = false; } /* add_child */ void re_move(node **item, node **fork) { /* Removes node item from the tree. If item has one sibling, removes its ancestor, fork, from the tree as well and attach item's sib to fork's ancestor. In this case, it returns a pointer to the removed fork node which is still attached to item. */ node *p =NULL, *q; int nodecount; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = nodep[(*item)->back->index - 1]; nodecount = 0; if ((*fork)->next->back == *item) p = *fork; q = (*fork)->next; do { nodecount++; if (q->next->back == *item) p = q; q = q->next; } while (*fork != q); if (nodecount > 2) { fromtype = atnode; p->next = (*item)->back->next; chuck(&grbg, (*item)->back); (*item)->back = NULL; /*xx*/ *fork = NULL; } else { /* traditional (binary tree) remove code */ if (*item == (*fork)->next->back) { if (root == *fork) root = (*fork)->next->next->back; } else { if (root == *fork) root = (*fork)->next->back; } fromtype = beforenode; /* stitch nodes together, leaving out item */ p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; if (haslengths) { if (p != NULL && q != NULL) { p->length += q->length; q->length = p->length; } else (*item)->length = (*fork)->next->length + (*fork)->next->next->length; } (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; } /* endif nodecount > 2 else */ } /* re_move */ void reroot(node *outgroup) { /* Reorient tree so that outgroup is by itself on the left of the root */ node *p, *q, *r; long nodecount = 0; double templen; q = root->next; do { /* when this loop exits, p points to the internal */ p = q; /* node to the right of root */ nodecount++; q = p->next; } while (q != root); r = p; /* There is no point in proceeding if 1. outgroup is a child of root, and 2. the tree bifurcates at the root. */ if((outgroup->back->index == root->index) && !(nodecount > 2)) return; /* reorient nodep array The nodep array must point to the ring member of each ring that is closest to the root. The while loop changes the ring member pointed to by nodep[] for those nodes that will have their orientation changed by the reroot operation. 
*/ p = outgroup->back; while (p->index != root->index) { q = nodep[p->index - 1]->back; nodep[p->index - 1] = p; p = q; } if (nodecount > 2) nodep[p->index - 1] = p; /* If nodecount > 2, the current node ring to which root is pointing will remain in place and root will point somewhere else. */ /* detach root from old location */ if (nodecount > 2) { r->next = root->next; root->next = NULL; nonodes++; maketriad(&root, nonodes); if (haslengths) { /* root->haslength remains false, or else treeout() will generate a bogus extra length */ root->next->haslength = true; root->next->next->haslength = true; } } else { /* if (nodecount > 2) else */ q = root->next; q->back->back = r->back; r->back->back = q->back; if (haslengths) { r->back->length = r->back->length + q->back->length; q->back->length = r->back->length; } } /* if (nodecount > 2) endif */ /* tie root into new location */ root->next->back = outgroup; root->next->next->back = outgroup->back; outgroup->back->back = root->next->next; outgroup->back = root->next; /* place root equidistant between left child (outgroup) and right child by dividing outgroup's length */ if (haslengths) { templen = outgroup->length / 2.0; outgroup->length = templen; outgroup->back->length = templen; root->next->next->length = templen; root->next->next->back->length = templen; } } /* reroot */ void ltrav_(node *p, double lengthsum, double lmin, double *tipmax, long *across, long *maxchar) { node *q; long rchar, nl; double sublength; if (p->tip) { if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; if (lmin == 0.0) return; rchar = (long)(lengthsum / (*tipmax) * (*across) + 0.5); nl = strlen(nodep[p->index - 1]->nayme); if (rchar + nl > (*maxchar)) (*across) = (*maxchar) - (long)(nl * (*tipmax) / lengthsum + 0.5); return; } q = p->next; do { if (q->length >= lmin) sublength = q->length; else sublength = lmin; ltrav_(q->back, lengthsum + sublength, lmin, tipmax, across, maxchar); q = q->next; } while (p != q); } /* ltrav */ void precoord(node *nuroot,boolean *subtree,double *tipmax,long *across) { /* set tipmax and across so that tree is scaled to screenwidth */ double oldtipmax, minimum; long i, maxchar; (*tipmax) = 0.0; if ((*subtree)) maxchar = vscreenwidth - 13; else maxchar = vscreenwidth - 5; (*across) = maxchar; ltrav_(nuroot, 0.0, 0.0, tipmax, across, &maxchar); i = 0; do { oldtipmax = (*tipmax); minimum = 3.0 / (*across) * (*tipmax); ltrav_(nuroot, 0.0, minimum, tipmax, across, &maxchar); i++; } while (fabs((*tipmax) - oldtipmax) > 0.01 * oldtipmax && i <= 40); } /* precoord */ void coordinates(node *p, double lengthsum, long *across, long *tipy, double *tipmax) { /* establishes coordinates of nodes for display with lengths */ node *q, *first, *last; if (p->tip) { p->xcoord = (long)((*across) * lengthsum / (*tipmax) + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; return; } q = p->next; do { coordinates(q->back, lengthsum + q->length, across, tipy, tipmax); q = q->next; } while (p != q); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)((*across) * lengthsum / (*tipmax) + 0.5); if (p == root) { if (root->next->next->next == root) p->ycoord = (first->ycoord + last->ycoord) / 2; else p->ycoord = p->next->next->back->ycoord; } else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* coordinates */ void flatcoordinates(node *p, long *tipy) { /* establishes coordinates of nodes for display without lengths */ node *q, *first, *last; if 
(p->tip) { p->xcoord = 0; p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; return; } q = p->next; do { flatcoordinates(q->back, tipy); q = q->next; } while (p != q); first = p->next->back; q = p->next; while (q->next != p) q = q->next; last = q->back; p->xcoord = (last->ymax - first->ymin) * 3 / 2; if (p == root) { if (root->next->next->next == root) p->ycoord = (first->ycoord + last->ycoord) / 2; else p->ycoord = p->next->next->back->ycoord; } else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* flatcoordinates */ void grwrite(chartype c, long num, long *pos) { long i; prefix(c); for (i = 1; i <= num; i++) { if ((*pos) >= leftedge && (*pos) - leftedge + 1 < screenwidth) putchar(cch[(long)c]); (*pos)++; } postfix(c); } /* grwrite */ void drawline(long i, node *nuroot, boolean *subtree) { /* draws one row of the tree diagram by moving up tree */ long pos; node *p, *q, *r, *s, *first =NULL, *last =NULL; long n, j; long up_nondel, down_nondel; boolean extra, done; chartype c, d; pos = 1; p = nuroot; q = nuroot; extra = false; if (i == (long)p->ycoord && (p == root || (*subtree))) { c = ch_over; if ((*subtree)) stwrite("Subtree:", 8, &pos, leftedge, screenwidth); if (p->index >= 100) nnwrite(p->index, 3, &pos, leftedge, screenwidth); else if (p->index >= 10) { grwrite(c, 1, &pos); nnwrite(p->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(p->index, 1, &pos, leftedge, screenwidth); } extra = true; } else { if ((*subtree)) stwrite(" ", 10, &pos, leftedge, screenwidth); else stwrite(" ", 2, &pos, leftedge, screenwidth); } do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); if (haslengths && !nolengths) n = (long)(q->xcoord - p->xcoord); else n = (long)(p->xcoord - q->xcoord); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { c = ch_over; if (!haslengths && !q->haslength) c = horiz; if (q->deleted) c = deleted; if (q == first) d = downcorner; else if (q == last) d = upcorner; else if ((long)q->ycoord == (long)p->ycoord) d = c; else d = midcorner; if (n > 1 || q->tip) { grwrite(d, 1, &pos); grwrite(c, n - 3, &pos); } if (q->index >= 100) nnwrite(q->index, 3, &pos, leftedge, screenwidth); else if (q->index >= 10) { grwrite(c, 1, &pos); nnwrite(q->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(q->index, 1, &pos, leftedge, screenwidth); } extra = true; } else if (!q->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { c = up; if(p->deleted) c = updel; if (!p->tip) { up_nondel = 0; down_nondel = 0; r = p->next; do { s = r->back; if ((long)s->ycoord < (long)p->ycoord && !s->deleted) up_nondel = (long)s->ycoord; if (s->ycoord > p->ycoord && !s->deleted && (down_nondel == 0)) down_nondel = (long)s->ycoord; if (i < (long)p->ycoord && s->deleted && i > (long)s->ycoord) c = updel; if (i > (long)p->ycoord && s->deleted && i < (long)s->ycoord) c = updel; r = r->next; } while (r != p); if ((up_nondel != 0) && i < (long)p->ycoord && i > up_nondel) c = up; if ((down_nondel != 0) && i > (long)p->ycoord && i < down_nondel) c = up; } grwrite(c, 1, &pos); chwrite(' ', n - 1, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); } else chwrite(' ', 
n, &pos, leftedge, screenwidth); if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { if (p->hasname) { n = 0; for (j = 1; j <= MAXNCH; j++) { if (nodep[p->index - 1]->nayme[j - 1] != '\0') n = j; } chwrite(':', 1, &pos, leftedge, screenwidth); for (j = 0; j < n; j++) chwrite(nodep[p->index - 1]->nayme[j], 1, &pos, leftedge, screenwidth); } } putchar('\n'); } /* drawline */ void printree() { /* prints out diagram of the tree */ long across; long tipy; double tipmax; long i, dow, vmargin; haslengths = ifhaslengths(); if (!subtree) nuroot = root; cleerhome(); tipy = 1; dow = down; if (spp * dow > screenlines && !subtree) { dow--; } if (haslengths && !nolengths) { precoord(nuroot, &subtree, &tipmax, &across); /* protect coordinates() from div/0 errors if user decides to examine a tip as a subtree */ if (tipmax == 0) tipmax = 0.01; coordinates(nuroot, 0.0, &across, &tipy, &tipmax); } else flatcoordinates(nuroot, &tipy); vmargin = 2; treelines = tipy - dow; if (topedge != 1) { printf("** %ld lines above screen **\n", topedge - 1); vmargin++; } if ((treelines - topedge + 1) > (screenlines - vmargin)) vmargin++; for (i = 1; i <= treelines; i++) { if (i >= topedge && i < topedge + screenlines - vmargin) drawline(i, nuroot,&subtree); } if (leftedge > 1) printf("** %ld characters to left of screen ", leftedge); if ((treelines - topedge + 1) > (screenlines - vmargin)) { printf("** %ld", treelines - (topedge - 1 + screenlines - vmargin)); printf(" lines below screen **\n"); } if (treelines - topedge + vmargin + 1 < screenlines) putchar('\n'); } /* printree */ void togglelengths() { nolengths = !nolengths; printree(); } /* togglengths */ void arbitree() { long i; node *newtip, *newfork; spp = ajAcdGetInt("spp"); nonodes = spp * 2 - 1; maketip(&root, 1); maketip(&newtip, 2); maketriad(&newfork, spp + 1); add_at(root, newtip, newfork); for (i = 3; i <= spp; i++) { maketip(&newtip, i); maketriad(&newfork, spp + i - 1); add_at(nodep[spp + i - 3], newtip, newfork); } } /* arbitree */ void yourtree() { long uniquearray[maxsz]; long uniqueindex = 0; long i, j, k, k_max=0, maxinput; boolean ok, done; node *newtip, *newfork; Char ch; uniquearray[0] = 0; spp = 2; nonodes = spp * 2 - 1; maketip(&root, 1); maketip(&newtip, 2); maketriad(&newfork, spp + 3); add_at(root, newtip, newfork); i = 2; maxinput = 1; k_max = 5; do { i++; printree(); printf("Enter 0 to stop building tree.\n"); printf("Add species%3ld", i); do { printf("\n at or before which node (type number): "); inpnum(&j, &ok); ok = (ok && ((unsigned long)j < i || (j > spp + 2 && j < spp + i + 1))); if (!ok) printf("Impossible number. Please try again:\n"); maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing number\n"); embExitBad(); } } while (!ok); maxinput = 1; if (j >= i) { /* has user chosen a non-tip? if so, offer choice */ do { printf(" Insert at node (A) or before node (B)? "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = isupper((int)ch) ? 
ch : toupper((int)ch); maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (ch != 'A' && ch != 'B'); } else ch = 'B'; /* if user has chosen a tip, set Before */ if (j != 0) { if (ch == 'A') { if (!nodep[j - 1]->tip) { maketip(&newtip, i); add_child(nodep[j - 1], nodep[i - 1]); } } else { maketip(&newtip, i); maketriad(&newfork, spp + i + 1); nodep[i-1]->back = newfork; newfork->back = nodep[i-1]; add_before(nodep[j - 1], nodep[i - 1]); } /* endif (before or at node) */ } done = (j == 0); if (!done) { if (ch == 'B') k = spp * 2 + 3; else k = spp * 2 + 2; k_max = k; do { if (nodep[k - 2] != NULL) { nodep[k - 1] = nodep[k - 2]; nodep[k - 1]->index = k; nodep[k - 1]->next->index = k; nodep[k - 1]->next->next->index = k; } k--; } while (k != spp + 3); if (j > spp + 1) j++; spp++; nonodes = spp * 2 - 1; } } while (!done); for (i = spp + 1; i <= k_max; i++) { if ((nodep[i - 1] != nodep[i]) && (nodep[i - 1] != NULL)) { uniquearray[uniqueindex++] = i; uniquearray[uniqueindex] = 0; } } for ( i = 0; uniquearray[i] != 0; i++) { nodep[spp + i] = nodep[uniquearray[i] - 1]; nodep[spp + i]->index = spp + i + 1; nodep[spp + i]->next->index = spp + i + 1; nodep[spp + i]->next->next->index = spp + i + 1; } for (i = spp + uniqueindex; i <= k_max; i++) nodep[i] = NULL; nonodes = spp * 2 - 1; } /* yourtree */ void buildtree(void) { /* variables needed to be passed to treeread() */ long nextnode = 0; pointarray dummy_treenode=NULL; /* Ignore what happens to this */ boolean goteof = false; boolean haslengths = false; boolean firsttree; node *p, *q; long nodecount = 0; char* treestr; /* These assignments moved from treeconstruct -- they seem to happen only here. */ /*xx treeone & treetwo assignments should probably happen in treeconstruct. Memory leak if user reads multiple trees. */ treeone = (node **)Malloc(maxsz*sizeof(node *)); treetwo = (node **)Malloc(maxsz*sizeof(node *)); simplifiedtree.nodep = (node **)Malloc(maxsz*sizeof(node *)); subtree = false; topedge = 1; leftedge = 1; switch (how) { case arb: nodep = treeone; treesets[othertree].nodep = treetwo; arbitree(); break; case use: printf("\nReading tree file ...\n\n"); if (!readnext) { /* This is the first time through here, act accordingly */ firsttree = true; treesread = 0; } else { /* This isn't the first time through here ... */ firsttree = false; } treestr = ajStrGetuniquePtr(&phylotrees[treesread]->Tree); allocate_nodep(&nodep, treestr, &spp); treesets[whichtree].nodep = nodep; if (firsttree) nayme = (naym *)Malloc(spp*sizeof(naym)); treeread(&treestr, &root, dummy_treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initretreenode,true,-1); nonodes = nextnode; treesread++; treesets[othertree].nodep = treetwo; break; case spec: nodep = treeone; treesets[othertree].nodep = treetwo; yourtree(); break; } q = root->next; do { p = q; nodecount++; q = p->next; } while (q != root); outgrno = root->next->back->index; if(!(nodecount > 2)) { reroot(nodep[outgrno - 1]); } } /* buildtree */ void unbuildtree() { /* throw all nodes of the tree onto the garbage heap */ long i; gdispose(root); for (i = 0; i < nonodes; i++) nodep[i] = NULL; } /* unbuildtree */ void retree_help() { /* display help information */ char tmp[100]; printf("\n\n . 
Redisplay the same tree again\n"); if (haslengths) { printf(" = Redisplay the same tree with"); if (!nolengths) printf("out/with"); else printf("/without"); printf(" lengths\n"); } printf(" U Undo the most recent change in the tree\n"); printf(" W Write tree to a file\n"); printf(" + Read next tree from file (may blow up if none is there)\n"); printf("\n"); printf(" R Rearrange a tree by moving a node or group\n"); printf(" O select an Outgroup for the tree\n"); if (haslengths) printf(" M Midpoint root the tree\n"); printf(" T Transpose immediate branches at a node\n"); printf(" F Flip (rotate) subtree at a node\n"); printf(" D Delete or restore nodes\n"); printf(" B Change or specify the length of a branch\n"); printf(" N Change or specify the name(s) of tip(s)\n"); printf("\n"); printf(" H Move viewing window to the left\n"); printf(" J Move viewing window downward\n"); printf(" K Move viewing window upward\n"); printf(" L Move viewing window to the right\n"); printf(" C show only one Clade (subtree) (might be useful if tree is "); printf("too big)\n"); printf(" ? Help (this screen)\n"); printf(" Q (Quit) Exit from program\n"); printf(" X Exit from program\n\n"); printf(" TO CONTINUE, PRESS ON THE Return OR Enter KEY"); getstryng(tmp); printree(); } /* retree_help */ void consolidatetree(long index) { node *start, *r, *q; int i; start = nodep[index - 1]; q = start->next; while (q != start) { r = q; q = q->next; chuck(&grbg, r); } chuck(&grbg, q); i = index; while (nodep[i-1] != NULL) { r = nodep[i - 1]; if (!(r->tip)) r->index--; if (!(r->tip)) { q = r->next; do { q->index--; q = q->next; } while (r != q && q != NULL); } nodep[i - 1] = nodep[i]; i++; } nonodes--; } /* consolidatetree */ void rearrange() { long i, j, maxinput; boolean ok; node *p, *q; char ch; printf("Remove everything to the right of which node? "); inpnum(&i, &ok); if ( ok == false ) { /* fall through */ } else if ( i < 1 || i > spp*2 - 1 ) { /* i is not in range */ ok = false; } else if (i == root->index ) { /* i is root */ ok = false; } else if ( nodep[i-1]->deleted ) { /* i has been deleted */ ok = false; } else { printf("Add at or before which node? "); inpnum(&j, &ok); if ( ok == false ) { /* fall through */ } else if ( j < 1 || j > spp*2 - 1 ) { /* j is not in range */ ok = false; } else if ( nodep[j-1]->deleted ) { /* j has been deleted */ ok = false; } else if (j != root->index && nodep[nodep[j-1]->back->index - 1]->deleted ) { /* parent of j has been deleted */ ok = false; } else if ( nodep[j-1] == nodep[nodep[i-1]->back->index -1] ) { /* i is j's parent */ ok = false; } else { /* make sure that j is not a descendant of i */ for ( p = nodep[j-1]; p != root; p = nodep[p->back->index - 1] ) { if ( p == nodep[i-1] ) { ok = false; break; } } if ( ok ) { maxinput = 1; do { printf("Insert at node (A) or before node (B)? 
"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = toupper(ch); maxinput++; if (maxinput == 100) { printf("ERROR: Input failed too many times.\n"); exxit(-1); } } while (ch != 'A' && ch != 'B'); if (ch == 'A') { if ( nodep[j - 1]->deleted || nodep[j - 1]->tip ) { /* If j is a tip or has been deleted */ ok = false; } else if ( nodep[j-1] == nodep[nodep[i-1]->back->index -1] ) { /* If j is i's parent */ ok = false; } else { copytree(); re_move(&nodep[i - 1], &q); add_child(nodep[j - 1], nodep[i - 1]); if (fromtype == beforenode) consolidatetree(q->index); } } else { /* ch == 'B' */ if (j == root->index) { /* can't insert at root */ ok = false; } else { copytree(); printf("Insert before node %ld\n",j); re_move(&nodep[i - 1], &q); if (q != NULL) { nodep[q->index-1]->next->back = nodep[i-1]; nodep[i-1]->back = nodep[q->index-1]->next; } add_before(nodep[j - 1], nodep[i - 1]); } } /* endif (before or at node) */ } /* endif (ok to do move) */ } /* endif (destination node ok) */ } /* endif (from node ok) */ printree(); if ( !ok ) printf("Not a possible rearrangement. Try again: \n"); else { written = false; } } /* rearrange */ boolean any_deleted(node *p) { /* return true if there are any deleted branches from branch on down */ boolean localdl; localdl = false; ifdeltrav(p, &localdl); return localdl; } /* any_deleted */ void fliptrav(node *p, boolean recurse) { node *q, *temp, *r =NULL, *rprev =NULL, *l, *lprev; boolean lprevflag; int nodecount, loopcount, i; if (p->tip) return; q = p->next; l = q; lprev = p; nodecount = 0; do { nodecount++; if (q->next->next == p) { rprev = q; r = q->next; } q = q->next; } while (p != q); if (nodecount == 1) return; loopcount = nodecount / 2; for (i=0; inext = r; rprev->next = l; temp = r->next; r->next = l->next; l->next = temp; if (i < (loopcount - 1)) { lprevflag = false; q = p->next; do { if (q == lprev->next && !lprevflag) { lprev = q; l = q->next; lprevflag = true; } if (q->next == rprev) { rprev = q; r = q->next; } q = q->next; } while (p != q); } } if (recurse) { q = p->next; do { fliptrav(q->back, true); q = q->next; } while (p != q); } } /* fliptrav */ void flip(long atnode) { /* flip at a node left-right */ long i; boolean ok; if (atnode == 0) { printf("Flip branches at which node? "); inpnum(&i, &ok); ok = (ok && i > spp && i <= nonodes); if (ok) ok = !any_deleted(nodep[i - 1]); } else { i = atnode; ok = true; } if (ok) { copytree(); fliptrav(nodep[i - 1], true); } if (atnode == 0) printree(); if (ok) { written = false; return; } if ((i >= 1 && i <= spp) || (i > spp && i <= nonodes && any_deleted(nodep[i - 1]))) printf("Can't flip there. "); else printf("No such node. "); } /* flip */ void transpose(long atnode) { /* flip at a node left-right */ long i; boolean ok; if (atnode == 0) { printf("Transpose branches at which node? "); inpnum(&i, &ok); ok = (ok && i > spp && i <= nonodes); if (ok) ok = !nodep[i - 1]->deleted; } else { i = atnode; ok = true; } if (ok) { copytree(); fliptrav(nodep[i - 1], false); } if (atnode == 0) printree(); if (ok) { written = false; return; } if ((i >= 1 && i <= spp) || (i > spp && i <= nonodes && nodep[i - 1]->deleted)) printf("Can't transpose there. "); else printf("No such node. 
"); } /* transpose */ void ifdeltrav(node *p, boolean *localdl) { node *q; if (*localdl) return; if (p->tip) { (*localdl) = ((*localdl) || p->deleted); return; } q = p->next; do { (*localdl) = ((*localdl) || q->deleted); ifdeltrav(q->back, localdl); q = q->next; } while (p != q); } /* ifdeltrav */ double oltrav(node *p) { node *q; double maxlen, templen; if (p->deleted) return 0.0; if (p->tip) { p->beyond = 0.0; return 0.0; } else { q = p->next; maxlen = 0; do { templen = q->back->deleted ? 0.0 : q->length + oltrav(q->back); maxlen = (maxlen > templen) ? maxlen : templen; q->beyond = templen; q = q->next; } while (p != q); p->beyond = maxlen; return (maxlen); } } /* oltrav */ void outlength() { /* compute the farthest combined length out from each node */ oltrav(root); } /* outlength */ void midpoint() { /* midpoint root the tree */ double balance, greatlen, lesslen, grlen, maxlen; node *maxnode, *grnode, *lsnode =NULL; boolean ok = true; boolean changed = false; node *p, *q; long nodecount = 0; boolean multi = false; copytree(); p = root; outlength(); q = p->next; greatlen = 0; grnode = q->back; lesslen = 0; q = root->next; do { p = q; nodecount++; q = p->next; } while (q != root); if (nodecount > 2) multi = true; /* Find the two greatest lengths reaching from root to tips. Also find the lengths and node pointers of the first nodes in the direction of those two greatest lengths. */ p = root; q = root->next; do { if (greatlen <= q->beyond) { lesslen = greatlen; lsnode = grnode; greatlen = q->beyond; grnode = q->back; } if ((greatlen > q->beyond) && (q->beyond >= lesslen)) { lesslen = q->beyond; lsnode = q->back; } q = q->next; } while (p != q); /* If we don't have two non-deleted nodes to balance between then we can't midpoint root the tree */ if (grnode->deleted || lsnode->deleted || grnode == lsnode) ok = false; balance = greatlen - (greatlen + lesslen) / 2.0; grlen = grnode->length; while ((balance - grlen > 1e-10) && ok) { /* First, find the most distant immediate child of grnode and reroot to it. */ p = grnode; q = p->next; maxlen = 0; maxnode = q->back; do { if (maxlen <= q->beyond) { maxlen = q->beyond; maxnode = q->back; } q = q->next; } while (p != q); reroot(maxnode); changed = true; /* Reassess the situation, using the same "find the two greatest lengths" code as occurs before the while loop. If another reroot is necessary, this while loop will repeat. 
*/ p = root; outlength(); q = p->next; greatlen = 0; grnode = q->back; lesslen = 0; do { if (greatlen <= q->beyond) { lesslen = greatlen; lsnode = grnode; greatlen = q->beyond; grnode = q->back; } if ((greatlen > q->beyond) && (q->beyond >= lesslen)) { lesslen = q->beyond; lsnode = q->back; } q = q->next; } while (p != q); if (grnode->deleted || lsnode->deleted || grnode == lsnode) ok = false; balance = greatlen - (greatlen + lesslen) / 2.0; grlen = grnode->length; }; /* end of while ((balance > grlen) && ok) */ if (ok) { /*xx the following ignores deleted nodes */ /* this may be ok because deleted nodes are omitted from length calculations */ if (multi) { reroot(grnode); /*xx need length corrections */ p = root; outlength(); q = p->next; greatlen = 0; grnode = q->back; lesslen = 0; do { if (greatlen <= q->beyond) { lesslen = greatlen; lsnode = grnode; greatlen = q->beyond; grnode = q->back; } if ((greatlen > q->beyond) && (q->beyond >= lesslen)) { lesslen = q->beyond; lsnode = q->back; } q = q->next; } while (p != q); balance = greatlen - (greatlen + lesslen) / 2.0; } grnode->length -= balance; if (((grnode->length) < 0.0) && (grnode->length > -1.0e-10)) grnode->length = 0.0; grnode->back->length = grnode->length; lsnode->length += balance; if (((lsnode->length) < 0.0) && (lsnode->length > -1.0e-10)) lsnode->length = 0.0; lsnode->back->length = lsnode->length; } printree(); if (ok) { if (any_deleted(root)) printf("Deleted nodes were not used in midpoint calculations.\n"); } else { printf("Can't perform midpoint because of deleted branches.\n"); if (changed) { undo(); printf("Tree restored to original state. Undo information lost.\n"); } } } /* midpoint */ void deltrav(node *p, boolean value) { /* register p and p's children as deleted or extant, depending on value */ node *q; p->deleted = value; if (p->tip) return; q = p->next; do { deltrav(q->back, value); q = q->next; } while (p != q); } /* deltrav */ void fill_del(node*p) { int alldell; node *q = p; if ( p->next == NULL) return; q=p->next; while ( q != p) { fill_del(q->back); q=q->next; } alldell = 1; q=p->next; while ( q != p) { if ( !q->back->deleted ) { alldell = 0; } q=q->next; } p->deleted = alldell; } void reg_del(node *delp, boolean value) { /* register delp and all of delp's children as deleted */ deltrav(delp, value); } /* reg_del */ boolean isdeleted(long nodenum) { /* true if nodenum is a node number in a deleted branch */ return(nodep[nodenum - 1]->deleted); } /* isdeleted */ void deletebranch() { /* delete a node */ long i; boolean ok1; printf("Delete everything to the right of which node? "); inpnum(&i, &ok1); ok1 = (ok1 && i >= 1 && i <= nonodes && i != root->index && !isdeleted(i)); if (ok1) { copytree(); reg_del(nodep[i - 1],true); } printree(); if (!ok1) printf("Not a possible deletion. Try again.\n"); else { written = false; } } /* deletebranch */ void restorebranch() { /* restore deleted branches */ long i; boolean ok1; printf("Restore everything to the right of which node? "); inpnum(&i, &ok1); ok1 = (ok1 && i >= 1 && i < spp * 2 && i != root->index && isdeleted(i) && !nodep[nodep[i - 1]->back->index - 1]->deleted); if (ok1) { reg_del(nodep[i - 1],false); } printree(); if (!ok1) printf("Not a possible restoration. 
Try again: \n"); else { written = false; } } /* restorebranch */ void del_or_restore() { /* delete or restore a branch */ long maxinput; Char ch; if (any_deleted(root)) { maxinput = 1; do { printf("Enter D to delete a branch\n"); printf("OR enter R to restore a branch: "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = (isupper((int)ch)) ? ch : toupper((int)ch); maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (ch != 'D' && ch != 'R'); if (ch == 'R') restorebranch(); else deletebranch(); } else deletebranch(); } /* del_or_restore */ void undo() { /* don't undo to an uninitialized tree */ if (!treesets[othertree].initialized) { printree(); printf("Nothing to undo.\n"); return; } treesets[whichtree].root = root; treesets[whichtree].nodep = nodep; treesets[whichtree].nonodes = nonodes; treesets[whichtree].waswritten = waswritten; treesets[whichtree].hasmult = hasmult; treesets[whichtree].haslengths = haslengths; treesets[whichtree].nolengths = nolengths; treesets[whichtree].initialized = true; whichtree = othertree; root = treesets[whichtree].root; nodep = treesets[whichtree].nodep; nonodes = treesets[whichtree].nonodes; waswritten = treesets[whichtree].waswritten; hasmult = treesets[whichtree].hasmult; haslengths = treesets[whichtree].haslengths; nolengths = treesets[whichtree].nolengths; if (othertree == 0) othertree = 1; else othertree = 0; printree(); } /* undo */ /* These attributes of nodes in the tree are modified by treetrav() in preparation for writing a tree to disk. boolean deadend This node is not deleted but all of its children are, so this node will be treated as such when the tree is written or displayed. boolean onebranch This node has only one valid child, so that this node will not be written and its child will be written as a child of its grandparent with the appropriate summing of lengths. nodep *onebranchnode Used if onebranch is true. Onebranchnode points to the one valid child. This child may be one or more generations down from the current node. double onebranchlength Used if onebranch is true. Onebranchlength is the length from the current node to the valid child. */ void treetrav(node *p) { long branchcount = 0; node *q, *onebranchp =NULL; /* Count the non-deleted branches hanging off of this node into branchcount. If there is only one such branch, onebranchp points to that branch. 
*/ if (p->tip) return; q = p->next; do { if (!q->back->deleted) { if (!q->back->tip) treetrav(q->back); if (!q->back->deadend && !q->back->deleted) { branchcount++; onebranchp = q->back; } } q = q->next; } while (p != q); if (branchcount == 0) p->deadend = true; else p->deadend = false; p->onebranch = false; if (branchcount == 1 && onebranchp->tip) { p->onebranch = true; p->onebranchnode = onebranchp; p->onebranchhaslength = (p->haslength || (p == root)) && onebranchp->haslength; if (p->onebranchhaslength) p->onebranchlength = onebranchp->length + p->length; } if (branchcount == 1 && !onebranchp->tip) { p->onebranch = true; if (onebranchp->onebranch) { p->onebranchnode = onebranchp->onebranchnode; p->onebranchhaslength = (p->haslength || (p == root)) && onebranchp->onebranchhaslength; if (p->onebranchhaslength) p->onebranchlength = onebranchp->onebranchlength + p->length; } else { p->onebranchnode = onebranchp; p->onebranchhaslength = p->haslength && onebranchp->haslength; if (p->onebranchhaslength) p->onebranchlength = onebranchp->length + p->length; } } } /* treetrav */ void simcopynode(node *fromnode, node *tonode) { /* Copy the contents of a node from fromnode to tonode. */ int i; tonode->index = fromnode->index; tonode->deleted = fromnode->deleted; tonode->tip = fromnode->tip; tonode->hasname = fromnode->hasname; if (fromnode->hasname) for (i=0;i<MAXNCH;i++) tonode->nayme[i] = fromnode->nayme[i]; tonode->haslength = fromnode->haslength; if (fromnode->haslength) tonode->length = fromnode->length; } /* simcopynode */ node *simcopytrav(node *p) { /* Traverse the tree from p on down, copying nodes to the other tree */ node *q, *newnode, *newnextnode, *temp; long lastnodeidx = 0; gnu(&grbg, &newnode); simcopynode(p, newnode); if (nodep[p->index - 1] == p) simplifiedtree.nodep[p->index - 1] = newnode; /* if this is a tip, return now */ if (p->tip) return newnode; if (p->onebranch && p->onebranchnode->tip) { simcopynode(p->onebranchnode, newnode); if (p->onebranchhaslength) newnode->length = p->onebranchlength; return newnode; } else if (p->onebranch && !p->onebranchnode->tip) { /* recurse down p->onebranchnode */ p->onebranchnode->length = p->onebranchlength; p->onebranchnode->haslength = p->onebranchnode->haslength; return simcopytrav(p->onebranchnode); } else { /* Multiple non-deleted branch case: go round the node recursing down the branches. Don't go down deleted branches or dead ends. */ q = p->next; while (q != p) { if (!q->back->deleted && !q->back->deadend) lastnodeidx = q->back->index; q = q->next; } q = p->next; gnu(&grbg, &newnextnode); simcopynode(q, newnextnode); newnode->next = newnextnode; do { /* If branch is deleted or is a dead end, do not recurse down the branch. */ if (!q->back->deleted && !q->back->deadend) { newnextnode->back = simcopytrav(q->back); newnextnode->back->back = newnextnode; q = q->next; if (newnextnode->back->index == lastnodeidx) { newnextnode->next = newnode; break; } if (q == p) { newnextnode->next = newnode; } else { temp = newnextnode; gnu(&grbg, &newnextnode); simcopynode(q, newnextnode); temp->next = newnextnode; } } else { /*xx this else and q=q->next are experimental (seems to be working) */ q = q->next; } } while (q != p); } return newnode; } /* simcopytrav */ void simcopytree() { /* Make a simplified copy of the current tree for rooting/unrooting on output. Deleted nodes are removed and lengths are consolidated. */ simplifiedtree.root = simcopytrav(root); /*xx If there are deleted nodes, nonodes will be different. 
However, nonodes is not used in the simplified tree. */ simplifiedtree.nonodes = nonodes; simplifiedtree.waswritten = waswritten; simplifiedtree.hasmult = hasmult; simplifiedtree.haslengths = haslengths; simplifiedtree.nolengths = nolengths; simplifiedtree.initialized = true; } /* simcopytree */ void writebranchlength(double x) { long w; /* write branch length onto output file, keeping track of what column of line you are in, and writing to correct precision */ if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if ((long)(100000*x) == 100000*(long)x) { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.1f", (int)(w + 2), x); col += w + 3; } else { if ((long)(100000*x) == 10000*(long)(10*x)) { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.1f", (int)(w + 3), x); col += w + 4; } else { if ((long)(100000*x) == 1000*(long)(100*x)) { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.2f", (int)(w + 4), x); col += w + 5; } else { if ((long)(100000*x) == 100*(long)(1000*x)) { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.3f", (int)(w + 5), x); col += w + 6; } else { if ((long)(100000*x) == 10*(long)(10000*x)) { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.4f", (int)(w + 6), x); col += w + 7; } else { if (!xmltree) putc(':', outtree); fprintf(outtree, "%*.5f", (int)(w + 7), x); col += w + 8; } } } } } } /* writebranchlength */ void treeout(node *p, boolean writeparens, double addlength, long indent) { /* write out file with representation of final tree */ long i, n, lastnodeidx = 0; Char c; double x; boolean comma; node *q; /* If this is a tip or there are no non-deleted branches from this node, render this node as a tip (write its name). */ if (p == root) { if (xmltree) indent = 0; else indent = 0; if (xmltree) { fprintf(outtree, ""); /* assumes no length at root! */ } else putc('(', outtree); } if (p->tip) { if (p->hasname) { n = 0; for (i = 1; i <= MAXNCH; i++) { if ((nodep[p->index - 1]->nayme[i - 1] != '\0') && (nodep[p->index - 1]->nayme[i - 1] != ' ')) n = i; } indent += 2; if (xmltree) { putc('\n', outtree); for (i = 1; i <= indent; i++) putc(' ', outtree); fprintf(outtree, "haslength) { fprintf(outtree, " length="); x = p->length; writebranchlength(x); } putc('>', outtree); fprintf(outtree, ""); } for (i = 0; i < n; i++) { c = nodep[p->index - 1]->nayme[i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; if (xmltree) fprintf(outtree, ""); } } else if (p->onebranch && p->onebranchnode->tip) { if (p->onebranchnode->hasname) { n = 0; for (i = 1; i <= MAXNCH; i++) { if ((nodep[p->index - 1]->nayme[i - 1] != '\0') && (nodep[p->index - 1]->nayme[i - 1] != ' ')) n = i; indent += 2; if (xmltree) { putc('\n', outtree); for (i = 1; i <= indent; i++) putc(' ', outtree); fprintf(outtree, "haslength && writeparens) || p->onebranch) { if (!(p->onebranch && !p->onebranchhaslength)) { fprintf(outtree, " length="); if (p->onebranch) x = p->onebranchlength; else x = p->length; x += addlength; writebranchlength(x); } fprintf(outtree, ""); } } for (i = 0; i < n; i++) { c = p->onebranchnode->nayme[i]; if (c == '_') c = ' '; putc(c, outtree); } col += n; if (xmltree) fprintf(outtree, ""); } } } else if (p->onebranch && !p->onebranchnode->tip) { treeout(p->onebranchnode, true, 0.0, indent); } else { /* Multiple non-deleted branch case: go round the node recursing down the branches. 
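(Illustrative sketch, added and not from the original comment: for an interior node with children A, B and C where B has been deleted, the Newick branch of the loop below emits "(A:...,C:...)" -- the deleted child contributes neither a subtree nor a comma -- whereas with xmltree set it instead writes one indented element per surviving child and uses no parentheses or commas.)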
*/ if (xmltree) { putc('\n', outtree); indent += 2; for (i = 1; i <= indent; i++) putc(' ', outtree); if (p == root) fprintf(outtree, ""); } if (p != root) { if (xmltree) { fprintf(outtree, "haslength && writeparens) || p->onebranch) { if (!(p->onebranch && !p->onebranchhaslength)) { fprintf(outtree, " length=\""); if (p->onebranch) x = p->onebranchlength; else x = p->length; x += addlength; writebranchlength(x); } fprintf(outtree, "\">"); } else fprintf(outtree, ">"); } else putc('(', outtree); } (col)++; q = p->next; while (q != p) { if (!q->back->deleted && !q->back->deadend) lastnodeidx = q->back->index; q = q->next; } q = p->next; while (q != p) { comma = true; /* If branch is deleted or is a dead end, do not recurse down the branch and do not write a comma afterwards. */ if (!q->back->deleted && !q->back->deadend) treeout(q->back, true, 0.0, indent); else comma = false; if (q->back->index == lastnodeidx) comma = false; q = q->next; if (q == p) break; if ((q->next == p) && (q->back->deleted || q->back->deadend)) break; if (comma && !xmltree) putc(',', outtree); (col)++; if ((!xmltree) && col > 65) { putc('\n', outtree); col = 0; } } /* The right paren ')' closes off this level of recursion. */ if (p != root) { if (xmltree) { fprintf(outtree, "\n"); for (i = 1; i <= indent; i++) putc(' ', outtree); } if (xmltree) { fprintf(outtree, ""); } else putc(')', outtree); } (col)++; } if (!xmltree) if ((p->haslength && writeparens) || p->onebranch) { if (!(p->onebranch && !p->onebranchhaslength)) { if (p->onebranch) x = p->onebranchlength; else x = p->length; x += addlength; writebranchlength(x); } } if (p == root) { if (xmltree) { fprintf(outtree, "\n \n\n"); } else putc(')', outtree); } } /* treeout */ void maketemptriad(node **p, long index) { /* Initiate an internal node with stubs for two children */ long i, j; node *q; q = NULL; for (i = 1; i <= 3; i++) { gnu(&grbg, p); (*p)->index = index; (*p)->hasname = false; (*p)->haslength = false; (*p)->deleted=false; (*p)->deadend=false; (*p)->onebranch=false; (*p)->onebranchhaslength=false; for (j=0;j<MAXNCH;j++) (*p)->nayme[j] = '\0'; (*p)->next = q; q = *p; } (*p)->next->next->next = *p; q = (*p)->next; while (*p != q) { (*p)->back = NULL; (*p)->tip = false; *p = (*p)->next; } } /* maketemptriad */ void roottreeout(boolean *userwantsrooted) { /* write out file with representation of final tree */ long trnum, trnumwide; boolean treeisrooted = false; treetrav(root); simcopytree(); /* Prepare a copy of the going tree without deleted branches */ treesets[whichtree].root = root; /* Store the current root */ if (nexus) { trnum = treenumber; trnumwide = 1; while (trnum >= 10) { trnum /= 10; trnumwide++; } fprintf(outtree, "TREE PHYLIP_%*ld = ", (int)trnumwide, treenumber); if (!(*userwantsrooted)) fprintf(outtree, "[&U] "); else fprintf(outtree, "[&R] "); col += 15; } root = simplifiedtree.root; /* Point root at simplified tree */ root->haslength = false; /* Root should not have a length */ if (root->tip) treeisrooted = true; else { if (root->next->next->next == root) treeisrooted = true; else treeisrooted = false; } if (*userwantsrooted && !treeisrooted) notrootedtorooted(); if (!(*userwantsrooted) && treeisrooted) rootedtonotrooted(); if ((*userwantsrooted && treeisrooted) || (!(*userwantsrooted) && !treeisrooted)) { treeout(root,true,0.0, 0); } root = treesets[whichtree].root; /* Point root at original (real) tree */ if (!xmltree) { if (hasmult) fprintf(outtree, "[%6.4f];\n", trweight); else fprintf(outtree, ";\n"); } } /* roottreeout */ void notrootedtorooted() { 
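/* Illustrative note, added for clarity (not from the original source): given an unrooted basal trifurcation such as (A:2.0,B:1.0,C:1.0);, the code below builds a new basal triad, hangs the leftmost branch (to A) on one side and the old base on the other, and halves the leftmost branch length, so the rooted tree written out is roughly equivalent to (A:1.0,(B:1.0,C:1.0):1.0); -- only the simplified output copy is altered, never the working tree. */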
node *newbase, *temp; /* root halfway along leftmost branch of unrooted tree */ /* create a new triad for the new base */ maketemptriad(&newbase,nonodes+1); /* Take left branch and make it the left branch of newbase */ newbase->next->back = root->next->back; newbase->next->next->back = root; /* If needed, divide length between left and right branches */ if (newbase->next->back->haslength) { newbase->next->back->length /= 2.0; newbase->next->next->back->length = newbase->next->back->length; newbase->next->next->back->haslength = true; } /* remove leftmost ring node from old base ring */ temp = root->next->next; chuck(&grbg, root->next); root->next = temp; /* point root at new base and write the tree */ root = newbase; treeout(root,true,0.0, 0); /* (since tree mods are to simplified tree and will not be used for general purpose tree editing, much initialization can be skipped.) */ } /* notrootedtorooted */ void rootedtonotrooted() { node *q, *r, *temp, *newbase; boolean sumhaslength = false; double sumlength = 0; /* Use the leftmost non-tip immediate descendant of the root, root at that, write a multifurcation with that as the base. If both descendants are tips, write tree as is. */ root = simplifiedtree.root; /* first, search for leftmost non-tip immediate descendant of root */ q = root->next->back; r = root->next->next->back; if (q->tip && r->tip) { treeout(root,true,0.0, 0); } else if (!(q->tip)) { /* allocate new base pointer */ gnu(&grbg,&newbase); newbase->next = q->next; q->next = newbase; q->back = r; r->back = q; if (q->haslength && r->haslength) { sumlength = q->length + r->length; sumhaslength = true; } if (sumhaslength) { q->length = sumlength; q->back->length = sumlength; } else { q->haslength = false; r->haslength = false; } chuck(&grbg, root->next->next); chuck(&grbg, root->next); chuck(&grbg, root); root = newbase; treeout(root, true, 0.0, 0); } else if (q->tip && !(r->tip)) { temp = r; do { temp = temp->next; } while (temp->next != r); gnu(&grbg,&newbase); newbase->next = temp->next; temp->next = newbase; q->back = r; r->back = q; if (q->haslength && r->haslength) { sumlength = q->length + r->length; sumhaslength = true; } if (sumhaslength) { q->length = sumlength; q->back->length = sumlength; } else { q->haslength = false; r->haslength = false; } chuck(&grbg, root->next->next); chuck(&grbg, root->next); chuck(&grbg, root); root = newbase; treeout(root, true, 0.0, 0); } } /* rootedtonotrooted */ void treewrite(boolean *done) { /* write out tree to a file */ long maxinput; boolean rooted; if (nexus && onfirsttree) { fprintf(outtree, "#NEXUS\n"); fprintf(outtree, "BEGIN TREES\n"); fprintf(outtree, "TRANSLATE;\n"); /* MacClade needs this */ } if (xmltree && onfirsttree) { fprintf(outtree, "\n"); } onfirsttree = false; maxinput = 1; do { fprintf(stderr, "Enter R if the tree is to be rooted, "); fprintf(stderr, "OR enter U if the tree is to be unrooted: "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = (isupper((int)ch)) ? 
ch : toupper((int)ch); maxinput++; if (maxinput == 100) { fprintf(stderr, "ERROR: too many tries at choosing option\n"); embExitBad(); } } while (ch != 'R' && ch != 'U'); col = 0; rooted = (ch == 'R'); roottreeout(&rooted); treenumber++; fprintf(stderr, "\nTree written to file \"%s\"\n\n", outtreename); waswritten = true; written = true; if (!(*done)) printree(); FClose(outtree); } /* treewrite */ void retree_window(adjwindow action) { /* move viewing window of tree */ switch (action) { case left: if (leftedge != 1) leftedge -= hscroll; break; case downn: /* The 'topedge + 3' is needed to allow downward scrolling when part of the tree is above the screen and only 1 or 2 lines are below it. */ if (treelines - topedge + 3 >= screenlines) topedge += vscroll; break; case upp: if (topedge != 1) topedge -= vscroll; break; case right: if (leftedge < vscreenwidth+2) { if (hscroll > leftedge - vscreenwidth + 1) leftedge = vscreenwidth; else leftedge += hscroll; } break; } printree(); } /* retree_window */ void getlength(double *length, reslttype *reslt, boolean *hslngth) { long maxinput; double valyew; char tmp[100]; valyew = 0.0; maxinput = 1; do { printf("\nEnter the new branch length\n"); printf("OR enter U to leave the length unchanged\n"); if (*hslngth) printf("OR enter R to remove the length from this branch: \n"); getstryng(tmp); if (tmp[0] == 'u' || tmp[0] == 'U'){ *reslt = quit; break; } else if (tmp[0] == 'r' || tmp[0] == 'R') { (*reslt) = remoov; break;} else if (sscanf(tmp,"%lf",&valyew) == 1){ (*reslt) = valid; break; } maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (1); (*length) = valyew; } /* getlength */ void changelength() { /* change or specify the length of a tip */ boolean hslngth; boolean ok; long i, w, maxinput; double length, x; Char ch; reslttype reslt; node *p; maxinput = 1; do { printf("Specify length of which branch (0 = all branches)? 
"); inpnum(&i, &ok); ok = (ok && (unsigned long)i <= nonodes); if (ok && (i != 0)) ok = (ok && !nodep[i - 1]->deleted); if (i == 0) ok = (nodep[i - 1] != root); maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (!ok); if (i != 0) { p = nodep[i - 1]; putchar('\n'); if (p->haslength) { x = p->length; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; printf("The current length of this branch is %*.5f\n", (int)(w + 7), x); } else printf("This branch does not have a length\n"); hslngth = p->haslength; getlength(&length, &reslt, &hslngth); switch (reslt) { case valid: copytree(); p->length = length; p->haslength = true; if (p->back != NULL) { p->back->length = length; p->back->haslength = true; } break; case remoov: copytree(); p->haslength = false; if (p->back != NULL) p->back->haslength = false; break; case quit: /* blank case */ break; } } else { printf("\n (this operation cannot be undone)\n"); maxinput = 1; do { printf("\n enter U to leave the lengths unchanged\n"); printf("OR enter R to remove the lengths from all branches: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); exxit(-1); } } while (ch != 'U' && ch != 'u' && ch != 'R' && ch != 'r'); if (ch == 'R' || ch == 'r') { copytree(); for (i = 0; i < spp; i++) nodep[i]->haslength = false; for (i = spp; i < nonodes; i++) { if (nodep[i] != NULL) { nodep[i]->haslength = false; nodep[i]->next->haslength = false; nodep[i]->next->next->haslength = false; } } } } printree(); } /* changelength */ void changename() { /* change or specify the name of a tip */ boolean ok; long i, n, tipno; char tipname[100]; for(;;) { for(;;) { printf("Specify name of which tip? (enter its number or 0 to quit): "); inpnum(&i, &ok); if (i > 0 && ((unsigned long)i <= spp) && ok) if (!nodep[i - 1]->deleted) { tipno = i; break; } if (i == 0) { tipno = 0; break; } } if (tipno == 0) break; if (nodep[tipno - 1]->hasname) { n = 0; /* this is valid because names are padded out to MAXNCH with nulls */ for (i = 1; i <= MAXNCH; i++) { if (nodep[tipno - 1]->nayme[i - 1] != '\0') n = i; } printf("The current name of tip %ld is \"", tipno); for (i = 0; i < n; i++) putchar(nodep[tipno - 1]->nayme[i]); printf("\"\n"); } copytree(); for (i = 0; i < MAXNCH; i++) nodep[tipno - 1]->nayme[i] = ' '; printf("Enter new tip name: "); i = 1; getstryng(tipname); strncpy(nodep[tipno-1]->nayme,tipname,MAXNCH); nodep[tipno - 1]->hasname = true; printree(); } printree(); } /* changename */ void clade() { /* pick a subtree and show only that on screen */ long i; boolean ok; printf("Select subtree rooted at which node (0 for whole tree)? "); inpnum(&i, &ok); ok = (ok && (unsigned long)i <= nonodes); if (ok) { subtree = (i > 0); if (subtree) nuroot = nodep[i - 1]; else nuroot = root; } printree(); if (!ok) printf("Not possible to use this node. "); } /* clade */ void changeoutgroup() { long i, maxinput; boolean ok; maxinput = 1; do { printf("Which node should be the new outgroup? 
"); inpnum(&i, &ok); ok = (ok && i >= 1 && i <= nonodes && i != root->index); if (ok) ok = (ok && !nodep[i - 1]->deleted); if (ok) ok = !nodep[nodep[i - 1]->back->index - 1]->deleted; if (ok) outgrno = i; maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (!ok); copytree(); reroot(nodep[outgrno - 1]); printree(); written = false; } /* changeoutgroup */ void redisplay() { long maxinput; boolean done; char ch; done = false; maxinput = 1; do { fprintf(stderr, "NEXT? (R . "); if (haslengths) fprintf(stderr, "= "); fprintf(stderr, "U W O "); if (haslengths) fprintf(stderr, "M "); fprintf(stderr, "T F D B N H J K L C + ? X Q) (? for Help): "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = isupper((int)ch) ? ch : toupper((int)ch); if (ch == 'C' || ch == 'F' || ch == 'O' || ch == 'R' || ch == 'U' || ch == 'X' || ch == 'Q' || ch == '.' || ch == 'W' || ch == 'B' || ch == 'N' || ch == '?' || ch == 'H' || ch == 'J' || ch == 'K' || ch == 'L' || ch == '+' || ch == 'T' || ch == 'D' || (haslengths && ch == 'M') || (haslengths && ch == '=')) { switch (ch) { case 'R': rearrange(); break; case '.': printree(); break; case '=': togglelengths(); break; case 'U': undo(); break; case 'W': treewrite(&done); break; case 'O': changeoutgroup(); break; case 'M': midpoint(); break; case 'T': transpose(0); break; case 'F': flip(0); break; case 'C': clade(); break; case 'D': del_or_restore(); break; case 'B': changelength(); break; case 'N': changename(); break; case 'H': retree_window(left); break; case 'J': retree_window(downn); break; case 'K': retree_window(upp); break; case 'L': retree_window(right); break; case '?': retree_help(); break; case '+': if (treesread \n"); else if (nexus) fprintf(outtree, "END;\n"); } FClose(intree); FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Retree */ PHYLIPNEW-3.69.650/src/discboot.c0000664000175000017500000007001311616234203013026 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Doug Buxton. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ typedef enum { seqs, morphology, restsites, genefreqs } datatype; typedef enum { dna, rna, protein } seqtype; AjPPhyloState* phylostate = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylomix = NULL; AjPPhyloProp phylofact = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void seqboot_inputnumbersstate(AjPPhyloState); void inputoptions(void); void seqboot_inputdatastate(AjPPhyloState); void allocrest(void); void allocnew(void); void doinput(int argc, Char *argv[]); void bootweights(void); void sppermute(long); void charpermute(long, long); void writedata(void); void writeweights(void); void writecategories(void); void writeauxdata(steptr, FILE*); void writefactors(void); void bootwrite(void); void seqboot_inputaux(steptr, FILE*); void seqboot_inputfactors(AjPPhyloProp fact); /* function prototypes */ #endif FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH], mixfilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; const char* outweightfilename; AjPFile embossoutweightfile; const char* outmixfilename; AjPFile embossoutmixfile; const char* outancfilename; AjPFile embossoutancfile; const char* outcatfilename; AjPFile embossoutcatfile; const char* outfactfilename; AjPFile embossoutfactfile; long sites, loci, maxalleles, groups, newsites, newersites, newgroups, newergroups, nenzymes, reps, ws, blocksize, categs, maxnewsites; boolean bootstrap, permute, ild, lockhart, jackknife, regular, xml, nexus, weights, categories, factors, enzymes, all, justwts, progress, mixture, firstrep, ancvar; double fracsample; datatype data; seqtype seq; steptr oldweight, where, how_many, newwhere, newhowmany, newerwhere, newerhowmany, factorr, newerfactor, mixdata, ancdata; steptr *charorder; Char *factor; long *alleles; Char **nodep; double **nodef; long **sppord; longer seed; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr test = NULL; AjPStr typeofseq = NULL; AjPStr justweights = NULL; AjBool rewrite = false; long inseed, inseed0; data = morphology; seq = dna; bootstrap = false; jackknife = false; permute = false; ild = false; lockhart = false; blocksize = 1; regular = true; fracsample = 1.0; all = false; reps = 100; weights = false; mixture = false; ancvar = false; categories = false; justwts = false; printdata = false; dotdiff = true; progress = true; interleaved = true; xml = false; nexus = false; factors = false; enzymes = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostate = ajAcdGetDiscretestates("infile"); phylofact = ajAcdGetProperties("factorfile"); if(phylofact) { factors = true; embossoutfactfile = ajAcdGetOutfile("outfactfile"); emboss_openfile(embossoutfactfile, &outfactfile, &outfactfilename); } test = ajAcdGetListSingle("test"); if(ajStrMatchC(test, "b")) { bootstrap = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 1.0; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } blocksize = ajAcdGetInt("blocksize"); } else if(ajStrMatchC(test, "j")) { jackknife = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 0.5; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } } else if(ajStrMatchC(test, "c")) permute = true; else if(ajStrMatchC(test, "o")) ild = true; else if(ajStrMatchC(test, "s")) 
lockhart = true; else if(ajStrMatchC(test, "r")) rewrite = true; if(rewrite) { if (data == morphology) { typeofseq = ajAcdGetListSingle("morphseqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } else{ reps = ajAcdGetInt("reps"); inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); if(jackknife || bootstrap || permute) { phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) { ancvar = true; embossoutancfile = ajAcdGetOutfile("outancfile"); emboss_openfile(embossoutancfile, &outancfile, &outancfilename); } phylomix = ajAcdGetProperties("mixfile"); if(phylomix) { mixture = true; embossoutmixfile = ajAcdGetOutfile("outmixfile"); emboss_openfile(embossoutmixfile, &outmixfile, &outmixfilename); } if(!permute) { justweights = ajAcdGetListSingle("justweights"); if(ajStrMatchC(justweights, "j")) justwts = true; } } } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void seqboot_inputnumbersstate(AjPPhyloState state) { /* read numbers of species and of sites */ spp = state->Size; sites = state->Len; loci = sites; maxalleles = 1; } /* seqboot_inputnumberstate */ void seqboot_inputfactors(AjPPhyloProp fact) { long i, j; Char ch, prevch; AjPStr str; prevch = ' '; str = fact->Str[0]; j = 0; for (i = 0; i < (sites); i++) { ch = ajStrGetCharPos(str,i); if (ch != prevch) j++; prevch = ch; factorr[i] = j; } } /* seqboot_inputfactors */ void inputoptions() { /* input the information on the options */ long weightsum, maxfactsize, i, j, k, l, m; if (data == genefreqs) { k = 0; l = 0; for (i = 0; i < (loci); i++) { m = alleles[i]; k++; for (j = 1; j <= m; j++) { l++; factorr[l - 1] = k; } } } else { for (i = 1; i <= (sites); i++) factorr[i - 1] = i; } if(factors){ seqboot_inputfactors(phylofact); } for (i = 0; i < (sites); i++) oldweight[i] = 1; if (weights) inputweightsstr2(phyloweights->Str[0],0, sites, &weightsum, oldweight, &weights, "seqboot"); if (factors && printdata) { for(i = 0; i < sites; i++) factor[i] = (char)('0' + (factorr[i]%10)); printfactors(outfile, sites, factor, " (least significant digit)"); } if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); for (i = 0; i < (loci); i++) how_many[i] = 0; for (i = 0; i < (loci); i++) where[i] = 0; for (i = 1; i <= (sites); i++) { how_many[factorr[i - 1] - 1]++; if (where[factorr[i - 1] - 1] == 0) where[factorr[i - 1] - 1] = i; } groups = factorr[sites - 1]; newgroups = 0; newsites = 0; maxfactsize = 0; for(i = 0 ; i < loci ; i++){ if(how_many[i] > maxfactsize){ maxfactsize = how_many[i]; } } maxnewsites = groups * maxfactsize; allocnew(); for (i = 0; i < (groups); i++) { if (oldweight[where[i] - 1] > 0) { newgroups++; newsites += how_many[i]; newwhere[newgroups - 1] = where[i]; newhowmany[newgroups - 1] = how_many[i]; } } } /* inputoptions */ void seqboot_inputdatastate(AjPPhyloState state) { /* input the names and sequences for each species */ long i, j, k, l, m, n; Char charstate; AjPStr str; boolean allread, done; nodep = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < (spp); i++) nodep[i] = (Char *)Malloc(sites*sizeof(Char)); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if 
(j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); if (data == morphology) fprintf(outfile, " character\n\n"); if (data == restsites) fprintf(outfile, " site\n\n"); } else { if (ild) { if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, ", spp); if (data == seqs) fprintf(outfile, "%3ld sites\n\n", sites); else if (data == morphology) fprintf(outfile, "%3ld characters\n\n", sites); else if (data == restsites) fprintf(outfile, "%3ld sites\n\n", sites); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } allread = false; while (!allread) { allread = true; i = 1; while (i <= spp) { initnamestate(state, i-1); str = state->Str[i-1]; j = 0; done = false; while (!done) { while (j < sites) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); j++; if (charstate == '.') charstate = nodep[0][j-1]; nodep[i-1][j-1] = charstate; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (!printdata) return; m = (sites - 1) / 60 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; n = (i - 1) * 60; for (k = n; k < l; k++) { if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) charstate = '.'; else charstate = nodep[j][k]; putc(charstate, outfile); if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdatastate */ void allocrest() { /* allocate memory for bookkeeping arrays */ oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); if (categories) category = (steptr)Malloc(sites*sizeof(long)); if (mixture) mixdata = (steptr)Malloc(sites*sizeof(long)); if (ancvar) ancdata = (steptr)Malloc(sites*sizeof(long)); where = (steptr)Malloc(loci*sizeof(long)); how_many = (steptr)Malloc(loci*sizeof(long)); factor = (Char *)Malloc(sites*sizeof(Char)); factorr = (steptr)Malloc(sites*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void allocnew(void) { /* allocate memory for arrays that depend on the lenght of the output sequence*/ long i; newwhere = (steptr)Malloc(loci*sizeof(long)); newhowmany = (steptr)Malloc(loci*sizeof(long)); newerwhere = (steptr)Malloc(loci*sizeof(long)); newerhowmany = (steptr)Malloc(loci*sizeof(long)); newerfactor = (steptr)Malloc(maxnewsites*maxalleles*sizeof(long)); 
charorder = (steptr *)Malloc(spp*sizeof(steptr)); for (i = 0; i < spp; i++) charorder[i] = (steptr)Malloc(maxnewsites*sizeof(long)); } void doinput(int argc, Char *argv[]) { /* reads the input data */ seqboot_inputnumbersstate(phylostate[0]); allocrest(); inputoptions(); seqboot_inputdatastate(phylostate[0]); } /* doinput */ void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; if (jackknife) { if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) { if (randum(seed) < (newgroups*fracsample - (long)(newgroups*fracsample)) /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) q = (long)(newgroups*fracsample)+1; else q = (long)(newgroups*fracsample); } else q = (long)(newgroups*fracsample+0.5); r = newgroups; p = q / r; ws = 0; for (i = 0; i < (newgroups); i++) { if (randum(seed) < p) { weight[i]++; ws++; q--; } r--; if (i + 1 < newgroups) p = q / r; } } else if (permute) { for (i = 0; i < (newgroups); i++) weight[i] = 1; } else if (bootstrap) { blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } } else /* case of rewriting data */ for (i = 0; i < (newgroups); i++) weight[i] = 1; for (i = 0; i < (newgroups); i++) newerwhere[i] = 0; for (i = 0; i < (newgroups); i++) newerhowmany[i] = 0; newergroups = 0; newersites = 0; for (i = 0; i < (newgroups); i++) { for (j = 1; j <= (weight[i]); j++) { newergroups++; for (k = 1; k <= (newhowmany[i]); k++) { newersites++; newerfactor[newersites - 1] = newergroups; } newerwhere[newergroups - 1] = newwhere[i]; newerhowmany[newergroups - 1] = newhowmany[i]; } } } /* bootweights */ void sppermute(long n) { /* permute the species order as given in array sppord */ long i, j, k; for (i = 1; i <= (spp - 1); i++) { k = (long)((i+1) * randum(seed)); j = sppord[n - 1][i]; sppord[n - 1][i] = sppord[n - 1][k]; sppord[n - 1][k] = j; } } /* sppermute */ void charpermute(long m, long n) { /* permute the n+1 characters of species m+1 */ long i, j, k; for (i = 1; i <= (n - 1); i++) { k = (long)((i+1) * randum(seed)); j = charorder[m][i]; charorder[m][i] = charorder[m][k]; charorder[m][k] = j; } } /* charpermute */ void writedata() { /* write out one set of bootstrapped sequences */ long i, j, k, l, m, n, n2=0; double x; Char charstate; sppord = (long **)Malloc(newergroups*sizeof(long *)); for (i = 0; i < (newergroups); i++) sppord[i] = (long *)Malloc(spp*sizeof(long)); for (j = 1; j <= spp; j++) sppord[0][j - 1] = j; for (i = 1; i < newergroups; i++) { for (j = 1; j <= (spp); j++) sppord[i][j - 1] = sppord[i - 1][j - 1]; } if (!justwts || permute) { if (data == restsites && enzymes) fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); else if (data == genefreqs) fprintf(outfile, "%5ld %5ld\n", spp, newergroups); else { if ((data == seqs) && !(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n"); else if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) { fprintf(outfile, "#NEXUS\n"); fprintf(outfile, "BEGIN DATA\n"); fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", spp, newersites); fprintf(outfile, " FORMAT"); fprintf(outfile, " interleave"); fprintf(outfile, " DATATYPE="); if (data == seqs) { switch (seq) { case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; case (rna): fprintf(outfile, "RNA missing=N gap=-"); 
break; case (protein): fprintf(outfile, "protein missing=? gap=-"); break; } } if (data == morphology) fprintf(outfile, "STANDARD"); fprintf(outfile, ";\n MATRIX\n"); } else fprintf(outfile, "%5ld %5ld\n", spp, newersites); } if (data == genefreqs) { for (i = 0; i < (newergroups); i++) fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); putc('\n', outfile); } } l = 1; if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) && ((data == seqs) || (data == restsites))) { interleaved = !interleaved; if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) interleaved = false; } if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; for (j = 0; j < spp; j++) { n = 0; if ((l == 1) || (interleaved && nexus)) { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " \n"); fprintf(outfile, " "); } n2 = nmlngth-1; if (!(bootstrap || jackknife || permute || ild || lockhart) && (xml || nexus)) { while (nayme[j][n2] == ' ') n2--; } if (nexus) fprintf(outfile, " "); for (k = 0; k <= n2; k++) if (nexus && (nayme[j][k] == ' ') && (k < n2)) putc('_', outfile); else putc(nayme[j][k], outfile); if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n "); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " "); } else { for (k = 1; k <= nmlngth; k++) putc(' ', outfile); } } if (nexus) for (k = 0; k < nmlngth+1-n2; k++) fprintf(outfile, " "); for (k = l - 1; k < m; k++) { if (permute && j + 1 == 1) sppermute(newerfactor[n]); /* we can assume chars not permuted */ for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (data == genefreqs) { if (n > 1 && (n & 7) == 1) fprintf(outfile, "\n "); x = nodef[sppord[newerfactor[charorder[j][n - 1]] - 1][j] - 1] [newerwhere[charorder[j][k]] + n2]; fprintf(outfile, "%8.5f", x); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); charstate = nodep[sppord[newerfactor[charorder[j][n - 1]] - 1] [j] - 1][newerwhere[charorder[j][k]] + n2]; putc(charstate, outfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfile); } } } if (!(bootstrap || jackknife || permute || ild || lockhart ) && xml) { fprintf(outfile, "\n \n"); } putc('\n', outfile); } if (interleaved) { if ((m <= newersites) && (newersites > 60)) putc('\n', outfile); l += 60; m += 60; } } while (interleaved && l <= newersites); if ((data == seqs) && (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) fprintf(outfile, "\n"); if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) fprintf(outfile, " ;\nEND;\n"); for (i = 0; i < (newergroups); i++) free(sppord[i]); free(sppord); } /* writedata */ void writeweights() { /* write out one set of post-bootstrapping weights */ long j, k, l, m, n, o; j = 0; l = 1; if (interleaved) m = 60; else m = sites; do { if(m > sites) m = sites; n = 0; for (k = l - 1; k < m; k++) { for(o = 0 ; o < how_many[k] ; o++){ if(oldweight[k]==0){ fprintf(outweightfile, "0"); j++; } else{ if (weight[k-j] < 10) fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); else fprintf(outweightfile, "%c", (char)('A'+weight[k-j]-10)); n++; if (!interleaved && n > 1 && n % 60 == 1) { fprintf(outweightfile, "\n"); if (n % 10 == 0 && n % 60 != 0) putc(' ', outweightfile); } } } } putc('\n', outweightfile); if (interleaved) { l += 60; m += 
60; } } while (interleaved && l <= sites); } /* writeweights */ void writecategories() { /* write out categories for the bootstrapped sequences */ long k, l, m, n, n2; Char charstate; if(justwts){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n=0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[k]; putc(charstate, outcatfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outcatfile, "\n"); return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[newerwhere[k] + n2]; putc(charstate, outcatfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outcatfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outcatfile, "\n"); } /* writecategories */ void writeauxdata(steptr auxdata, FILE *outauxfile) { /* write out auxiliary option data (mixtures, ancestors, ect) to appropriate file. Samples parralel to data, or just gives one output entry if justwts is true */ long k, l, m, n, n2; Char charstate; /* if we just output weights (justwts), and this is first set just output the data unsampled */ if(justwts){ if(firstrep){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[k]; putc(charstate, outauxfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outauxfile, "\n"); } return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[newerwhere[k] + n2]; putc(charstate, outauxfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outauxfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outauxfile, "\n"); } /* writeauxdata */ void writefactors(void) { long k, l, m, n, prevfact, writesites; char symbol; steptr wfactor; if(!justwts || firstrep){ if(justwts){ writesites = sites; wfactor = factorr; } else { writesites = newersites; wfactor = newerfactor; } prevfact = wfactor[0]; symbol = '+'; if (interleaved) m = 60; else m = writesites; l=1; do { if(m > writesites) m = writesites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outfactfile, "\n "); if(prevfact != wfactor[k]){ symbol = (symbol == '+') ? 
'-' : '+'; prevfact = wfactor[k]; } putc(symbol, outfactfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfactfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= writesites); fprintf(outfactfile, "\n"); } } /* writefactors */ void bootwrite() { /* does bootstrapping and writes out data sets */ long i, j, rr, repdiv10; if (!(bootstrap || jackknife || permute || ild || lockhart)) reps = 1; repdiv10 = reps / 10; if (repdiv10 < 1) repdiv10 = 1; if (progress) putchar('\n'); for (rr = 1; rr <= (reps); rr++) { for (i = 0; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = j; if(rr==1) firstrep = true; else firstrep = false; if (ild) { charpermute(0, maxnewsites); for (i = 1; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = charorder[0][j]; } if (lockhart) for (i = 0; i < spp; i++) charpermute(i, maxnewsites); bootweights(); if (!justwts || permute || ild || lockhart) writedata(); if (justwts && !(permute || ild || lockhart)) writeweights(); if (categories) writecategories(); if (factors) writefactors(); if (mixture) writeauxdata(mixdata, outmixfile); if (ancvar) writeauxdata(ancdata, outancfile); if (progress && (bootstrap || jackknife || permute || ild || lockhart) && ((reps < 10) || rr % repdiv10 == 0)) { printf("completed replicate number %4ld\n", rr); #ifdef WIN32 phyFillScreenColor(); #endif } } if (progress) { if (justwts) printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); else printf("\nOutput written to file \"%s\"\n\n", outfilename); } } /* bootwrite */ int main(int argc, Char *argv[]) { /* Read in sequences or frequencies and bootstrap or jackknife them */ #ifdef MAC argc = 1; /* macsetup("SeqBoot",""); */ argv[0] = "SeqBoot"; #endif init(argc,argv); emboss_getoptions("fdiscboot", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinput(argc, argv); bootwrite(); FClose(infile); if (weights) FClose(weightfile); if (categories) { FClose(catfile); FClose(outcatfile); } if(mixture) FClose(outmixfile); if(ancvar) FClose(outancfile); if (justwts && !permute) { FClose(outweightfile); } else FClose(outfile); #ifdef MAC fixmacfile(outfilename); if (justwts && !permute) fixmacfile(outweightfilename); if (categories) fixmacfile(outcatfilename); if (mixture) fixmacfile(outmixfilename); #endif if(progress) printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/phylip.c0000664000175000017500000021676411605067345012555 00000000000000 /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Dan Fineman. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #ifdef OSX_CARBON #include #endif /* OSX_CARBON */ #include #include #include "phylip.h" #ifdef WIN32 #include /* for console code (clear screen, text color settings) */ CONSOLE_SCREEN_BUFFER_INFO savecsbi; boolean savecsbi_valid = false; HANDLE hConsoleOutput; void phyClearScreen(); void phySaveConsoleAttributes(); void phySetConsoleAttributes(); void phyRestoreConsoleAttributes(); void phyFillScreenColor(); #endif static void emboss_printtreenode(node *p, node* root); long countsemic(char *treestr); #if defined(OSX_CARBON) && defined(__MWERKS__) boolean fixedpath = false; #endif /* WIN32 */ FILE *outfile, *infile, *intree, *intree2, *outtree, *weightfile, *catfile, *ancfile, *mixfile, *factfile; AjPFile embossinfile; AjPFile embossoutfile; AjPFile embossintree; AjPFile embossintree2; AjPFile embossouttree; AjPFile embossweightfile; AjPFile embosscatfile; AjPFile embossancfile; AjPFile embossmixfile; AjPFile embossfactfile; long spp, words, bits; boolean ibmpc, ansi, tranvsp; naym *nayme; /* names of species */ void init(int argc, char** argv) { /* initialization routine for all programs * anything done at the beginning for every program should be done here */ /* set up signal handler for * segfault, floating point exception, illegal instruction, bad pipe, bus error * there are more signals that can cause a crash, but these are the most common * even these aren't found on all machines. */ } int filexists(char *filename) { /* check whether file already exists */ FILE *fp; fp =fopen(filename,"r"); if (fp) { fclose(fp); return 1; } else return 0; } /*filexists*/ const char* get_command_name (const char *vektor) { /* returns the name of the program from vektor without the whole path */ char *last_slash; /* Point to the last slash... */ last_slash = strrchr (vektor, DELIMITER); if (last_slash) /* If there was a last slash, return the character after it */ return last_slash + 1; else /* If not, return the vector */ return vektor; } /* get_command_name */ void EOF_error() { /* Print a message and exit when EOF is reached prematurely. */ puts("\n\nERROR: Unexpected end-of-file.\n"); exxit(-1); } /* EOF_error */ void getstryng(char *fname) { /* read in a file name from stdin and take off newline if any */ char *end; fflush(stdout); fname = fgets(fname, FNMLNGTH, stdin); if ( fname == NULL ) EOF_error(); if ( (end = strpbrk(fname, "\n\r")) != NULL) *end = '\0'; } /* getstryng */ void countup(long *loopcount, long maxcount) { /* count how many times this loop has tried to read data, bail out if exceeds maxcount */ (*loopcount)++; if ((*loopcount) >= maxcount) { ajErr("Made %ld attempts to read input in loop. Aborting run.", *loopcount); exxit(-1); } } /* countup */ void emboss_openfile(AjPFile outfile, FILE **fp, const char **perm) { if (outfile) { *fp = ajFileGetFileptr(outfile); outfile->fp = NULL; } else *fp = NULL; ajDebug("phylip emboss_openfile '%F'\n", outfile); *perm = ajFileGetNameC(outfile); return; } void cleerhome() { /* home cursor and clear screen, if possible */ #ifdef WIN32 if(ibmpc || ansi){ phyClearScreen(); } else { printf("\n\n"); } #else printf("%s", ((ibmpc || ansi) ? ("\033[2J\033[H") : "\n\n")); #endif } /* cleerhome */ double randum(longer seed) { /* random number generator -- slow but machine independent This is a multiplicative congruential 32-bit generator x(t+1) = 1664525 * x(t) mod 2^32, one that passes the Coveyou-Macpherson and Lehmer tests, see Knuth ACP vol. 2 We here implement it representing each integer in base-64 notation -- i.e. 
as an array of 6 six-bit chunks */ long i, j, k, sum; longer mult, newseed; double x; mult[0] = 13; /* these four statements set the multiplier */ mult[1] = 24; /* -- they are its "digits" in a base-64 */ mult[2] = 22; /* notation: 1664525 = 6*64^3+22*64^2 */ mult[3] = 6; /* +24*64+13 */ for (i = 0; i <= 5; i++) newseed[i] = 0; for (i = 0; i <= 5; i++) { /* do the multiplication piecewise */ sum = newseed[i]; k = i; if (i > 3) k = 3; for (j = 0; j <= k; j++) sum += mult[j] * seed[i - j]; newseed[i] = sum; for (j = i; j <= 4; j++) { newseed[j + 1] += newseed[j] / 64; newseed[j] &= 63; } } memcpy(seed, newseed, sizeof(longer)); /* new seed replaces old one */ seed[5] &= 3; /* from the new seed, get a floating point fraction */ x = 0.0; for (i = 0; i <= 5; i++) x = x / 64.0 + seed[i]; x /= 4.0; return x; } /* randum */ void randumize(longer seed, long *enterorder) { /* randomize input order of species -- randomly permute array enterorder */ long i, j, k; for (i = 0; i < spp; i++) { j = (long)(randum(seed) * (i+1)); k = enterorder[j]; enterorder[j] = enterorder[i]; enterorder[i] = k; } } /* randumize */ double normrand(longer seed) {/* standardized Normal random variate */ double x; x = randum(seed)+randum(seed)+randum(seed)+randum(seed) + randum(seed)+randum(seed)+randum(seed)+randum(seed) + randum(seed)+randum(seed)+randum(seed)+randum(seed)-6.0; return(x); } /* normrand */ void uppercase(Char *ch) { /* convert ch to upper case */ *ch = (islower ((int)*ch) ? toupper((int)*ch) : ((int)*ch)); } /* uppercase */ /* @func emboss_initseed ****************************************************** ** ** Given a random number seed (inseed) ** ** Increments it until it gives a remainder of 1 when divided by 4 ** and returns the resulting corrected seed as *inseed0 ** ** Also returns an array of 6 seed values in seed array ** ******************************************************************************/ void emboss_initseed(long inseed, long *inseed0, longer seed) { /* input random number seed */ long i; long myinseed = inseed; while ((myinseed & 3)!=1) /* must be an 4n+1 - see main.html */ myinseed++; *inseed0 = myinseed; for (i = 0; i <= 5; i++) seed[i] = 0; i = 0; do { seed[i] = myinseed & 63; myinseed /= 64; i++; } while (myinseed != 0); } /*emboss_initseed*/ void emboss_initoutgroup(long *outgrno, long spp) { /* validate outgroup number against number of species */ if (spp < 1) { ajDie("Cannot set outgroup number: species count spp %ld less than 1", spp); } if (*outgrno > spp) { ajWarn("Bad outgroup number: %ld, set to maximum group %ld", *outgrno, spp); *outgrno = spp; } ajDebug("emboss_initoutgroup spp: %ld => outgrno %ld\n", spp, *outgrno); } /*initoutgroup*/ void emboss_initcatn(long *categs) { /* initialize category number for rate categories */ if (*categs > maxcategs) *categs = maxcategs; } /*initcatn*/ void emboss_initcategs(AjPFloat arrayvals, long categs, double *rate) { /* initialize category rates */ long i; long maxi; if (!rate) return; maxi = ajFloatLen(arrayvals); if (maxi != categs) ajWarn("HMM category rates read %d values, expected %d values", maxi, categs); for (i=0; i < categs; i++) { if (i > maxi) rate[i] = 0.0; else rate[i] = ajFloatGet(arrayvals, i); } } /*initrcategs*/ double emboss_initprobcat(AjPFloat arrayvals, long categs, double *probcat) { /* input probabilities of rate categories for HMM rates */ long i; long maxi; double probsum = 0.0; if (!categs) return probsum; maxi = ajFloatLen(arrayvals); if (maxi != categs) ajWarn("Category probabilities read %d values, expected 
%d values", maxi, categs); for (i=0; i < categs; i++) { if (i > maxi) probcat[i] = 0.0; else probcat[i] = ajFloatGet(arrayvals, i); probsum += probcat[i]; } return probsum; } /*initprobcat*/ void lgr(long m, double b, raterootarray lgroot) { /* For use by initgammacat. Get roots of m-th Generalized Laguerre polynomial, given roots of (m-1)-th, these are to be stored in lgroot[m][] */ long i; double upper, lower, x, y; boolean dwn; /* is function declining in this interval? */ if (m == 1) { lgroot[1][1] = 1.0+b; } else { dwn = true; for (i=1; i<=m; i++) { if (i < m) { if (i == 1) lower = 0.0; else lower = lgroot[m-1][i-1]; upper = lgroot[m-1][i]; } else { /* i == m, must search above */ lower = lgroot[m-1][i-1]; x = lgroot[m-1][m-1]; do { x = 2.0*x; y = glaguerre(m, b, x); } while ((dwn && (y > 0.0)) || ((!dwn) && (y < 0.0))); upper = x; } while (upper-lower > 0.000000001) { x = (upper+lower)/2.0; if (glaguerre(m, b, x) > 0.0) { if (dwn) lower = x; else upper = x; } else { if (dwn) upper = x; else lower = x; } } lgroot[m][i] = (lower+upper)/2.0; dwn = !dwn; /* switch for next one */ } } } /* lgr */ double logfac (long n) { /* log(n!) values were calculated with Mathematica with a precision of 30 digits */ long i; double x; switch (n) { case 0: return 0.; case 1: return 0.; case 2: return 0.693147180559945309417232121458; case 3: return 1.791759469228055000812477358381; case 4: return 3.1780538303479456196469416013; case 5: return 4.78749174278204599424770093452; case 6: return 6.5792512120101009950601782929; case 7: return 8.52516136106541430016553103635; case 8: return 10.60460290274525022841722740072; case 9: return 12.80182748008146961120771787457; case 10: return 15.10441257307551529522570932925; case 11: return 17.50230784587388583928765290722; case 12: return 19.98721449566188614951736238706; default: x = 19.98721449566188614951736238706; for (i = 13; i <= n; i++) x += log(i); return x; } } /* logfac */ double glaguerre(long m, double b, double x) { /* Generalized Laguerre polynomial computed recursively. For use by initgammacat */ long i; double gln, glnm1, glnp1; /* L_n, L_(n-1), L_(n+1) */ if (m == 0) return 1.0; else { if (m == 1) return 1.0 + b - x; else { gln = 1.0+b-x; glnm1 = 1.0; for (i=2; i <= m; i++) { glnp1 = ((2*(i-1)+b+1.0-x)*gln - (i-1+b)*glnm1)/i; glnm1 = gln; gln = glnp1; } return gln; } } } /* glaguerre */ void initlaguerrecat(long categs, double alpha, double *rate, double *probcat) { /* calculate rates and probabilities to approximate Gamma distribution of rates with "categs" categories and shape parameter "alpha" using rates and weights from Generalized Laguerre quadrature */ long i; raterootarray lgroot; /* roots of GLaguerre polynomials */ double f, x, xi, y; alpha = alpha - 1.0; lgroot[1][1] = 1.0+alpha; for (i = 2; i <= categs; i++) lgr(i, alpha, lgroot); /* get roots for L^(a)_n */ /* here get weights */ /* Gamma weights are (1+a)(1+a/2) ... (1+a/n)*x_i/((n+1)^2 [L_{n+1}^a(x_i)]^2) */ f = 1; for (i = 1; i <= categs; i++) f *= (1.0+alpha/i); for (i = 1; i <= categs; i++) { xi = lgroot[categs][i]; y = glaguerre(categs+1, alpha, xi); x = f*xi/((categs+1)*(categs+1)*y*y); rate[i-1] = xi/(1.0+alpha); probcat[i-1] = x; } } /* initlaguerrecat */ double hermite(long n, double x) { /* calculates hermite polynomial with degree n and parameter x */ /* seems to be unprecise for n>13 -> root finder does not converge*/ double h1 = 1.; double h2 = 2. * x; double xx = 2. * x; long i; for (i = 1; i < n; i++) { xx = 2. * x * h2 - 2. 
* (i) * h1; h1 = h2; h2 = xx; } return xx; } /* hermite */ void root_hermite(long n, double *hroot) { /* find roots of Hermite polynmials */ long z; long ii; long start; if (n % 2 == 0) { start = n/2; z = 1; } else { start = n/2 + 1; z=2; hroot[start-1] = 0.0; } for (ii = start; ii < n; ii++) { /* search only upwards*/ hroot[ii] = halfroot(hermite, n, hroot[ii-1]+EPSILON, 1./n); hroot[start - z] = -hroot[ii]; z++; } } /* root_hermite */ double halfroot(double (*func)(long m, double x), long n, double startx, double delta) { /* searches from the bound (startx) only in one direction (by positive or negative delta, which results in other-bound=startx+delta) delta should be small. (*func) is a function with two arguments */ double xl; double xu; double xm = 0.0; double fu; double fl; double fm = 100000.; double gradient; boolean dwn = false; /* decide if we search above or below startx and escapes to trace back to the starting point that most often will be the root from the previous calculation */ if (delta < 0) { xu = startx; xl = xu + delta; } else { xl = startx; xu = xl + delta; } delta = fabs(delta); fu = (*func)(n, xu); fl = (*func)(n, xl); gradient = (fl-fu)/(xl-xu); while(fabs(fm) > EPSILON) { /* is root outside of our bracket?*/ if ((fu<0.0 && fl<0.0) || (fu>0.0 && fl > 0.0)) { xu += delta; fu = (*func)(n, xu); fl = (*func)(n, xl); gradient = (fl-fu)/(xl-xu); dwn = (gradient < 0.0) ? true : false; } else { xm = xl - fl / gradient; fm = (*func)(n, xm); if (dwn) { if (fm > 0.) { xl = xm; fl = fm; } else { xu = xm; fu = fm; } } else { if (fm > 0.) { xu = xm; fu = fm; } else { xl = xm; fl = fm; } } gradient = (fl-fu)/(xl-xu); } } return xm; } /* halfroot */ void hermite_weight(long n, double * hroot, double * weights) { /* calculate the weights for the hermite polynomial at the roots using formula from Abramowitz and Stegun chapter 25.4.46 p.890 */ long i; double hr2; double numerator; numerator = exp(0.6931471805599 * ( n-1.) 
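/* 0.6931471805599 is ln 2, so this whole expression evaluates to 2^(n-1) * n! / (n*n), the numerator of the Gauss-Hermite weight formula in Abramowitz & Stegun 25.4.46; the sqrt(pi) factor of that formula is omitted, apparently so that the weights sum to 1 instead of sqrt(pi) */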
+ logfac(n)) / (n*n); for (i = 0; i < n; i++) { hr2 = hermite(n-1, hroot[i]); weights[i] = numerator / (hr2*hr2); } } /* hermite_weight */
void inithermitcat(long categs, double alpha, double *rate, double *probcat) { /* calculates rates and probabilities */ long i; double *hroot; double std; std = SQRT2 /sqrt(alpha); hroot = (double *) Malloc((categs+1) * sizeof(double)); root_hermite(categs, hroot); /* calculate roots */ hermite_weight(categs, hroot, probcat); /* set weights */ for (i=0; i < categs; i++) /* set rates from the Hermite roots */ rate[i] = 1.0 + std*hroot[i]; free(hroot); } /* inithermitcat */
void initgammacat(long categs, double alpha, double *rate, double *probcat) { /* calculate rates and probabilities to approximate a Gamma distribution of rates with "categs" categories and shape parameter "alpha", using Generalized Laguerre quadrature for small alpha and Hermite quadrature for large alpha */ if (alpha >= 100.0) inithermitcat(categs, alpha, rate, probcat); else initlaguerrecat(categs, alpha, rate, probcat); } /* initgammacat */
void inithowmany(long *howmanny, long howoften) {/* input how many cycles */ long loopcount; loopcount = 0; for (;;) { printf("How many cycles of %4ld trees?\n", howoften); fflush(stdout); if (scanf("%ld%*[^\n]", howmanny) == 1) { getchar(); if (*howmanny >= 1) break; } countup(&loopcount, 10); } } /*inithowmany*/
void inithowoften(long *howoften) { /* input how many trees per cycle */ long loopcount; loopcount = 0; for (;;) { printf("How many trees per cycle?\n"); fflush(stdout); if (scanf("%ld%*[^\n]", howoften) == 1) { getchar(); if (*howoften >= 1) break; } countup(&loopcount, 10); } } /*inithowoften*/
void initlambda(double *lambda) { /* input patch length parameter for autocorrelated HMM rates */ long loopcount; loopcount = 0; for (;;) { printf("Mean block length of sites having the same rate (greater than 1)?\n"); fflush(stdout); if (scanf("%lf%*[^\n]", lambda) == 1) { getchar(); if (*lambda > 1.0) break; } countup(&loopcount, 10); } *lambda = 1.0 / *lambda; } /* initlambda */
void initfreqs(double *freqa, double *freqc, double *freqg, double *freqt) { /* input frequencies of the four bases */ char input[100]; long scanned, loopcount; printf("Base frequencies for A, C, G, T/U (use blanks to separate)?\n"); loopcount = 0; do { fflush(stdout); getstryng(input); scanned = sscanf(input,"%lf%lf%lf%lf%*[^\n]", freqa, freqc, freqg, freqt); if (scanned == 4) break; else printf("Please enter exactly 4 values.\n"); countup(&loopcount, 100); } while (1); } /* initfreqs */
void initratio(double *ttratio) { /* input transition/transversion ratio */ long loopcount; loopcount = 0; for (;;) { printf("Transition/transversion ratio?\n"); fflush(stdout); if (scanf("%lf%*[^\n]", ttratio) == 1) { getchar(); if (*ttratio >= 0.0) break; else printf("Transition/transversion ratio cannot be negative.\n"); } countup(&loopcount, 10); } } /* initratio */
void initpower(double *power) { for (;;) { printf("New power?\n"); fflush(stdout); if (scanf("%lf%*[^\n]", power) == 1) { getchar(); break; } } } /* initpower */
void initdatasets(long *datasets) { /* handle multi-data set option */ long loopcount; loopcount = 0; for (;;) { printf("How many data sets?\n"); fflush(stdout); if (scanf("%ld%*[^\n]", datasets) == 1) { getchar(); if (*datasets > 1) break; else printf("Bad data sets number: it must be greater than 1\n"); } countup(&loopcount, 10); } } /* initdatasets */
void justweights(long *datasets) { /* handle multi-data set option by weights */ long loopcount; loopcount = 0; for (;;) { printf("How many sets of weights?\n"); fflush(stdout); if (scanf("%ld%*[^\n]", datasets) == 1) { getchar(); if (*datasets >= 1) break; else printf("Bad number of weight sets: it must be at least 1\n"); } countup(&loopcount, 10); } } /* justweights */
void initterminal(boolean *ibmpc, boolean *ansi) { /* handle terminal option */ if (*ibmpc) { *ibmpc = false; *ansi = true; } else if (*ansi) *ansi = false; else *ibmpc = true; } 
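/* A minimal usage sketch for the Gamma rate-category routines above (illustrative only: the variable names are hypothetical and the rate and probcat arrays must be allocated by the caller with one entry per category):

     long i, categs = 4;
     double alpha = 0.7;
     double rate[4], probcat[4];

     initgammacat(categs, alpha, rate, probcat);
     for (i = 0; i < categs; i++)
       printf("category %ld: rate %f  probability %f\n", i + 1, rate[i], probcat[i]);

   initgammacat dispatches to Generalized Laguerre quadrature for alpha below 100 and to Gauss-Hermite quadrature for larger alpha; the resulting probcat values sum to approximately 1. */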
/*initterminal*/ void initbestrees(bestelm *bestrees, long maxtrees, boolean glob) { /* initializes either global or local field of each array in bestrees */ long i; if (glob) for (i = 0; i < maxtrees; i++) bestrees[i].gloreange = false; else for (i = 0; i < maxtrees; i++) bestrees[i].locreange = false; } /* initbestrees */ void newline(FILE *filename, long i, long j, long k) { /* go to new line if i is a multiple of j, indent k spaces */ long m; if ((i - 1) % j != 0 || i <= 1) return; putc('\n', filename); for (m = 1; m <= k; m++) putc(' ', filename); } /* newline */ void inputnumbersseq(AjPSeqset seqset, long *spp, long *chars, long *nonodes, long n) { int begin2,end2; /* input the numbers of species and of characters */ /* revised for EMBOSS to take numbers from seqset input */ ajSeqsetFmtUpper(seqset); *spp = ajSeqsetGetSize(seqset); *chars = ajSeqsetGetRange(seqset,&begin2,&end2); *nonodes = *spp * 2 - n; ajDebug("inputnumbersseq n: %ld spp: %ld chars: %ld nonodes: %ld\n", n, *spp, *chars, *nonodes); } /* inputnumbersseq */ void inputnumbersfreq(AjPPhyloFreq freq, long *spp, long *chars, long *nonodes, long n) { *spp = freq->Size; *chars = freq->Loci; *nonodes = *spp * 2 - n; } void inputnumbersstate(AjPPhyloState state, long *spp, long *chars, long *nonodes, long n) { ajDebug("inputnumbersstate size %d len %d\n", state->Size, state->Len); *spp = state->Size; *chars = state->Len; *nonodes = *spp * 2 - n; } void inputnumbers2seq(AjPPhyloDist dist, long *spp, long *nonodes, long n) { *spp = dist->Size; fprintf(outfile, "\n%4ld Populations\n", *spp); *nonodes = *spp * 2 - n; } /* inputnumbers2seq */ void samenumspfreq(AjPPhyloFreq freq, long *chars, long ith) { /* check if spp is same as the first set in other data sets */ if (freq->Size != spp) { ajErr("\nERROR: Inconsistent number of species in data set %ld", ith); exxit(-1); } *chars = freq->Loci; } /* samenumspfreq */ void samenumspstate(AjPPhyloState state, long *chars, long ith) { /* check if spp is same as the first set in other data sets */ if (state->Size != spp) { ajErr("\nERROR: Inconsistent number of species in data set %ld", ith); exxit(-1); } *chars = state->Len; } /* samenumspstate */ void samenumspseq(AjPSeqset set, long *chars, long ith) { /* check if spp is same as the first set in other data sets */ if (set->Size != spp) { ajErr("\nERROR: Inconsistent number of species in data set %ld", ith); exxit(-1); } *chars = set->Len; } /* samenumspstate */ void samenumspseq2(AjPPhyloDist set, long ith) { /* check if spp is same as the first set in other data sets */ if (set->Size != spp) { ajErr("\nERROR: Inconsistent number of species in data set %ld", ith); exxit(-1); } } /* samenumspphylodist */ void inputweightsstr(AjPStr wtstr, long chars, steptr weight, boolean *weights) { Char ch; int i; for (i = 0; i < chars; i++) { ch = ajStrGetCharPos(wtstr, i); weight[i] = 1; if (isdigit((int) ch)) weight[i] = ch - '0'; else if (isalpha((int) ch)) { uppercase(&ch); weight[i] = ch - 'A' + 10; } else { ajErr("ERROR: Bad weight character: %c", ch); exxit(-1); } } *weights = true; } /*inputweightsstr*/ void inputweightsstr2(AjPStr str, long a, long b, long *weightsum, steptr weight, boolean *weights, const char *prog) { /* input the character weights, 0 or 1 */ Char ch = '\0'; long i; *weightsum = 0; for (i = a; i < b; i++) { ch = ajStrGetCharPos(str, i-1); weight[i] = 1; if (ch == '0' || ch == '1') weight[i] = ch - '0'; else { ajErr("ERROR: Bad weight character: %c -- " "weights in %s must be 0 or 1\n", ch, prog); exxit(-1); } *weightsum 
+= weight[i]; } *weights = true; } void printweights(FILE *filename, long inc, long chars, steptr weight, const char *letters) { /* print out the weights of sites */ long i, j; boolean letterweights; letterweights = false; for (i = 0; i < chars; i++) if (weight[i] > 9) letterweights = true; fprintf(filename, "\n %s are weighted as follows:", letters); if (letterweights) fprintf(filename, " (A = 10, B = 11, etc.)\n"); else putc('\n', filename); for (i = 0; i < chars; i++) { if (i % 60 == 0) { putc('\n', filename); for (j = 1; j <= nmlngth + 3; j++) putc(' ', filename); } if (weight[i+inc] < 10) fprintf(filename, "%ld", weight[i + inc]); else fprintf(filename, "%c", 'A'-10+(int)weight[i + inc]); if ((i+1) % 5 == 0 && (i+1) % 60 != 0) putc(' ', filename); } fprintf(filename, "\n\n"); } /* printweights */ void inputcategsstr(AjPStr str, long a, long b, steptr category, long categs, const char *prog) { /* input the categories, 1-9 */ Char ch; long i; for (i = a; i < b; i++) { ch = ajStrGetCharPos(str, i); if ((ch >= '1') && (ch <= ('0'+categs))) category[i] = ch - '0'; } } void printcategs(FILE *filename, long chars, steptr category, const char *letters) { /* print out the sitewise categories */ long i, j; fprintf(filename, "\n %s are:\n", letters); for (i = 0; i < chars; i++) { if (i % 60 == 0) { putc('\n', filename); for (j = 1; j <= nmlngth + 3; j++) putc(' ', filename); } fprintf(filename, "%ld", category[i]); if ((i+1) % 10 == 0 && (i+1) % 60 != 0) putc(' ', filename); } fprintf(filename, "\n\n"); } /* printcategs */ void inputfactorsstr(AjPStr str, long chars, Char *factor, boolean *factors) { /* reads the factor symbols */ long i; for (i = 0; i < (chars); i++) { factor[i] = ajStrGetCharPos(str, i); } *factors = true; } /* inputfactorsnew */ void printfactors(FILE *filename, long chars, Char *factor, const char *letters) { /* print out list of factor symbols */ long i; fprintf(filename, "Factors%s:\n\n", letters); for (i = 1; i <= nmlngth - 5; i++) putc(' ', filename); for (i = 1; i <= (chars); i++) { newline(filename, i, 55, nmlngth + 3); putc(factor[i - 1], filename); if (i % 5 == 0) putc(' ', filename); } putc('\n', filename); } /* printfactors */ void headings(long chars, const char *letters1, const char *letters2) { long i, j; putc('\n', outfile); j = nmlngth + (chars + (chars - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "%s\n", letters1); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "%s\n\n", letters2); } /* headings */ void initnamestate(AjPPhyloState state, long i) { /* read in species name */ long j; AjPStr names = state->Names[i]; for (j = 0; j < nmlngth; j++) { if (j < ajStrGetLen(names)) nayme[i][j] = ajStrGetCharPos(names, j); else nayme[i][j] = ' '; if ((nayme[i][j] == '(') || (nayme[i][j] == ')') || (nayme[i][j] == ':') || (nayme[i][j] == ',') || (nayme[i][j] == ';') || (nayme[i][j] == '[') || (nayme[i][j] == ']')) { ajErr("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n" " In name of species number %ld there is character %c\n\n", i+1, nayme[i][j]); exxit(-1); } } } /* initnamestate */ void initnamedist(AjPPhyloDist dist, long i) { /* read in species name */ long j; AjPStr names = dist->Names[i]; for (j = 0; j < nmlngth; j++) { if (j < ajStrGetLen(names)) nayme[i][j] = ajStrGetCharPos(names, j); else nayme[i][j] = ' '; if ((nayme[i][j] == '(') || (nayme[i][j] == ')') || (nayme[i][j] == ':') || 
(nayme[i][j] == ',') || (nayme[i][j] == ';') || (nayme[i][j] == '[') || (nayme[i][j] == ']')) { ajErr("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n" " In name of species number %ld there is character %c\n\n", i+1, nayme[i][j]); exxit(-1); } } } /* initnamedist */ void initnameseq(AjPSeqset set, long i) { /* read in species name */ long j; AjPStr names = ajStrNewS(ajSeqGetNameS(ajSeqsetGetseqSeq(set, i))); for (j = 0; j < nmlngth; j++) { if (j < ajStrGetLen(names)) nayme[i][j] = ajStrGetCharPos(names, j); else nayme[i][j] = ' '; if ((nayme[i][j] == '(') || (nayme[i][j] == ')') || (nayme[i][j] == ':') || (nayme[i][j] == ',') || (nayme[i][j] == ';') || (nayme[i][j] == '[') || (nayme[i][j] == ']')) { ajErr("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n" " In name of species number %ld there is character %c\n\n", i+1, nayme[i][j]); exxit(-1); } } ajStrDel(&names); } /* initnameseq */ void initnamefreq(AjPPhyloFreq freq, long i) { /* read in species name */ long j; AjPStr names = freq->Names[i]; for (j = 0; j < nmlngth; j++) { if (j < ajStrGetLen(names)) nayme[i][j] = ajStrGetCharPos(names, j); else nayme[i][j] = ' '; if ((nayme[i][j] == '(') || (nayme[i][j] == ')') || (nayme[i][j] == ':') || (nayme[i][j] == ',') || (nayme[i][j] == ';') || (nayme[i][j] == '[') || (nayme[i][j] == ']')) { ajErr("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n" " In name of species number %ld there is character %c\n\n", i+1, nayme[i][j]); exxit(-1); } } } /* initnamefreq */ void findtree(boolean *found, long *pos, long nextree, long *place, bestelm *bestrees) { /* finds tree given by array place in array bestrees by binary search */ /* used by dnacomp, dnapars, dollop, mix, & protpars */ long i, lower, upper; boolean below, done; below = false; lower = 1; upper = nextree - 1; (*found) = false; while (!(*found) && lower <= upper) { (*pos) = (lower + upper) / 2; i = 3; done = false; while (!done) { done = (i > spp); if (!done) done = (place[i - 1] != bestrees[(*pos) - 1].btree[i - 1]); if (!done) i++; } (*found) = (i > spp); if (*found) break; below = (place[i - 1] < bestrees[(*pos )- 1].btree[i - 1]); if (below) upper = (*pos) - 1; else lower = (*pos) + 1; } if (!(*found) && !below) (*pos)++; } /* findtree */ void addtree(long pos, long *nextree, boolean collapse, long *place, bestelm *bestrees) { /* puts tree from array place in its proper position in array bestrees */ /* used by dnacomp, dnapars, dollop, mix, & protpars */ long i; for (i = *nextree - 1; i >= pos; i--){ memcpy(bestrees[i].btree, bestrees[i - 1].btree, spp * sizeof(long)); bestrees[i].gloreange = bestrees[i - 1].gloreange; bestrees[i - 1].gloreange = false; bestrees[i].locreange = bestrees[i - 1].locreange; bestrees[i - 1].locreange = false; bestrees[i].collapse = bestrees[i - 1].collapse; } for (i = 0; i < spp; i++) bestrees[pos - 1].btree[i] = place[i]; bestrees[pos - 1].collapse = collapse; (*nextree)++; } /* addtree */ long findunrearranged(bestelm *bestrees, long nextree, boolean glob) { /* finds bestree with either global or local field false */ long i; if (glob) { for (i = 0; i < nextree - 1; i++) if (!bestrees[i].gloreange) return i; } else { for (i = 0; i < nextree - 1; i++) if (!bestrees[i].locreange) return i; } return -1; } /* findunrearranged */ boolean torearrange(bestelm *bestrees, long nextree) { /* sees if any best tree is yet to be rearranged */ if (findunrearranged(bestrees, nextree, true) >= 0) return true; else if (findunrearranged(bestrees, nextree, false) >= 0) return 
true; else return false; } /* torearrange */ void reducebestrees(bestelm *bestrees, long *nextree) { /* finds best trees with collapsible branches and deletes them */ long i, j; i = 0; j = *nextree - 2; do { while (!bestrees[i].collapse && i < *nextree - 1) i++; while (bestrees[j].collapse && j >= 0) j--; if (i < j) { memcpy(bestrees[i].btree, bestrees[j].btree, spp * sizeof(long)); bestrees[i].gloreange = bestrees[j].gloreange; bestrees[i].locreange = bestrees[j].locreange; bestrees[i].collapse = false; bestrees[j].collapse = true; } } while (i < j); *nextree = i + 1; } /* reducebestrees */ void shellsort(double *a, long *b, long n) { /* Shell sort keeping a, b in same order */ /* used by dnapenny, dolpenny, & penny */ long gap, i, j, itemp; double rtemp; gap = n / 2; while (gap > 0) { for (i = gap + 1; i <= n; i++) { j = i - gap; while (j > 0) { if (a[j - 1] > a[j + gap - 1]) { rtemp = a[j - 1]; a[j - 1] = a[j + gap - 1]; a[j + gap - 1] = rtemp; itemp = b[j - 1]; b[j - 1] = b[j + gap - 1]; b[j + gap - 1] = itemp; } j -= gap; } } gap /= 2; } } /* shellsort */ void sgetch(Char *c, long *parens, char **treestr) { /* get next nonblank character */ do { (*c) = *(*treestr)++; if ((*c) == '\n' || (*c) == '\t') (*c) = ' '; } while ( *c == ' ' && (**treestr) ); if ((*c) == '(') (*parens)++; if ((*c) == ')') (*parens)--; } /* sgetch */ void processlength(double *valyew, double *divisor, Char *ch, boolean *lengthIsNegative, char **treestr, long *parens) { /* read a branch length from a treestr */ long digit, ordzero, exponent, exponentIsNegative; boolean pointread, hasExponent; ordzero = '0'; *lengthIsNegative = false; pointread = false; hasExponent = false; exponentIsNegative = -1; // 3 states: -1 = unassigned, 1 = true, 0 = false exponent = 0; *valyew = 0.0; *divisor = 1.0; sgetch(ch, parens, treestr); if ('+' == *ch) sgetch(ch, parens, treestr); // ignore leading '+', because "+1.2345" == "1.2345" else if ('-' == *ch) { *lengthIsNegative = true; sgetch(ch, parens, treestr); } digit = (long)(*ch - ordzero); while ( ((digit <= 9) && (digit >= 0)) || '.' == *ch || '-' == *ch || '+' == *ch || 'E' == *ch || 'e' == *ch) { if ('.' == *ch ) { if (!pointread) pointread = true; else { printf("\n\nERROR: Branch length found with more than one \'.\' in it.\n\n"); exxit(-1); } } else if ('+' == *ch) { if (hasExponent && -1 == exponentIsNegative) exponentIsNegative = 0; // 3 states: -1 = unassigned, 1 = true, 0 = false else { printf("\n\nERROR: Branch length found with \'+\' in an unexpected place.\n\n"); exxit(-1); } } else if ('-' == *ch) { if (hasExponent && -1 == exponentIsNegative) exponentIsNegative = 1; // 3 states: -1 = unassigned, 1 = true, 0 = false else { printf("\n\nERROR: Branch length found with \'-\' in an unexpected place.\n\n"); exxit(-1); } } else if ('E' == *ch || 'e' == *ch) { if (!hasExponent) hasExponent = true; else { printf("\n\nERROR: Branch length found with more than one \'E\' in it.\n\n"); exxit(-1); } } else { if (!hasExponent) { *valyew = *valyew * 10.0 + digit; if (pointread) *divisor *= 10.0; } else exponent = 10*exponent + digit; } sgetch(ch, parens, treestr); digit = (long)(*ch - ordzero); } if (hasExponent) { if (exponentIsNegative) *divisor *= pow(10.,(double)exponent); else *divisor /= pow(10.,(double)exponent); } if (*lengthIsNegative) *valyew = -(*valyew); } /* processlength */ void writename(long start, long n, long *enterorder) { /* write species name and number in entry order */ long i, j; for (i = start; i < start+n; i++) { printf(" %3ld. 
", i+1); for (j = 0; j < nmlngth; j++) putchar(nayme[enterorder[i] - 1][j]); putchar('\n'); fflush(stdout); } } /* writename */ void memerror() { printf("Error allocating memory\n"); exxit(-1); } /* memerror */ void odd_malloc(long x) { /* error message if attempt to malloc too little or too much memory */ printf ("ERROR: a function asked for an inappropriate amount of memory:"); printf (" %ld bytes\n", x); printf (" This can mean one of two things:\n"); printf (" 1. The input file is incorrect"); printf (" (perhaps it was not saved as Text Only),\n"); printf (" 2. There is a bug in the program.\n"); printf (" Please check your input file carefully.\n"); printf (" If it seems to be a bug, please mail joe (at) gs.washington.edu\n"); printf (" with the name of the program, your computer system type,\n"); printf (" a full description of the problem, and with the input data file.\n"); printf (" (which should be in the body of the message, not as an Attachment).\n"); /* abort() can be used to crash */ exxit(-1); } MALLOCRETURN *mymalloc(long x) { /* wrapper for malloc, allowing error message if too little, too much */ MALLOCRETURN *new_block; if ((x <= 0) || (x > TOO_MUCH_MEMORY)) odd_malloc(x); new_block = (MALLOCRETURN *)calloc(1, x); if (!new_block) { memerror(); return (MALLOCRETURN *) new_block; } else return (MALLOCRETURN *) new_block; } /* mymalloc */ void gnu(node **grbg, node **p) { /* this and the following are do-it-yourself garbage collectors. Make a new node or pull one off the garbage list */ if (*grbg != NULL) { *p = *grbg; *grbg = (*grbg)->next; } else *p = (node *)Malloc(sizeof(node)); (*p)->back = NULL; (*p)->next = NULL; (*p)->tip = false; (*p)->times_in_tree = 0.0; (*p)->r = 0.0; (*p)->theta = 0.0; (*p)->x = NULL; (*p)->protx = NULL; /* for the sake of proml */ } /* gnu */ void chuck(node **grbg, node *p) { /* collect garbage on p -- put it on front of garbage list */ p->back = NULL; p->next = *grbg; *grbg = p; } /* chuck */ void zeronumnuc(node *p, long endsite) { long i,j; for (i = 0; i < endsite; i++) for (j = (long)A; j <= (long)O; j++) p->numnuc[i][j] = 0; } /* zeronumnuc */ void zerodiscnumnuc(node *p, long endsite) { long i,j; for (i = 0; i < endsite; i++) for (j = (long)zero; j <= (long)seven; j++) p->discnumnuc[i][j] = 0; } /* zerodiscnumnuc */ void allocnontip(node *p, long *zeros, long endsite) { /* allocate an interior node */ /* used by dnacomp, dnapars, & dnapenny */ p->numsteps = (steptr)Malloc(endsite*sizeof(long)); p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); p->base = (baseptr)Malloc(endsite*sizeof(long)); p->oldbase = (baseptr)Malloc(endsite*sizeof(long)); p->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); memcpy(p->base, zeros, endsite*sizeof(long)); memcpy(p->numsteps, zeros, endsite*sizeof(long)); memcpy(p->oldbase, zeros, endsite*sizeof(long)); memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); zeronumnuc(p, endsite); } /* allocnontip */ void allocdiscnontip(node *p, long *zeros, unsigned char *zeros2, long endsite) { /* allocate an interior node */ /* used by pars */ p->numsteps = (steptr)Malloc(endsite*sizeof(long)); p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); p->discbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); p->olddiscbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); p->discnumnuc = (discnucarray *)Malloc(endsite*sizeof(discnucarray)); memcpy(p->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(p->numsteps, zeros, endsite*sizeof(long)); memcpy(p->olddiscbase, zeros2, 
endsite*sizeof(unsigned char)); memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); zerodiscnumnuc(p, endsite); } /* allocdiscnontip */ void allocnode(node **anode, long *zeros, long endsite) { /* allocate a node */ /* used by dnacomp, dnapars, & dnapenny */ *anode = (node *)Malloc(sizeof(node)); allocnontip(*anode, zeros, endsite); } /* allocnode */ void allocdiscnode(node **anode, long *zeros, unsigned char *zeros2, long endsite) { /* allocate a node */ /* used by pars */ *anode = (node *)Malloc(sizeof(node)); allocdiscnontip(*anode, zeros, zeros2, endsite); } /* allocdiscnontip */ void gnutreenode(node **grbg, node **p, long i, long endsite, long *zeros) { /* this and the following are do-it-yourself garbage collectors. Make a new node or pull one off the garbage list */ if (*grbg != NULL) { *p = *grbg; *grbg = (*grbg)->next; memcpy((*p)->numsteps, zeros, endsite*sizeof(long)); memcpy((*p)->oldnumsteps, zeros, endsite*sizeof(long)); memcpy((*p)->base, zeros, endsite*sizeof(long)); memcpy((*p)->oldbase, zeros, endsite*sizeof(long)); zeronumnuc(*p, endsite); } else allocnode(p, zeros, endsite); (*p)->back = NULL; (*p)->next = NULL; (*p)->tip = false; (*p)->visited = false; (*p)->index = i; (*p)->numdesc = 0; (*p)->sumsteps = 0.0; } /* gnutreenode */ void gnudisctreenode(node **grbg, node **p, long i, long endsite, long *zeros, unsigned char *zeros2) { /* this and the following are do-it-yourself garbage collectors. Make a new node or pull one off the garbage list */ if (*grbg != NULL) { *p = *grbg; *grbg = (*grbg)->next; memcpy((*p)->numsteps, zeros, endsite*sizeof(long)); memcpy((*p)->oldnumsteps, zeros, endsite*sizeof(long)); memcpy((*p)->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy((*p)->olddiscbase, zeros2, endsite*sizeof(unsigned char)); zerodiscnumnuc(*p, endsite); } else allocdiscnode(p, zeros, zeros2, endsite); (*p)->back = NULL; (*p)->next = NULL; (*p)->tip = false; (*p)->visited = false; (*p)->index = i; (*p)->numdesc = 0; (*p)->sumsteps = 0.0; } /* gnudisctreenode */ void setupnode(node *p, long i) { /* initialization of node pointers, variables */ p->next = NULL; p->back = NULL; p->times_in_tree = (double) i * 1.0; p->index = i; p->tip = false; } /* setupnode */ node *pnode(tree *t, node *p) { /* Get the "parent nodelet" of p's node group */ return t->nodep[p->index - 1]; } long count_sibs (node *p) { /* Count the number of nodes in a ring, return the total number of */ /* nodes excluding the one passed into the function (siblings) */ node *q; long return_int = 0; if (p->tip) { printf ("Error: the function count_sibs called on a tip. 
This is a bug.\n"); exxit (-1); } q = p->next; while (q != p) { if (q == NULL) { printf ("Error: a loop of nodes was not closed.\n"); exxit (-1); } else { return_int++; q = q->next; } } return return_int; } /* count_sibs */ void inittrav (node *p) { /* traverse to set pointers uninitialized on inserting */ long i, num_sibs; node *sib_ptr; if (p == NULL) return; if (p->tip) return; num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_ptr->initialized = false; inittrav(sib_ptr->back); } } /* inittrav */ void commentskipper(char **treestr, long *bracket) { /* skip over comment bracket contents in reading tree */ char c; c = *(*treestr)++; while (c != ']') { if(!(**treestr)) { ajErr("ERROR: Unmatched comment brackets"); exxit(-1); } if(c == '[') { (*bracket)++; commentskipper(treestr, bracket); } c = *(*treestr)++; } (*bracket)--; } /* commentskipper */ long countcomma(char *treestr, long *comma) { /* Modified by Dan F. 11/10/96 */ /* countcomma rewritten so it passes back both lparen+comma to allocate nodep and a pointer to the comma variable. This allows the tree to know how many species exist, and the tips to be placed in the front of the nodep array */ Char c; long lparen = 0; long bracket = 0; char *treeptr = treestr; (*comma) = 0; for (;;){ c = *treeptr++; if (!c) break; if (c == ';') break; if (c == ',') (*comma)++; if (c == '(') lparen++; if (c == '[') { bracket++; commentskipper(&treeptr, &bracket); } } return lparen + (*comma); } /*countcomma*/ long countsemic(char *treestr) { /* Used to determine the number of user trees. Return either a: the number of semicolons in the file outside comments or b: the first integer in the file */ Char c; long return_val, semic = 0; long bracket = 0; char *treeptr = treestr; /* Eat all whitespace */ c = *treeptr++; while ((c == ' ') || (c == '\t') || (c == '\n')) { c = *treeptr++; } /* Then figure out if the first non-white character is a digit; if so, return it */ if (isdigit ((int) c)) { return_val = atoi(&c); } else { /* Loop past all characters, count the number of semicolons outside of comments */ for (;;){ c = *treeptr++; if (!c) break; if (c == ';') semic++; if (c == '[') { bracket++; commentskipper(&treeptr, &bracket); } } return_val = semic; } return return_val; } /* countsemic */ void hookup(node *p, node *q) { /* hook together two nodes */ assert(p != NULL); assert(q != NULL); p->back = q; q->back = p; } /* hookup */ void unhookup(node *p, node *q) { /* unhook two nodes. Not strictly required, but helps check assumptions */ assert(p != NULL); assert(q != NULL); assert(p->back != NULL); assert(q->back != NULL); assert(p->back == q); assert(q->back == p); p->back = NULL; q->back = NULL; } void link_trees(long local_nextnum, long nodenum, long local_nodenum, pointarray nodep) { if(local_nextnum == 0) hookup(nodep[nodenum], nodep[local_nodenum]); else if(local_nextnum == 1) hookup(nodep[nodenum], nodep[local_nodenum]->next); else if(local_nextnum == 2) hookup(nodep[nodenum], nodep[local_nodenum]->next->next); else printf("Error in Link_trees()"); } /* link_trees() */ void allocate_nodep(pointarray *nodep, char *treestr, long *precalc_tips) { /* pre-compute space and allocate memory for nodep */ long numnodes; /* returns number commas & ( */ long numcom = 0; /* returns number commas */ numnodes = countcomma(treestr, &numcom) + 1; *nodep = (pointarray)Malloc(2*numnodes*sizeof(node *)); (*precalc_tips) = numcom + 1; /* this will be used in placing the tip nodes in the front region of nodep. 
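For example, for a hypothetical tree string "((A,B),(C,D));" countcomma counts 3 left parentheses plus 3 commas, so numnodes is 6 + 1 = 7, nodep gets 2*7 = 14 slots, and precalc_tips is 3 + 1 = 4: the four tips occupy the front of nodep and the interior node groups follow them.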
Used for species check? */ } /* allocate_nodep -plc */ void malloc_pheno (node *p, long endsite, long rcategs) { /* Allocate the phenotype arrays; used by dnaml */ long i; p->x = (phenotype)Malloc(endsite*sizeof(ratelike)); p->underflows = (double *)Malloc(endsite * sizeof(double)); for (i = 0; i < endsite; i++) p->x[i] = (ratelike)Malloc(rcategs*sizeof(sitelike)); } /* malloc_pheno */ void malloc_ppheno (node *p,long endsite, long rcategs) { /* Allocate the phenotype arrays; used by proml */ long i; p->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); p->underflows = (double *)Malloc(endsite*sizeof(double)); for (i = 0; i < endsite; i++) p->protx[i] = (pratelike)Malloc(rcategs*sizeof(psitelike)); } /* malloc_ppheno */ long take_name_from_tree (Char *ch, Char *str, char **treestr) { /* This loop reads a name from treefile and stores it in *str. Returns the length of the name string. str must be at least MAXNCH bytes, but no effort is made to null-terminate the string. Underscores and newlines are converted to spaces. Characters beyond MAXNCH are discarded. */ long name_length = 0; do { if ((*ch) == '_') (*ch) = ' '; if ( name_length < MAXNCH ) str[name_length++] = (*ch); (*ch) = *(*treestr)++; if (*ch == '\n') *ch = ' '; } while ( strchr(":,)[;", *ch) == NULL ); return name_length; } /* take_name_from_tree */ void match_names_to_data (Char *str, pointarray treenode, node **p, long spp) { /* This loop matches names taken from treestr to indexed names in the data file */ boolean found; long i, n; n = 1; do { found = true; for (i = 0; i < nmlngth; i++) { found = (found && ((str[i] == nayme[n - 1][i]) || (((nayme[n - 1][i] == '_') && (str[i] == ' ')) || ((nayme[n - 1][i] == ' ') && (str[i] == '\0'))))); } if (found) *p = treenode[n - 1]; else n++; } while (!(n > spp || found)); if (n > spp) { printf("\n\nERROR: Cannot find species: "); for (i = 0; (str[i] != '\0') && (i < MAXNCH); i++) putchar(str[i]); printf(" in data file\n\n"); exxit(-1); } } /* match_names_to_data */ void addelement(node **p, node *q, Char *ch, long *parens, char **treestr, pointarray treenode, boolean *goteof, boolean *first, pointarray nodep, long *nextnode, long *ntips, boolean *haslengths, node **grbg, initptr initnode, boolean unifok, long maxnodes) { /* Recursive procedure adds nodes to user-defined tree This is the main (new) tree-reading procedure */ node *pfirst; long i, len = 0, nodei = 0; boolean notlast; Char str[MAXNCH+1]; node *r; long furs = 0; if ((*ch) == '(') { (*nextnode)++; /* get ready to use new interior node */ nodei = *nextnode; /* do what needs to be done at bottom */ if ( maxnodes != -1 && nodei > maxnodes) { printf("ERROR in input tree file: Attempting to allocate too\n"); printf("many nodes. This is usually caused by a unifurcation.\n"); printf("To use this tree with this program use Retree to read\n"); printf("and write this tree.\n"); exxit(-1); } /* do what needs to be done at bottom */ (*initnode)(p, grbg, q, len, nodei, ntips, parens, bottom, treenode, nodep, str, ch, treestr); pfirst = (*p); notlast = true; while (notlast) { /* loop through immediate descendants */ furs++; (*initnode)(&(*p)->next, grbg, q, len, nodei, ntips, parens, nonbottom, treenode, nodep, str, ch, treestr); /* ... 
doing what is done before each */ r = (*p)->next; sgetch(ch, parens, treestr); /* look for next character */ addelement(&(*p)->next->back, (*p)->next, ch, parens, treestr, treenode, goteof, first, nodep, nextnode, ntips, haslengths, grbg, initnode, unifok, maxnodes); (*initnode)(&r, grbg, q, len, nodei, ntips, parens, hslength, treenode, nodep, str, ch, treestr); /* do what is done after each about length */ pfirst->numdesc++; /* increment number of descendants */ *p = r; /* make r point back to p */ if ((*ch) == ')') { notlast = false; do { sgetch(ch, parens, treestr); } while ((*ch) != ',' && (*ch) != ')' && (*ch) != '[' && (*ch) != ';' && (*ch) != ':'); } } if ( furs <= 1 && !unifok ) { printf("ERROR in input tree file: A Unifurcation was detetected.\n"); printf("To use this tree with this program use retree to read and"); printf(" write this tree\n"); exxit(-1); } (*p)->next = pfirst; (*p) = pfirst; } else if ((*ch) != ')') { /* if it's a species name */ for (i = 0; i < MAXNCH+1; i++) /* fill string with nulls */ str[i] = '\0'; len = take_name_from_tree (ch, str, treestr); /* get the name */ if ((*ch) == ')') (*parens)--; /* decrement count of open parentheses */ (*initnode)(p, grbg, q, len, nodei, ntips, parens, tip, treenode, nodep, str, ch, treestr); /* do what needs to be done at a tip */ } else sgetch(ch, parens, treestr); if (q != NULL) hookup(q, (*p)); /* now hook up */ (*initnode)(p, grbg, q, len, nodei, ntips, parens, iter, treenode, nodep, str, ch, treestr); /* do what needs to be done to variable iter */ if ((*ch) == ':') (*initnode)(p, grbg, q, len, nodei, ntips, parens, length, treenode, nodep, str, ch, treestr); /* do what needs to be done with length */ else if ((*ch) != ';' && (*ch) != '[') (*initnode)(p, grbg, q, len, nodei, ntips, parens, hsnolength, treenode, nodep, str, ch, treestr); /* ... or what needs to be done when no length */ if ((*ch) == '[') (*initnode)(p, grbg, q, len, nodei, ntips, parens, treewt, treenode, nodep, str, ch, treestr); /* ... for processing a tree weight */ else if ((*ch) == ';') /* ... and at end of tree */ (*initnode)(p, grbg, q, len, nodei, ntips, parens, unittrwt, treenode, nodep, str, ch, treestr); } /* addelement */ void treeread (char** treestr, node **root, pointarray treenode, boolean *goteof, boolean *first, pointarray nodep, long *nextnode, boolean *haslengths, node **grbg, initptr initnode, boolean unifok, long maxnodes) { /* read in user-defined tree and set it up */ /* Eats everything up to the first open paren, then * calls the recursive function addelement, which builds the * tree and calls back to initnode. */ char ch; long parens = 0; long ntips = 0; (*goteof) = false; (*nextnode) = spp; if (!**treestr) { (*goteof) = true; return; } sgetch(&ch, &parens, treestr); while (ch != '(') { /* Eat everything in the file (i.e. 
digits, tabs) until you encounter an open-paren */ sgetch(&ch, &parens, treestr); } if (haslengths != NULL) (*haslengths) = true; addelement(root, NULL, &ch, &parens, treestr, treenode, goteof, first, nodep, nextnode, &ntips, haslengths, grbg, initnode, unifok, maxnodes); if (first) (*first) = false; if (parens != 0) { printf("\n\nERROR in tree file: unmatched parentheses\n\n"); exxit(-1); } } /* treeread */ void addelement2(node *q, Char *ch, long *parens, char **treestr, pointarray treenode, boolean lngths, double *trweight, boolean *goteof, long *nextnode, long *ntips, long no_species, boolean *haslengths, boolean unifok, long maxnodes) { /* recursive procedure adds nodes to user-defined tree -- old-style bifurcating-only version */ node *pfirst = NULL, *p; long i, len, current_loop_index; boolean notlast, minusread; Char str[MAXNCH]; double valyew, divisor; long furs = 0; if ((*ch) == '(') { current_loop_index = (*nextnode) + spp; (*nextnode)++; if ( maxnodes != -1 && current_loop_index > maxnodes) { printf("ERROR in intree file: Attempting to allocate too many nodes\n"); printf("This is usually caused by a unifurcation. To use this\n"); printf("intree with this program use retree to read and write\n"); printf("this tree.\n"); exxit(-1); } /* This is an assignment of an interior node */ p = treenode[current_loop_index]; pfirst = p; notlast = true; while (notlast) { furs++; /* This while loop goes through a circle (triad for bifurcations) of nodes */ p = p->next; /* added to ensure that non base nodes in loops have indices */ p->index = current_loop_index + 1; sgetch(ch, parens, treestr); addelement2(p, ch, parens, treestr, treenode, lngths, trweight, goteof, nextnode, ntips, no_species, haslengths, unifok, maxnodes); if ((*ch) == ')') { notlast = false; do { sgetch(ch, parens, treestr); } while ((*ch) != ',' && (*ch) != ')' && (*ch) != '[' && (*ch) != ';' && (*ch) != ':'); } } if ( furs <= 1 && !unifok ) { printf("ERROR in intree file: A Unifurcation was detected.\n"); printf("To use this intree with this program use retree to read and"); printf(" write this tree\n"); exxit(-1); } } else if ((*ch) != ')') { for (i = 0; i < MAXNCH; i++) str[i] = '\0'; len = take_name_from_tree (ch, str, treestr); match_names_to_data (str, treenode, &p, spp); pfirst = p; if ((*ch) == ')') (*parens)--; (*ntips)++; strncpy (p->nayme, str, len); } else sgetch(ch, parens, treestr); if ((*ch) == '[') { /* getting tree weight from last comment field */ if (**treestr) { *trweight = strtod(*treestr, treestr); if(trweight) { sgetch(ch, parens, treestr); if (*ch != ']') { ajErr("ERROR: Missing right square bracket"); exxit(-1); } else { sgetch(ch, parens, treestr); if (*ch != ';') { ajErr("ERROR: Missing semicolon after square brackets"); exxit(-1); } } } else { ajErr("ERROR: Expecting tree weight in last comment field"); exxit(-1); } } } else if ((*ch) == ';') { (*trweight) = 1.0 ; /* the ajWarn should be for multiple trees as input */ /* ajWarn("WARNING: tree weight set to 1.0");*/ } else if (haslengths != NULL) (*haslengths) = ((*haslengths) && q == NULL); if (q != NULL) hookup(q, pfirst); if ((*ch) == ':') { processlength(&valyew, &divisor, ch, &minusread, treestr, parens); printf("processlength valyew:%f divisor:%f q: %p\n", valyew, divisor, q); if (q != NULL) { if (!minusread) q->oldlen = valyew / divisor; else q->oldlen = 0.0; if (lngths) { q->v = valyew / divisor; q->back->v = q->v; q->iter = false; q->back->iter = false; } } } } /* addelement2 */ void treeread2 (char **treestr, node **root, pointarray 
treenode, boolean lngths, double *trweight, boolean *goteof, boolean *haslengths, long *no_species, boolean unifok, long maxnodes) { /* read in user-defined tree and set it up -- old-style bifurcating-only version */ char ch; long parens = 0; long ntips = 0; long nextnode; (*goteof) = false; nextnode = 0; if (!**treestr) { (*goteof) = true; return; } sgetch(&ch, &parens, treestr); while (ch != '(') { /* Eat everything in the file (i.e. digits, tabs) until you encounter an open-paren */ sgetch(&ch, &parens, treestr); } addelement2(NULL, &ch, &parens, treestr, treenode, lngths, trweight, goteof, &nextnode, &ntips, (*no_species), haslengths, unifok, maxnodes); (*root) = treenode[*no_species]; (*root)->oldlen = 0.0; if (parens != 0) { ajErr("ERROR in tree file: unmatched parentheses"); exxit(-1); } } /* treeread2 */ void exxit(int exitcode) { #ifdef WIN32 if (exitcode == 0) #endif if (exitcode == 0) ajExit(); else ajExitBad(); #ifdef WIN32 else { puts ("Hit Enter or Return to close program."); puts(" You may have to hit Enter or Return twice."); getchar (); getchar (); phyRestoreConsoleAttributes(); exit (exitcode); } #endif } /* exxit */ void unroot(tree *t, long nonodes) { /* used by fitch, restml and contml */ if (t->start->back == NULL) { if (t->start->next->back->tip) t->start = t->start->next->next->back; else t->start = t->start->next->back; } if (t->start->next->back == NULL) { if (t->start->back->tip) t->start = t->start->next->next->back; else t->start = t->start->back; } if (t->start->next->next->back == NULL) { if (t->start->back->tip) t->start = t->start->next->back; else t->start = t->start->back; } unroot_r(t->start,t->nodep,nonodes); unroot_r(t->start->back, t->nodep, nonodes); } void unroot_here(node* root, node** nodep, long nonodes) { node* tmpnode; double newl; /* used by unroot */ /* assumes bifurcation this is ok in the programs that use it */ newl = root->next->oldlen + root->next->next->oldlen; root->next->back->oldlen = newl; root->next->next->back->oldlen = newl; newl = root->next->v + root->next->next->v; root->next->back->v = newl; root->next->next->back->v = newl; root->next->back->back=root->next->next->back; root->next->next->back->back = root->next->back; while ( root->index != nonodes ) { tmpnode = nodep[ root->index ]; nodep[root->index] = root; root->index++; root->next->index++; root->next->next->index++; nodep[root->index - 2] = tmpnode; tmpnode->index--; tmpnode->next->index--; tmpnode->next->next->index--; } } void unroot_r(node* p, node** nodep, long nonodes) { /* used by unroot */ node *q; if ( p->tip) return; q = p->next; while ( q != p ) { if (q->back == NULL) unroot_here(q, nodep, nonodes); else unroot_r(q->back, nodep, nonodes); q = q->next; } } void clear_connections(tree *t, long nonodes) { long i; node *p; for ( i = 0 ; i < nonodes ; i++) { p = t->nodep[i]; if (p != NULL) { p->back = NULL; p->v = 0; for (p = p->next; p && p != t->nodep[i]; p = p->next) { p->next->back = NULL; p->next->v = 0; } } } } #ifdef WIN32 void phySaveConsoleAttributes() { if ( GetConsoleScreenBufferInfo(hConsoleOutput, &savecsbi) ) savecsbi_valid = true; } /* PhySaveConsoleAttributes */ void phySetConsoleAttributes() { hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE); if ( hConsoleOutput == INVALID_HANDLE_VALUE ) hConsoleOutput = NULL; if ( hConsoleOutput != NULL ) { phySaveConsoleAttributes(); SetConsoleTextAttribute(hConsoleOutput, BACKGROUND_GREEN | BACKGROUND_BLUE | BACKGROUND_INTENSITY); } } /* phySetConsoleAttributes */ void phyRestoreConsoleAttributes() { COORD 
coordScreen = { 0, 0 }; DWORD cCharsWritten; DWORD dwConSize; printf("Press enter to quit.\n"); fflush(stdout); getchar(); if ( savecsbi_valid ) { dwConSize = savecsbi.dwSize.X * savecsbi.dwSize.Y; SetConsoleTextAttribute(hConsoleOutput, savecsbi.wAttributes); FillConsoleOutputAttribute( hConsoleOutput, savecsbi.wAttributes, dwConSize, coordScreen, &cCharsWritten ); } } /* phyRestoreConsoleAttributes */ void phyFillScreenColor() { COORD coordScreen = { 0, 0 }; DWORD cCharsWritten; CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ DWORD dwConSize; if ( GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ) ) { dwConSize = csbi.dwSize.X * csbi.dwSize.Y; FillConsoleOutputAttribute( hConsoleOutput, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten ); } } /* PhyFillScreenColor */ void phyClearScreen() { COORD coordScreen = { 0, 0 }; /* here's where we'll home the cursor */ DWORD cCharsWritten; CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ DWORD dwConSize; /* number of character cells in the current buffer */ /* get the number of character cells in the current buffer */ GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ); dwConSize = csbi.dwSize.X * csbi.dwSize.Y; /* fill the entire screen with blanks */ FillConsoleOutputCharacter( hConsoleOutput, (TCHAR) ' ', dwConSize, coordScreen, &cCharsWritten ); /* get the current text attribute */ GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ); /* now set the buffer's attributes accordingly */ FillConsoleOutputAttribute( hConsoleOutput, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten ); /* put the cursor at (0, 0) */ SetConsoleCursorPosition( hConsoleOutput, coordScreen ); return; } /* PhyClearScreen */ #endif /* WIN32 */ /* These functions are temporarily used for translating the fixed-width * space-padded nayme array to an array of null-terminated char *. */ char **stringnames_new(void) { /* Copy nayme array to null terminated strings and return array of char *. * Spaces are stripped from end of naym's. * Returned array size is spp+1; last element is NULL. */ char **names; char *ch; long /*len,*/ i; names = (char **)Malloc((spp+1) * sizeof(char *)); for ( i = 0; i < spp; i++ ) { /*len = strlen(nayme[i]);*/ names[i] = (char *)Malloc((MAXNCH+1) * sizeof(char)); strncpy(names[i], nayme[i], MAXNCH); names[i][MAXNCH] = '\0'; /* Strip trailing spaces */ for ( ch = names[i] + MAXNCH - 1; *ch == ' ' || *ch == '\0'; ch-- ) *ch = '\0'; } names[spp] = NULL; return names; } void stringnames_delete(char **names) { /* Free a string array returned by stringnames_new() */ long i; assert( names != NULL ); for ( i = 0; i < spp; i++ ) { assert( names[i] != NULL ); free(names[i]); } free(names); } int fieldwidth_double(double val, unsigned int precision) { /* Printf a double to a temporary buffer with specified precision using %g * and return its length. Precision must not be greater than 999,999 */ char format[10]; char buf[0x200]; /* TODO: What's the largest possible? */ if (precision > 999999) abort(); sprintf(format, "%%.%uf", precision); /* %.Nf */ /* snprintf() would be better, but is it avaliable on all systems? */ return sprintf(buf, format, val); } void output_matrix_d(FILE *fp, double **matrix, unsigned long rows, unsigned long cols, char **row_head, char **col_head, int flags) { /* * Print a matrix of double to file. Headings are given in row_head and * col_head, either of which may be NULL to indicate that headings should not * be printed. 
Otherwise, they must be null-terminated arrays of pointers to * null-terminalted character arrays. * * The macro OUTPUT_PRECISION defines the number of significant figures to * print, and OUTPUT_TEXTWIDTH defines the maximum length of each line. * * Optional formatting is specified by flags argument, using macros MAT_* * defined in phylip.h. */ unsigned *colwidth; /* [0..spp-1] min width of each column */ unsigned headwidth; /* min width of row header column */ unsigned long linelen; /* length of current printed line */ unsigned fw; unsigned long row, col; unsigned long i; unsigned long cstart, cend; unsigned long textwidth = OUTPUT_TEXTWIDTH; const unsigned int gutter = 1; boolean do_block; boolean lower_triangle; boolean border; boolean output_cols; boolean pad_row_head; if ( flags & MAT_NOHEAD ) col_head = NULL; if ( flags & MAT_NOBREAK ) textwidth = 0; do_block = (flags & MAT_BLOCK) && (textwidth > 0); lower_triangle = flags & MAT_LOWER; border = flags & MAT_BORDER; output_cols = flags & MAT_PCOLS; pad_row_head = flags & MAT_PADHEAD; /* Determine minimal width for row headers, if given */ headwidth = 0; if ( row_head != NULL ) { for (row = 0; row < rows; row++) { fw = strlen(row_head[row]); if ( headwidth < fw ) headwidth = fw; } } /* Enforce minimum of 10 ch for machine-readable output */ if ( (pad_row_head) && (headwidth < 10) ) headwidth = 10; /* Determine minimal width for each matrix col */ colwidth = (unsigned int *)Malloc(spp * sizeof(int)); for (col = 0; col < cols; col++) { if ( col_head != NULL ) colwidth[col] = strlen(col_head[col]); else colwidth[col] = 0; for (row = 0; row < rows; row++) { fw = fieldwidth_double(matrix[row][col], PRECISION); if ( colwidth[col] < fw ) colwidth[col] = fw; } } /*** Print the matrix ***/ /* Number of columns if requested */ if ( output_cols ) { fprintf(fp, "%5lu\n", cols); } /* Omit last column for lower triangle */ if ( lower_triangle ) cols--; /* Blocks */ cstart = cend = 0; while ( cend != cols ) { if ( do_block ) { linelen = headwidth; for ( col = cstart; col < cols; col++ ) { if ( linelen + colwidth[col] + gutter > textwidth ) { break; } linelen += colwidth[col] + gutter; } cend = col; /* Always print at least one, regardless of line len */ if ( cend == cstart ) cend++; } else { cend = cols; } /* Column headers */ if ( col_head != NULL ) { /* corner space */ for ( i = 0; i < headwidth; i++ ) putc(' ', fp); if ( border ) { for ( i = 0; i < gutter+1; i++ ) putc(' ', fp); } /* Names */ for ( col = cstart; col < cend; col++ ) { for ( i = 0; i < gutter; i++ ) putc(' ', fp); /* right justify */ fw = strlen(col_head[col]); for ( i = 0; i < colwidth[col] - fw; i++ ) putc(' ', fp); fputs(col_head[col], fp); } putc('\n', fp); } /* Top border */ if ( border ) { for ( i = 0; i < headwidth + gutter; i++ ) putc(' ', fp); putc('\\', fp); for ( col = cstart; col < cend; col++ ) { for ( i = 0; i < colwidth[col] + gutter; i++ ) putc('-', fp); } putc('\n', fp); } /* Rows */ for (row = 0; row < rows; row++) { /* Row header, if given */ if ( row_head != NULL ) { /* right-justify for non-machine-readable */ if ( !pad_row_head ) { for ( i = strlen(row_head[row]); i < headwidth; i++ ) putc(' ', fp); } fputs(row_head[row], fp); /* left-justify for machine-readable */ if ( pad_row_head ) { for ( i = strlen(row_head[row]); i < headwidth; i++ ) putc(' ', fp); } } linelen = headwidth; /* Left border */ if ( border ) { for ( i = 0; i < gutter; i++ ) putc(' ', fp); putc('|', fp); linelen += 2; } /* Row data */ for (col = cstart; col < cend; col++) { /* cols */ /* Stop 
after col == row for lower triangle */ if ( lower_triangle && col >= row ) break; /* Break line if going over max text width */ if ( !do_block && textwidth > 0 ) { if ( linelen + colwidth[col] > textwidth ) { putc('\n', fp); linelen = 0; } linelen += colwidth[col] + gutter; } for ( i = 0; i < gutter; i++ ) putc(' ', fp); /* Print the datum */ fprintf(fp, "%*.6f", colwidth[col], matrix[row][col]); } putc('\n', fp); } /* End of row */ if (col_head != NULL) putc('\n', fp); /* blank line */ cstart = cend; } /* End of block */ free(colwidth); } /* output_matrix_d */ void emboss_printtree(node *p, char* title) { int i; int ilen; node* root = p; printf("\n%s\n", title); ilen = strlen(title); for (i=0;i < ilen; i++) printf("="); printf("\n"); emboss_printtreenode(p, root); return; } static void emboss_printtreenode(node *p, node* root) { int i; node* q; static int margin=0; char name[256]; int ended = false; double x; char spaces[256]; for(i=0;itip) /* named node */ { strncpy(name, nayme[p->index - 1], nmlngth); for (i=nmlngth;i;i--) { if (name[i-1] == ' ') { name[i-1] = '_'; } else { if (!ended) { name[i] = '\0'; ended = true; } } } printf("'%s'\n", name); if (p->index) printf("%s : index:%ld\n",spaces, p->index); if (p->tip) printf("%s : tip:%s\n", spaces, (p->tip ? "true" : "false")); if (p->initialized) printf("%s : initialized:%s\n", spaces, (p->initialized ? "true" : "false")); if (p->visited) printf("%s : visited:%s\n", spaces, (p->visited ? "true" : "false")); if (p->numdesc) printf("%s : numdesc:%ld\n",spaces, p->numdesc); if (p->times_in_tree) printf("%s : times_in_tree:%f\n",spaces, p->times_in_tree); if (p->sumsteps) printf("%s : sumsteps:%f\n",spaces, p->sumsteps); printf("\n"); } else /* link to next nodes - loop until we get back here */ { printf("(\n"); /* numdesc: number of immediate descendants */ if (p->index) printf("%s : index:%ld\n",spaces, p->index); if (p->tip) printf("%s : tip:%s\n", spaces, (p->tip ? "true" : "false")); if (p->initialized) printf("%s : initialized:%s\n", spaces, (p->initialized ? "true" : "false")); if (p->visited) printf("%s : visited:%s\n", spaces, (p->visited ? "true" : "false")); if (p->numdesc) printf("%s : numdesc:%ld\n",spaces, p->numdesc); if (p->times_in_tree) printf("%s : times_in_tree:%f\n",spaces, p->times_in_tree); if (p->sumsteps) printf("%s : sumsteps:%f\n",spaces, p->sumsteps); printf("\n"); margin += 2; q=p->next; while (q != p) { emboss_printtreenode(q->back, root); q = q->next; if (q == p) break; printf("%s ,\n\n",spaces); } margin -= 2; printf("%s)\n", spaces); } if (p != root) { x = p->v; printf("%s+ len:%.5f\n\n", spaces, x); return; } printf(";\n\n"); margin = 0; } PHYLIPNEW-3.69.650/src/dollop.c0000664000175000017500000006200711305225544012520 00000000000000 #include "phylip.h" #include "disc.h" #include "dollo.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define maxtrees 100 /* maximum number of tied trees stored */ AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void allocrest(void); void doinit(void); void inputoptions(void); void doinput(void); void dollop_count(node *, steptr, steptr); void preorder(node *, steptr, steptr, long, boolean, long, bitptr, pointptr); void evaluate(node *); void savetree(void); void dollop_addtree(long *); void tryadd(node *, node **, node **); void addpreorder(node *, node *, node *); void tryrearr(node *, node **, boolean *); void repreorder(node *, node **, boolean *); void rearrange(node **); void describe(void); void initdollopnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void maketree(void); void reallocchars(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH]; Char weightfilename[FNMLNGTH], ancfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node *root; long col, msets, ith, j, l, njumble, jumb, numtrees; long inseed, inseed0; boolean jumble, usertree, weights, ancvar, questions, dollo, trout, progress, treeprint, stepbox, ancseq, mulsets, firstset, justwts; boolean *ancone, *anczero, *ancone0, *anczero0; pointptr treenode; /* pointers to all nodes in tree */ double threshold; double *threshwt; longer seed; long *enterorder; double **fsteps; steptr numsteps; bestelm *bestrees; Char *guess; gbit *garbage; char *progname; /* Variables for treeread */ boolean goteof, firsttree, haslengths, phirst; pointarray nodep; node *grbg; long *zeros; /* Local variables for maketree, propagated globally for C version: */ long minwhich; double like, bestyet, bestlike, bstlike2, minsteps; boolean lastrearr; double nsteps[maxuser]; node *there; long fullset; long shimotrees; bitptr zeroanc, oneanc; long *place; Char ch; boolean *names; steptr numsone, numszero; bitptr steps; void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; AjPStr method = NULL; ancvar = false; dollo = true; jumble = false; njumble = 1; trout = true; usertree = false; goteof = false; weights = false; justwts = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "d")) dollo = true; else dollo = false; if(!usertree) { njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; threshold = 
ajAcdGetFloat("threshold"); printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } printf("\nDollo and polymorphism parsimony algorithm, version %s\n\n", VERSION); fprintf(outfile,"\nDollo and polymorphism parsimony algorithm,"); fprintf(outfile," version %s\n\n",VERSION); } /* emboss_getoptions */ void reallocchars(void) { long i; free(extras); free(weight); free(threshwt); free(numsteps); free(ancone); free(anczero); free(ancone0); free(anczero0); free(numsone); free(numszero); free(guess); if (usertree) { for (i = 1; i <= maxuser; i++){ free(fsteps); fsteps[i - 1] = (double *)Malloc(chars*sizeof(double)); } } extras = (steptr)Malloc(chars*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); numsteps = (steptr)Malloc(chars*sizeof(long)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); numsone = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); guess = (Char *)Malloc(chars*sizeof(Char)); } void allocrest(void) { long i; extras = (steptr)Malloc(chars*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); if (usertree) { fsteps = (double **)Malloc(maxuser*sizeof(double *)); for (i = 1; i <= maxuser; i++) fsteps[i - 1] = (double *)Malloc(chars*sizeof(double)); } bestrees = (bestelm *) Malloc(maxtrees*sizeof(bestelm)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1].btree = (long *)Malloc(nonodes*sizeof(long)); numsteps = (steptr)Malloc(chars*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); place = (long *)Malloc(nonodes*sizeof(long)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); numsone = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); guess = (Char *)Malloc(chars*sizeof(Char)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); steps = (bitptr)Malloc(words*sizeof(long)); } /* allocrest */ void doinit(void) { /* initializes variables */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); words = chars / bits + 1; // if (printdata) // fprintf(outfile, "%2ld species, %3ld characters\n\n", spp, chars); alloctree(&treenode); setuptree(treenode); allocrest(); } /* doinit */ void inputoptions(void) { /* input the information on the options */ long i; if(justwts){ if(firstset){ if (ancvar) { inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); } } for (i = 0; i < (chars); i++) weight[i] = 1; inputweightsstr(phyloweights->Str[0], chars, weight, &weights); } else { if (!firstset) { samenumspstate(phylostates[ith-1], &chars, ith); reallocchars(); } for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); if (weights) inputweightsstr(phyloweights->Str[ith-1], 
chars, weight, &weights); } if ((weights || justwts) && printdata) printweights(outfile, 0, chars, weight, "Characters"); for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = false; } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } if (ancvar && printdata) printancestors(outfile, anczero, ancone); questions = false; for (i = 0; i < (chars); i++) { questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void doinput(void) { /* reads the input data */ inputoptions(); if(!justwts || firstset) disc_inputdata(phylostates[ith-1], treenode, dollo, printdata, outfile); } /* doinput */ void dollop_count(node *p, steptr numsone, steptr numszero) { /* counts the number of steps in a fork of the tree. The program spends much of its time in this PROCEDURE */ long i, j, l; if (dollo) { for (i = 0; i < (words); i++) steps[i] = (treenode[p->back->index - 1]->stateone[i] & p->statezero[i] & zeroanc[i]) | (treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & fullset & (~zeroanc[i])); } else { for (i = 0; i < (words); i++) steps[i] = treenode[p->back->index - 1]->stateone[i] & treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & p->statezero[i]; } j = 1; l = 0; for (i = 0; i < (chars); i++) { l++; if (l > bits) { l = 1; j++; } if (((1L << l) & steps[j - 1]) != 0) { assert(j <= words); /* checking array indexing */ if (((1L << l) & zeroanc[j - 1]) != 0) numszero[i] += weight[i]; else numsone[i] += weight[i]; } } } /* dollop_count */ void preorder(node *p, steptr numsone, steptr numszero, long words, boolean dollo, long fullset, bitptr zeroanc, pointptr treenode) { /* go back up tree setting up and counting interior node states */ if (!p->tip) { correct(p, fullset, dollo, zeroanc, treenode); preorder(p->next->back, numsone,numszero, words, dollo, fullset, zeroanc, treenode); preorder(p->next->next->back, numsone,numszero, words, dollo, fullset, zeroanc, treenode); } if (p->back != NULL) dollop_count(p, numsone,numszero); } /* preorder */ void evaluate(node *r) { /* Determines the number of losses or polymorphisms needed for a tree. 
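   In outline, the code below scores the tree twice: once with every word of
   zeroanc set to fullset (each character assumed ancestrally 0, steps counted
   into numszero) and once with zeroanc cleared (ancestral state 1, counted
   into numsone); numsteps[i] then takes the smaller count that anczero[i] and
   ancone[i] allow.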
This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum, term; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } for (i = 0; i < (words); i++) zeroanc[i] = fullset; postorder(r); preorder(r, numsone, numszero, words, dollo, fullset, zeroanc, treenode); for (i = 0; i < (words); i++) zeroanc[i] = 0; postorder(r); preorder(r, numsone, numszero, words, dollo, fullset, zeroanc, treenode); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) term = stepnum; else term = threshwt[i]; sum += term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { minwhich = 1; minsteps = sum; } else if (sum < minsteps) { minwhich = which; minsteps = sum; } } like = -sum; } /* evaluate */ void savetree() { /* record in place where each species has to be added to reconstruct this tree */ long i, j; node *p; boolean done; for (i = 0; i < (nonodes); i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= (spp); i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; p = p->back; if (p != NULL) p = treenode[p->index - 1]; } if (i > 1) { place[i - 1] = place[p->index - 1]; j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = spp + i - 1; p = treenode[p->index - 1]; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); } } } } /* savetree */ void dollop_addtree(long *pos) { /*puts tree from ARRAY place in its proper position in ARRAY bestrees */ long i; for (i =nextree - 1; i >= (*pos); i--) { memcpy(bestrees[i].btree, bestrees[i - 1].btree, spp*sizeof(long)); bestrees[i].gloreange = bestrees[i - 1].gloreange; bestrees[i].locreange = bestrees[i - 1].locreange; bestrees[i].collapse = bestrees[i - 1].collapse; } for (i = 0; i < (spp); i++) bestrees[(*pos) - 1].btree[i] = place[i]; nextree++; } /* dollop_addtree */ void tryadd(node *p, node **item, node **nufork) { /* temporarily adds one fork and one tip to the tree. 
if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; boolean found; add(p, *item, *nufork, &root, treenode); evaluate(root); if (lastrearr) { if (like >= bstlike2) { savetree(); if (like > bstlike2) { bestlike = bstlike2 = like; pos = 1; nextree = 1; dollop_addtree(&pos); } else { pos = 0; findtree(&found, &pos, nextree, place, bestrees); /* findtree calls for a bestelm* but is getting */ /* a long**, LM */ if (!found) { if (nextree <= maxtrees) dollop_addtree(&pos); } } } } if (like > bestyet) { bestyet = like; there = p; } re_move(item, nufork, &root, treenode); } /* tryadd */ void addpreorder(node *p, node *item_, node *nufork_) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ node *item= item_; node *nufork = nufork_; if (p == NULL) return; tryadd(p, &item,&nufork); if (!p->tip) { addpreorder(p->next->back, item, nufork); addpreorder(p->next->next->back, item, nufork); } } /* addpreorder */ void tryrearr(node *p, node **r, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success := TRUE and keeps the new tree. otherwise, restores the old tree */ node *frombelow, *whereto, *forknode; double oldlike; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (forknode->back == NULL) return; oldlike = bestyet; if (p->back->next->next == forknode) frombelow = forknode->next->next->back; else frombelow = forknode->next->back; whereto = forknode->back; re_move(&p, &forknode, &root, treenode); add(whereto, p, forknode, &root, treenode); evaluate(*r); if (oldlike - like < LIKE_EPSILON) { re_move(&p, &forknode, &root, treenode); add(frombelow, p, forknode, &root, treenode); } else { (*success) = true; bestyet = like; } } /* tryrearr */ void repreorder(node *p, node **r, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p, r,success); if (!p->tip) { repreorder(p->next->back, r,success); repreorder(p->next->next->back, r,success); } } /* repreorder */ void rearrange(node **r_) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ node **r = r_; boolean success = true; while (success) { success = false; repreorder(*r, r,&success); } } /* rearrange */ void describe() { /* prints ancestors, steps and table of numbers of steps in each character */ if (treeprint) fprintf(outfile, "\nrequires a total of %10.3f\n", -like); if (stepbox) { putc('\n', outfile); writesteps(weights, dollo, numsteps); } if (questions) guesstates(guess); if (ancseq) { hypstates(fullset, dollo, guess, treenode, root, garbage, zeroanc, oneanc); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout(root, nextree, &col, root); } } /* describe */ void initdollopnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ /* LM 7/27 I added this function and the commented lines around */ /* treeread() to get the program running, but all 4 move programs*/ /* are improperly integrated into the v4.0 support files. 
As is */ /* this is a patchwork function */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, chars, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, chars, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: /* if there is a length, read it and discard value */ processlength(&valyew, &divisor, ch, &minusread, treestr, parens); break; default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; } } /* initdollopnode */ void maketree() { /* constructs a binary tree from the pointers in treenode. adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j, nextnode; double gotlike; node *item, *nufork, *dummy, *p; char *treestr; fullset = (1L << (bits + 1)) - (1L << 1); if (!usertree) { for (i = 1; i <= (spp); i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); root = treenode[enterorder[0] - 1]; add(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], treenode[spp], &root, treenode); if (progress) { printf("Adding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = false; for (i = 3; i <= (spp); i++) { bestyet = -350.0 * spp * chars; item = treenode[enterorder[i - 1] - 1]; nufork = treenode[spp + i - 2]; addpreorder(root, item, nufork); add(there, item, nufork, &root, treenode); like = bestyet; rearrange(&root); if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = (i == spp); if (lastrearr) { if (progress) { printf("\nDoing global rearrangements\n"); printf(" !"); for (j = 1; j <= (nonodes); j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } bestlike = bestyet; if (jumb == 1) { bstlike2 = bestlike; nextree = 1; } do { if (progress) printf(" "); gotlike = bestlike; for (j = 0; j < (nonodes); j++) { bestyet = - 350.0 * spp * chars; item = treenode[j]; if (item != root) { nufork = treenode[j]->back; re_move(&item, &nufork, &root, treenode); there = root; addpreorder(root, item, nufork); add(there, item, nufork, &root, treenode); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } while (bestlike > gotlike); } } if (progress) putchar('\n'); for (i = spp - 1; i >= 1; i--) re_move(&treenode[i], &dummy, &root, treenode); if (jumb == njumble) { if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], &root, treenode); for (j = 3; j <= spp; j++) { add(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], treenode[spp + j - 2], &root, treenode);} evaluate(root); printree(1.0, treeprint, root); describe(); for (j = 1; j < (spp); j++) re_move(&treenode[j], &dummy, &root, treenode); } } } else { if (numtrees > 2) { emboss_initseed(inseed, &inseed0, seed); printf("\n"); } if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n"); } names = (boolean 
*)Malloc(spp*sizeof(boolean)); which = 1; firsttree = true; /**/ nodep = NULL; /**/ nextnode = 0; /**/ haslengths = 0; /**/ phirst = 0; /**/ zeros = (long *)Malloc(chars*sizeof(long)); /**/ for (i = 0; i < chars; i++) /**/ zeros[i] = 0; /**/ while (which <= numtrees) { treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdollopnode,false,nonodes); for (i = spp; i < (nonodes); i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->stateone = (bitptr)Malloc(words*sizeof(long)); p->statezero = (bitptr)Malloc(words*sizeof(long)); p = p->next; } } /* debug: see comment at initdollopnode() */ if (treeprint) fprintf(outfile, "\n\n"); evaluate(root); printree(1.0, treeprint, root); describe(); which++; } FClose(intree); fprintf(outfile, "\n\n"); if (numtrees > 1 && chars > 1) standev(numtrees, minwhich, minsteps, nsteps, fsteps, seed); free(names); } if (jumb == njumble) { if (progress) { printf("Output written to file \"%s\"\n\n", outfilename); if (trout) printf("Trees also written onto file \"%s\"\n\n", outtreename); } if (ancseq) freegarbage(&garbage); } } /* maketree */ int main(int argc, Char *argv[]) { /* Dollo or polymorphism parsimony by uphill search */ #ifdef MAC argc = 1; /* macsetup("Dollop",""); */ argv[0] = "Dollop"; #endif init(argc, argv); emboss_getoptions("fdollop", argc, argv); /* reads in spp, chars, and the data. Then calls maketree to construct the tree */ progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; garbage = NULL; firstset = true; bits = 8*sizeof(long) - 1; doinit(); if (dollo) fprintf(outfile, "Dollo"); else fprintf(outfile, "Polymorphism"); fprintf(outfile, " parsimony method\n\n"); if (printdata && justwts) fprintf(outfile, "%2ld species, %3ld characters\n\n", spp, chars); for (ith = 1; ith <= (msets); ith++) { if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } if (justwts){ fprintf(outfile, "Weights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata && !justwts) fprintf(outfile, "%2ld species, %3ld characters\n\n", spp, chars); doinput(); if (ith == 1) firstset = false; for (jumb = 1; jumb <= njumble; jumb++) maketree(); } /* this would be an appropriate place to deallocate memory, including these items: */ free(steps); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Dollo or polymorphism parsimony by uphill search */ PHYLIPNEW-3.69.650/src/dist.c0000664000175000017500000002640511253743724012203 00000000000000#include "phylip.h" #include "dist.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ void alloctree(pointptr *treenode, long nonodes) { /* allocate spp tips and (nonodes - spp) forks, each containing three * nodes. Fill in treenode where 0..spp-1 are pointers to tip nodes, and * spp..nonodes-1 are pointers to one node in each fork. 
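 *
 * An illustrative call (the variable forks below is hypothetical): with
 * spp = 5 and nonodes = 9,
 *
 *     pointptr forks;
 *     alloctree(&forks, 9);
 *
 * leaves forks[0]..forks[4] pointing at single tip nodes and forks[5]..forks[8]
 * each pointing into a ring of three nodelets, so p->next->next->next == p
 * holds for every fork nodelet p.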
*/ /* used in fitch, kitsch, neighbor */ long i, j; node *p, *q; *treenode = (pointptr)Malloc(nonodes*sizeof(node *)); for (i = 0; i < spp; i++) (*treenode)[i] = (node *)Malloc(sizeof(node)); for (i = spp; i < nonodes; i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree */ void freetree(pointptr *treenode, long nonodes) { long i; node *p, *q; for (i = 0; i < spp; i++) free((*treenode)[i]); for (i = spp; i < nonodes; i++) { p = (*treenode)[i]; q = p->next; while(q != p) { node * r = q; q = q->next; free(r); } free(p); } free(*treenode); } /* freetree */ void allocd(long nonodes, pointptr treenode) { /* used in fitch & kitsch */ long i, j; node *p; for (i = 0; i < (spp); i++) { treenode[i]->d = (vector)Malloc(nonodes*sizeof(double)); } for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->d = (vector)Malloc(nonodes*sizeof(double)); p = p->next; } } } void freed(long nonodes, pointptr treenode) { /* used in fitch */ long i, j; node *p; for (i = 0; i < (spp); i++) { free(treenode[i]->d); } for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { free(p->d); p = p->next; } } } void allocw(long nonodes, pointptr treenode) { /* used in fitch & kitsch */ long i, j; node *p; for (i = 0; i < (spp); i++) { treenode[i]->w = (vector)Malloc(nonodes*sizeof(double)); } for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->w = (vector)Malloc(nonodes*sizeof(double)); p = p->next; } } } void freew(long nonodes, pointptr treenode) { /* used in fitch */ long i, j; node *p; for (i = 0; i < (spp); i++) { free(treenode[i]->w); } for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { free(p->w); p = p->next; } } } void setuptree(tree *a, long nonodes) { /* initialize a tree */ /* used in fitch, kitsch, & neighbor */ long i=0; node *p; for (i = 1; i <= nonodes; i++) { a->nodep[i - 1]->back = NULL; a->nodep[i - 1]->tip = (i <= spp); a->nodep[i - 1]->iter = true; a->nodep[i - 1]->index = i; a->nodep[i - 1]->t = 0.0; a->nodep[i - 1]->sametime = false; a->nodep[i - 1]->v = 0.0; if (i > spp) { p = a->nodep[i-1]->next; while (p != a->nodep[i-1]) { p->back = NULL; p->tip = false; p->iter = true; p->index = i; p->t = 0.0; p->sametime = false; p = p->next; } } } a->likelihood = -1.0; a->start = a->nodep[0]; a->root = NULL; } /* setuptree */ void dist_inputdata(AjPPhyloDist dist, boolean replicates, boolean printdata, boolean lower, boolean upper, vector *x, intvector *reps) { /* read in distance matrix */ /* used in fitch & neighbor */ long i=0, j=0, k=0, columns=0; ajint ipos; if (replicates) columns = 4; else columns = 6; if (printdata) { fprintf(outfile, "\nName Distances"); if (replicates) fprintf(outfile, " (replicates)"); fprintf(outfile, "\n---- ---------"); if (replicates) fprintf(outfile, "-------------"); fprintf(outfile, "\n\n"); } ipos = 0; for (i = 0; i < spp; i++) { x[i][i] = 0.0; initnamedist(dist,i); for (j = 0; j < spp; j++) { x[i][j] = dist->Data[ipos]; reps[i][j] = dist->Replicates[ipos++]; } } if (!printdata) return; for (i = 0; i < spp; i++) { for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); putc(' ', outfile); for (j = 1; j <= spp; j++) { fprintf(outfile, "%10.5f", x[i][j - 1]); if (replicates || !replicates) fprintf(outfile, " (%3ld)", reps[i][j - 1]); if (j % columns == 0 && j < spp) { putc('\n', outfile); for (k = 1; k <= nmlngth + 1; k++) putc(' ', outfile); } } putc('\n', outfile); } 
putc('\n', outfile); } /* inputdata */ void coordinates(node *p, double lengthsum, long *tipy, double *tipmax, node *start, boolean njoin) { /* establishes coordinates of nodes */ node *q, *first, *last; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; if (lengthsum > *tipmax) *tipmax = lengthsum; return; } q = p->next; do { if (q->back) coordinates(q->back, lengthsum + q->v, tipy,tipmax, start, njoin); q = q->next; } while ((p == start || p != q) && (p != start || p->next != q)); first = p->next->back; q = p; while (q->next != p && q->next->back) /* is this right ? */ q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if (p == start && p->back) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* coordinates */ void drawline(long i, double scale, node *start, boolean rooted) { /* draws one row of the tree diagram by moving up tree */ node *p, *q; long n=0, j=0; boolean extra=false, trif=false; node *r, *first =NULL, *last =NULL; boolean done=false; p = start; q = start; extra = false; trif = false; if (i == (long)p->ycoord && p == start) { /* display the root */ if (rooted) { if (p->index - spp >= 10) fprintf(outfile, "-"); else fprintf(outfile, "--"); } else { if (p->index - spp >= 10) fprintf(outfile, " "); else fprintf(outfile, " "); } if (p->index - spp >= 10) fprintf(outfile, "%2ld", p->index - spp); else fprintf(outfile, "%ld", p->index - spp); extra = true; trif = true; } else fprintf(outfile, " "); do { if (!p->tip) { /* internal nodes */ r = p->next; /* r->back here is going to the same node. */ do { if (!r->back) { r = r->next; continue; } if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; break; } r = r->next; } while (!((p != start && r == p) || (p == start && r == p->next))); first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; if (!rooted && (p == start)) last = p->back; } /* end internal node case... 
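   (q is now the child whose ymin..ymax range contains row i; first and last
   are the first and last subtrees hanging from p, used below to decide where
   the vertical '!' connector is drawn)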
*/ /* draw the line: */ done = (p->tip || p == q); n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); if (!q->tip) { if ((n < 3) && (q->index - spp >= 10)) n = 3; if ((n < 2) && (q->index - spp < 10)) n = 2; } if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if (p->ycoord != q->ycoord) putc('+', outfile); if (trif) { n++; trif = false; } if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); trif = false; } } if (q != p) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree(node *start, boolean treeprint, boolean njoin, boolean rooted) { /* prints out diagram of the tree */ /* used in fitch & neighbor */ long i; long tipy; double scale,tipmax; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; coordinates(start, 0.0, &tipy, &tipmax, start, njoin); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline(i, scale, start, rooted); putc('\n', outfile); } /* printree */ void treeoutr(node *p, long *col, tree *curtree) { /* write out file with representation of final tree. * Rooted case. Used in kitsch and neighbor. */ long i, n, w; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } (*col) += n; } else { putc('(', outtree); (*col)++; treeoutr(p->next->back,col,curtree); putc(',', outtree); (*col)++; if ((*col) > 55) { putc('\n', outtree); (*col) = 0; } treeoutr(p->next->next->back,col,curtree); putc(')', outtree); (*col)++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree->root) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int)(w + 7), x); (*col) += w + 8; } } /* treeoutr */ void treeout(node *p, long *col, double m, boolean njoin, node *start) { /* write out file with representation of final tree */ /* used in fitch & neighbor */ long i=0, n=0, w=0; Char c; double x=0.0; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; treeout(p->next->back, col, m, njoin, start); putc(',', outtree); (*col)++; if (*col > 55) { putc('\n', outtree); *col = 0; } treeout(p->next->next->back, col, m, njoin, start); if (p == start && njoin) { putc(',', outtree); treeout(p->back, col, m, njoin, start); } putc(')', outtree); (*col)++; } x = p->v; if (x > 0.0) w = (long)(m * log(x)); else if (x == 0.0) w = 0; else w = (long)(m * log(-x)) + 1; if (w < 0) w = 0; if (p == start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int) w + 7, x); *col += w + 8; } } /* treeout */ PHYLIPNEW-3.69.650/src/mlclock.c0000664000175000017500000003360711253743724012666 00000000000000/* PHYLIP version 3.6. 
(c) Copyright 1986-2007 by the University of * Washington and by Joseph Felsenstein. Written by Joseph * Felsenstein. Permission is granted to copy and use this program * provided no fee is charged for it and provided that this copyright * notice is not removed. */ #include "phylip.h" #include "seq.h" #include "mlclock.h" /* Define the minimum branch length to be enforced for clocklike trees */ const double MIN_BRANCH_LENGTH = 1e-6; /* MIN_ROOT_TYME is added to the current root tyme and used as the lower * bound when optimizing at the root. */ const double MIN_ROOT_TYME = -10; static evaluator_t evaluate = NULL; static tree *curtree = NULL; /* current tree in use */ static node *current_node = NULL; /* current node being optimized */ static double cur_node_eval(double x); static double evaluate_tyme(tree *t, node *p, double tyme); void mlclock_init(tree *t, evaluator_t f) { curtree = t; evaluate = f; } boolean all_tymes_valid(node *p, double minlength, boolean fix) { /* Ensures that all node tymes at node p and descending from it are * valid, with all branches being not less than minlength. If any * inconsistencies are found, returns true. If 'fix' is given, * adjustments are made to make the subtree consistent. Otherwise if * assertions are enabled, all inconsistencies are fatal. No effort is * made to check that the parent node tyme p->back->tyme is less than * p->tyme. */ node *q; double max_tyme; boolean ret = true; /* All tips should have tyme == 0.0 */ if ( p->tip ) { if ( p->tyme == 0.0 ) return true; else { /* this would be very bad. */ if ( fix ) p->tyme = 0.0; else assert( p->tyme == 0 ); return false; } } for ( q = p->next; q != p; q = q->next ) { /* All nodes in ring should have same tyme */ if ( q && q->tyme != p->tyme ) { if ( fix ) q->tyme = p->tyme; else assert( q->tyme == p->tyme ); ret = false; } /* All subtrees should be OK too */ if (!q->back) continue; if ( all_tymes_valid(q->back, minlength, fix) == false ) ret = false; } /* Tymes cannot be greater than the minimum child time, less * branch length */ max_tyme = min_child_tyme(p) - minlength; if ( p->tyme > max_tyme ) { if ( fix ) setnodetymes(p, max_tyme); else assert( p->tyme < max_tyme ); return false; } return ret; } void setnodetymes(node* p, double newtyme) { /* Set node tyme for an entire fork. Also clears initialized flags on this * fork, but not recursively. inittrav() must be called before evaluating * elsewhere. */ node * q; curtree->likelihood = UNDEFINED; p->tyme = newtyme; p->initialized = false; if ( p->tip ) return; for ( q = p->next; q != p; q = q->next ) { assert(q); q->tyme = newtyme; q->initialized = false; } } /* setnodetymes */ double min_child_tyme(node *p) { /* Return the minimum tyme of all children. p must be a parent nodelet */ double min; node *q; min = 1.0; /* Tymes are always nonpositive */ for ( q = p->next; q != p; q = q->next ) { if ( q->back == NULL ) continue; if ( q->back->tyme < min ) min = q->back->tyme; } return min; } /* min_child_tyme */ double parent_tyme(node *p) { /* Return the tyme of the parent of node p. p must be a parent nodelet. */ if ( p->back ) { return p->back->tyme; } else { return p->tyme + MIN_ROOT_TYME; } } /* parent_tyme */ boolean valid_tyme(node *p, double tyme) { /* Return true if tyme is a valid tyme to assign to node p. tyme must be * finite, not greater than any of p's children, and not less than p's * parent. Also, tip nodes can only be assigned 0. Otherwise false is * returned. */ /* p must be the parent nodelet of its node group. 
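   As a hedged sketch (proposed is a hypothetical candidate tyme; the helpers
   are the ones defined in this file), a caller can clamp a value before
   assigning it:

       if (proposed > min_child_tyme(p) - MIN_BRANCH_LENGTH)
         proposed = min_child_tyme(p) - MIN_BRANCH_LENGTH;
       if (proposed < parent_tyme(p) + MIN_BRANCH_LENGTH)
         proposed = parent_tyme(p) + MIN_BRANCH_LENGTH;
       assert( valid_tyme(p, proposed) );
       setnodetymes(p, proposed);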
*/ assert( p->tip != true || tyme == 0.0 ); assert( tyme <= min_child_tyme(p) ); assert( tyme >= parent_tyme(p) ); return true; } /* valid_tyme */ static long node_max_depth(tree *t, node *p) { /* Return the largest number of branches between node p and any tip node. */ long max_depth = 0; long cdep; node *q; assert(p = pnode(t, p)); if (p->tip) return 0; for (q = p->next; q != p; q = q->next) { cdep = node_max_depth(t, q->back) + 1; if (cdep > max_depth) max_depth = cdep; } return max_depth; } static double node_max_tyme(tree *t, node *p) { /* Return the absolute maximum tyme a node can be pushed to. */ return -node_max_depth(t, p) * MIN_BRANCH_LENGTH; } void save_tymes(tree* save_tree, double tymes[]) { /* Save the current node tymes of save_tree in tymes[]. tymes must point to * an array of (nonodes - spp) elements. Tyme for node i gets saved in * tymes[i-spp]. */ int i; assert( all_tymes_valid(curtree->root, 0.0, false) ); for ( i = spp ; i < nonodes ; i++) { tymes[i - spp] = save_tree->nodep[i]->tyme; } } void restore_tymes(tree *load_tree, double tymes[]) { /* Restore the tymes saved in tymes[] to tree load_tree. See save_tymes() * */ int i; for ( i = spp ; i < nonodes ; i++) { if (load_tree->nodep[i]->tyme != tymes[i-spp]) setnodetymes(load_tree->nodep[i], tymes[i-spp]); } /* Check for invalid tymes */ assert( all_tymes_valid(curtree->root, 0.0, false) ); } static void push_tymes_to_root(tree *t, node *p, double tyme) { /* Set tyme for node p to tyme. Ancestors of p are moved down if necessary to prevent * negative branch lengths. */ node *q, *r; assert(p = pnode(t, p)); setnodetymes(p, tyme); r = p; while (r->back != NULL) { q = pnode(t, r->back); /* q = parent(r); */ if (q->tyme > r->tyme - MIN_BRANCH_LENGTH) setnodetymes(q, r->tyme - MIN_BRANCH_LENGTH); else break; r = q; } } static void push_tymes_to_tips(tree *t, node *p, double tyme) { /* Set tyme for node p to tyme. Descendants of p are moved up if necessary to * prevent negative branch lengths. */ node *q; assert( p == pnode(t, p) ); setnodetymes(p, tyme); for (q = p->next; q != p; q = q->next) { if (q->back->tyme < p->tyme + MIN_BRANCH_LENGTH) { if (q->back->tip && q->back->tyme < p->tyme) { fprintf(stderr, "Error: Attempt to move node past tips.\n" "%s line %d\n", __FILE__, __LINE__); exxit(-1); } else { if(!( q->back->tip ) ) { push_tymes_to_tips(t, q->back, p->tyme + MIN_BRANCH_LENGTH); } } } } } static void set_tyme(tree *t, node *p, double tyme) { /* Set the tyme for node p, pushing others out of the way */ /* Use rootward node in fork */ p = pnode(t, p); /* Set node tyme and push other nodes out of the way */ if (tyme < p->tyme) push_tymes_to_root(t, p, tyme); else push_tymes_to_tips(t, p, tyme); } static double evaluate_tyme(tree *t, node *p, double tyme) { /* Evaluate curtree if node p is at tyme. Return the score. Leaves original * tymes intact. 
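 * For example (illustrative only; p stands for any fork node), two candidate
 * tymes can be probed without perturbing the tree:
 *
 *     double here   = evaluate_tyme(curtree, p, p->tyme);
 *     double rootwd = evaluate_tyme(curtree, p, p->tyme - MIN_BRANCH_LENGTH);
 *
 * and every node tyme is exactly as it was before the two calls.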
*/ static double *savetymes = NULL; static long savetymes_sz = 0; long nforks = nonodes - spp; double score = 1.0; if (savetymes_sz < nforks + 1) { if (savetymes != NULL) free(savetymes); savetymes_sz = nforks; savetymes = (double *)Malloc(savetymes_sz * sizeof(double)); } /* Save the current tymes */ save_tymes(t, savetymes); set_tyme(t, p, tyme); /* Evaluate the tree */ score = evaluate(p); /* Restore original tymes */ restore_tymes(t, savetymes); assert( all_tymes_valid(curtree->root, 0.0, false) ); return score; } static double cur_node_eval(double x) { return evaluate_tyme(curtree, current_node, x); } double maximize(double min_tyme, double cur, double max_tyme, double(*f)(double), double eps, boolean *success) { /* Find the maximum of function f by parabolic interpolation and golden section search. * (based on Brent method in NR) */ /* [min_tyme, max_tyme] is the domain, cur is the best guess and must be * within the domain, eps is the fractional accuracy of the result, i.e. the * returned value x will be accurate to +/- x*eps. */ boolean bracket = false; static long max_iterations = 100; /* maximum iterations */ long it; /* iteration counter */ double x[3], lnl[3]; /* tyme (x) and log likelihood (lnl) points below, at, and above the current tyme */ double xn, yn; /* New point */ double d; /* delta x to new point */ double mid; /* Midpoint of (x[0], x[2]) */ double xmax, lnlmax; double tdelta; /* uphill step for bracket finding */ double last_d = 0.0; double prec; /* epsilon * tyme */ double t1, t2, t3, t4; /* temps for parabolic fit */ /* Bracket our maximum; We will assume that we are already close and move * uphill by exponentially increasing steps until we find a smaller value. * The initial step should be small to allow us to finish quickly if we're * still on the maximum from previous smoothings */ x[1] = cur; tdelta = fabs(10.0 * cur * eps); x[0] = cur - tdelta; if (x[0] < min_tyme) x[0] = min_tyme; lnl[1] = (*f)(x[1]); lnl[0] = (*f)(x[0]); if (lnl[0] < lnl[1]) { do { x[2] = x[1] + tdelta; if (x[2] > max_tyme) x[2] = max_tyme; lnl[2] = (*f)(x[2]); if (lnl[2] < lnl[1]) break; x[0] = x[1]; lnl[0] = lnl[1]; x[1] = x[2]; lnl[1] = lnl[2]; tdelta *= 2; } while (x[2] < max_tyme); } else { /* lnl[0] > lnl[1] */ /* shift points (0, 1) -> (1, 2) */ x[2] = x[1]; x[1] = x[0]; lnl[2] = lnl[1]; lnl[1] = lnl[0]; do { x[0] = x[1] - tdelta; if (x[0] < min_tyme) x[0] = min_tyme; lnl[0] = (*f)(x[0]); if (lnl[0] < lnl[1]) break; x[2] = x[1]; lnl[2] = lnl[1]; x[1] = x[0]; lnl[1] = lnl[0]; tdelta *= 2; } while (x[0] > min_tyme); } /* FIXME: this should not be necessary. 
Somewhere we fail to enforce * MIN_BRANCH_LENGTH */ if ( x[1] < x[0] || x[2] < x[1] ) { x[1] = (x[2] + x[0]) / 2.0; lnl[1] = (*f)(x[1]); } assert(x[0] <= x[1] && x[1] <= x[2]); xmax = x[1]; lnlmax = lnl[1]; if (lnl[0] > lnlmax) { xmax = x[0]; lnlmax = lnl[0]; } if (lnl[2] > lnlmax) { xmax = x[2]; lnlmax = lnl[2]; } bracket = false; for (it = 0; it < max_iterations; it++) { assert(x[0] <= x[1] && x[1] <= x[2]); prec = fabs(x[1] * eps) + 1e-7; if (x[2] - x[0] < 4.0*prec) break; d = 0.0; mid = (x[2] + x[0]) / 2.0; if (lnl[0] < lnl[1] && lnl[1] > lnl[0]) { /* We have a bracket */ bracket = true; /* Try parabolic interpolation */ t1 = (x[1] - x[0]) * (lnl[1] - lnl[2]); t2 = (x[1] - x[2]) * (lnl[1] - lnl[0]); t3 = t1*(x[1] - x[0]) - t2*(x[1] - x[2]); t4 = 2.0*(t1 - t2); if (t4 > 0.0) t3 = -t3; t4 = fabs(t4); if ( fabs(t3) < fabs(0.5*t4*last_d) && t3 > t4 * (x[0] - x[1]) && t3 < t4 * (x[2] - x[1]) ) { d = t3 / t4; xn = x[1] + d; /* Keep the new point from getting too close to the end points */ if (xn - x[0] < 2.0*prec || x[2] - xn < 2.0*prec) d = xn - mid > 0 ? -prec : prec; } } else { /* We should never lose our bracket once we've found it. */ assert( !bracket ); } if (d == 0.0) { /* Bisect larger interval using golden ratio */ d = x[1] > mid ? 0.38 * (x[0] - x[1]) : 0.38 * (x[2] - x[1]); } /* Keep the new point from getting too close to the middle one */ if (fabs(d) < prec) d = d > 0 ? prec : -prec; xn = x[1] + d; last_d = d; yn = (*f)(xn); if (yn > lnlmax) { *success = true; xmax = xn; lnlmax = yn; } if (yn > lnl[1]) { /* (xn, yn) is the new middle point */ if (xn > x[1]) x[0] = x[1]; else x[2] = x[1]; x[1] = xn; lnl[1] = yn; } else { /* xn is the new bound */ if (xn > x[1]) x[2] = xn; else x[0] = xn; } } return xmax; } boolean makenewv(node *p) { /* Try to improve tree by moving node p. Returns true if a better likelihood * was found */ double min_tyme, max_tyme; /* absolute tyme limits */ double new_tyme; /* result from maximize() */ boolean success = false; /* return value */ node *s = curtree->nodep[p->index - 1]; assert( valid_tyme(s, s->tyme) ); /* Tyme cannot be less than parent */ if (s == curtree->root) min_tyme = s->tyme + MIN_ROOT_TYME; else min_tyme = parent_tyme(s) + MIN_BRANCH_LENGTH; /* Tyme cannot be greater than any children */ max_tyme = min_child_tyme(s) - MIN_BRANCH_LENGTH; /* * EXPERIMENTAL: * Allow nodes to move pretty much anywhere by pushing others outta the way. */ /* First, find the absolute maximum and minimum tymes. */ /* Minimum tyme is somewhere past the root */ min_tyme = curtree->root->tyme + MIN_ROOT_TYME; /* Max tyme is the minimum branch length times the maximal number of branches * to any tip node. */ max_tyme = node_max_tyme(curtree, s); /* Nothing to do if we can't move */ if ( max_tyme < min_tyme + 2.0 * MIN_BRANCH_LENGTH ) { return false; } /* Fix a failure to enforce minimum branch lengths which occurs somewhere in * dnamlk_add() */ if (s->tyme > max_tyme) set_tyme(curtree, s, max_tyme); current_node = s; new_tyme = maximize(min_tyme, s->tyme, max_tyme, &cur_node_eval, epsilon, &success); set_tyme(curtree, s, new_tyme); return success; } /* makenewv */ PHYLIPNEW-3.69.650/src/freqboot.c0000664000175000017500000006437011616234204013053 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Doug Buxton. 
Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ typedef enum { seqs, morphology, restsites, genefreqs } datatype; typedef enum { dna, rna, protein } seqtype; AjPPhyloFreq phylofreqs = NULL; AjPPhyloProp phyloweights = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void seqboot_inputnumbersfreq(AjPPhyloFreq); void inputoptions(void); void seqboot_inputdatafreq(AjPPhyloFreq); void allocrest(void); void allocnew(void); void doinput(int argc, Char *argv[]); void bootweights(void); void sppermute(long); void charpermute(long, long); void writedata(void); void writeweights(void); void writecategories(void); void writeauxdata(steptr, FILE*); void writefactors(void); void bootwrite(void); void seqboot_inputaux(steptr, FILE*); /* function prototypes */ #endif FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH], mixfilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; const char* outweightfilename; AjPFile embossoutweightfile; const char* outmixfilename; AjPFile embossoutmixfile; const char* outancfilename; AjPFile embossoutancfile; const char* outcatfilename; AjPFile embossoutcatfile; const char* outfactfilename; AjPFile embossoutfactfile; long sites, loci, maxalleles, groups, newsites, newersites, newgroups, newergroups, nenzymes, reps, ws, blocksize, categs, maxnewsites; boolean bootstrap, permute, ild, lockhart, jackknife, regular, xml, nexus, weights, categories, factors, enzymes, all, justwts, progress, mixture, firstrep, ancvar; double fracsample; datatype data; seqtype seq; steptr oldweight, where, how_many, newwhere, newhowmany, newerwhere, newerhowmany, factorr, newerfactor, mixdata, ancdata; steptr *charorder; Char *factor; long *alleles; Char **nodep; double **nodef; long **sppord; longer seed; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr test = NULL; AjPStr outputformat = NULL; AjPStr typeofseq = NULL; AjPStr justweights = NULL; AjBool rewrite = false; long inseed, inseed0; data = genefreqs; seq = dna; bootstrap = false; jackknife = false; permute = false; ild = false; lockhart = false; blocksize = 1; regular = true; fracsample = 1.0; all = true; reps = 100; weights = false; mixture = false; ancvar = false; categories = false; justwts = false; printdata = false; dotdiff = true; progress = true; interleaved = true; xml = false; nexus = false; factors = false; enzymes = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylofreqs = ajAcdGetFrequencies("infile"); test = ajAcdGetListSingle("test"); if(ajStrMatchC(test, "b")) { bootstrap = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 1.0; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } blocksize = ajAcdGetInt("blocksize"); } else if(ajStrMatchC(test, "j")) { jackknife = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 0.5; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } } else if(ajStrMatchC(test, "c")) permute = true; else if(ajStrMatchC(test, "o")) ild = true; else if(ajStrMatchC(test, "s")) lockhart = true; else if(ajStrMatchC(test, "r")) rewrite = true; if(rewrite) { if (data == seqs) { outputformat = ajAcdGetListSingle("rewriteformat"); if(ajStrMatchC(outputformat, "n")) 
nexus = true; else if(ajStrMatchC(outputformat, "x")) xml = true; if( (nexus) || (xml) ) { typeofseq = ajAcdGetListSingle("seqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } } else{ reps = ajAcdGetInt("reps"); inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); if(jackknife || bootstrap || permute) { phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; } if(!permute) { justweights = ajAcdGetListSingle("justweights"); if(ajStrMatchC(justweights, "j")) justwts = true; } } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void seqboot_inputnumbersfreq(AjPPhyloFreq freq) { /* read numbers of species and of sites */ long i; spp = freq->Size; sites = freq->Loci; loci = sites; maxalleles = 1; if (!freq->ContChar) { alleles = (long *)Malloc(sites*sizeof(long)); sites = 0; for (i = 0; i < (loci); i++) { alleles[i] = freq->Allele[i]; if (alleles[i] > maxalleles) maxalleles = alleles[i]; sites += alleles[i]; } } } /* seqboot_inputnumbersfreq */ void inputoptions() { /* input the information on the options */ long weightsum, maxfactsize, i, j, k, l, m; if (data == genefreqs) { k = 0; l = 0; for (i = 0; i < (loci); i++) { m = alleles[i]; k++; for (j = 1; j <= m; j++) { l++; factorr[l - 1] = k; } } } else { for (i = 1; i <= (sites); i++) factorr[i - 1] = i; } for (i = 0; i < (sites); i++) oldweight[i] = 1; if (weights) inputweightsstr2(phyloweights->Str[0],0, sites, &weightsum, oldweight, &weights, "seqboot"); if (factors && printdata) { for(i = 0; i < sites; i++) factor[i] = (char)('0' + (factorr[i]%10)); printfactors(outfile, sites, factor, " (least significant digit)"); } if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); for (i = 0; i < (loci); i++) how_many[i] = 0; for (i = 0; i < (loci); i++) where[i] = 0; for (i = 1; i <= (sites); i++) { how_many[factorr[i - 1] - 1]++; if (where[factorr[i - 1] - 1] == 0) where[factorr[i - 1] - 1] = i; } groups = factorr[sites - 1]; newgroups = 0; newsites = 0; maxfactsize = 0; for(i = 0 ; i < loci ; i++){ if(how_many[i] > maxfactsize){ maxfactsize = how_many[i]; } } maxnewsites = groups * maxfactsize; allocnew(); for (i = 0; i < (groups); i++) { if (oldweight[where[i] - 1] > 0) { newgroups++; newsites += how_many[i]; newwhere[newgroups - 1] = where[i]; newhowmany[newgroups - 1] = how_many[i]; } } } /* inputoptions */ void seqboot_inputdatafreq(AjPPhyloFreq freq) { /* input the names and sequences for each species */ long i, j, k, l, m, n; double x; ajint ipos=0; nodef = (double **)Malloc(spp*sizeof(double *)); for (i = 0; i < (spp); i++) nodef[i] = (double *)Malloc(sites*sizeof(double)); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) 
fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); fprintf(outfile, " locus\n\n"); } else { if (ild) { fprintf(outfile, "Locus"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) fprintf(outfile, "Locus"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, %3ld loci\n\n", spp, loci); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } for (i = 1; i <= (spp); i++) { initnamefreq(freq,i - 1); j = 1; while (j <= sites) { x = freq->Data[ipos++]; if ((unsigned)x > 1.0) { printf("GENE FREQ OUTSIDE [0,1] in species %ld\n", i); embExitBad(); } else { nodef[i - 1][j - 1] = x; j++; } } } if (!printdata) return; m = (sites - 1) / 8 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 8; if (l > sites) l = sites; n = (i - 1) * 8; for (k = n; k < l; k++) { fprintf(outfile, "%8.5f", nodef[j][k]); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdatafreq */ void allocrest() { /* allocate memory for bookkeeping arrays */ oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); if (categories) category = (steptr)Malloc(sites*sizeof(long)); if (mixture) mixdata = (steptr)Malloc(sites*sizeof(long)); if (ancvar) ancdata = (steptr)Malloc(sites*sizeof(long)); where = (steptr)Malloc(loci*sizeof(long)); how_many = (steptr)Malloc(loci*sizeof(long)); factor = (Char *)Malloc(sites*sizeof(Char)); factorr = (steptr)Malloc(sites*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void allocnew(void) { /* allocate memory for arrays that depend on the lenght of the output sequence*/ long i; newwhere = (steptr)Malloc(loci*sizeof(long)); newhowmany = (steptr)Malloc(loci*sizeof(long)); newerwhere = (steptr)Malloc(loci*sizeof(long)); newerhowmany = (steptr)Malloc(loci*sizeof(long)); newerfactor = (steptr)Malloc(maxnewsites*maxalleles*sizeof(long)); charorder = (steptr *)Malloc(spp*sizeof(steptr)); for (i = 0; i < spp; i++) charorder[i] = (steptr)Malloc(maxnewsites*sizeof(long)); } void doinput(int argc, Char *argv[]) { /* reads the input data */ seqboot_inputnumbersfreq(phylofreqs); allocrest(); inputoptions(); seqboot_inputdatafreq(phylofreqs); } /* doinput */ void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; if (jackknife) { if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) { if (randum(seed) < (newgroups*fracsample - (long)(newgroups*fracsample)) /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) q = (long)(newgroups*fracsample)+1; else q = (long)(newgroups*fracsample); } else q = (long)(newgroups*fracsample+0.5); r = newgroups; p = q / r; ws = 0; for (i = 0; i < (newgroups); i++) { if (randum(seed) < p) { weight[i]++; ws++; q--; } r--; if (i + 1 < newgroups) p = q / r; } } else if (permute) { for (i = 0; i < (newgroups); i++) weight[i] = 1; } else if (bootstrap) { blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < 
blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } } else /* case of rewriting data */ for (i = 0; i < (newgroups); i++) weight[i] = 1; for (i = 0; i < (newgroups); i++) newerwhere[i] = 0; for (i = 0; i < (newgroups); i++) newerhowmany[i] = 0; newergroups = 0; newersites = 0; for (i = 0; i < (newgroups); i++) { for (j = 1; j <= (weight[i]); j++) { newergroups++; for (k = 1; k <= (newhowmany[i]); k++) { newersites++; newerfactor[newersites - 1] = newergroups; } newerwhere[newergroups - 1] = newwhere[i]; newerhowmany[newergroups - 1] = newhowmany[i]; } } } /* bootweights */ void sppermute(long n) { /* permute the species order as given in array sppord */ long i, j, k; for (i = 1; i <= (spp - 1); i++) { k = (long)((i+1) * randum(seed)); j = sppord[n - 1][i]; sppord[n - 1][i] = sppord[n - 1][k]; sppord[n - 1][k] = j; } } /* sppermute */ void charpermute(long m, long n) { /* permute the n+1 characters of species m+1 */ long i, j, k; for (i = 1; i <= (n - 1); i++) { k = (long)((i+1) * randum(seed)); j = charorder[m][i]; charorder[m][i] = charorder[m][k]; charorder[m][k] = j; } } /* charpermute */ void writedata() { /* write out one set of bootstrapped sequences */ long i, j, k, l, m, n, n2=0; double x; Char charstate; sppord = (long **)Malloc(newergroups*sizeof(long *)); for (i = 0; i < (newergroups); i++) sppord[i] = (long *)Malloc(spp*sizeof(long)); for (j = 1; j <= spp; j++) sppord[0][j - 1] = j; for (i = 1; i < newergroups; i++) { for (j = 1; j <= (spp); j++) sppord[i][j - 1] = sppord[i - 1][j - 1]; } if (!justwts || permute) { if (data == restsites && enzymes) fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); else if (data == genefreqs) fprintf(outfile, "%5ld %5ld\n", spp, newergroups); else { if ((data == seqs) && !(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n"); else if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) { fprintf(outfile, "#NEXUS\n"); fprintf(outfile, "BEGIN DATA\n"); fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", spp, newersites); fprintf(outfile, " FORMAT"); fprintf(outfile, " interleave"); fprintf(outfile, " DATATYPE="); if (data == seqs) { switch (seq) { case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; case (rna): fprintf(outfile, "RNA missing=N gap=-"); break; case (protein): fprintf(outfile, "protein missing=? 
gap=-"); break; } } if (data == morphology) fprintf(outfile, "STANDARD"); fprintf(outfile, ";\n MATRIX\n"); } else fprintf(outfile, "%5ld %5ld\n", spp, newersites); } if (data == genefreqs) { for (i = 0; i < (newergroups); i++) fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); putc('\n', outfile); } } l = 1; if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) && ((data == seqs) || (data == restsites))) { interleaved = !interleaved; if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) interleaved = false; } if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; for (j = 0; j < spp; j++) { n = 0; if ((l == 1) || (interleaved && nexus)) { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " \n"); fprintf(outfile, " "); } n2 = nmlngth-1; if (!(bootstrap || jackknife || permute || ild || lockhart) && (xml || nexus)) { while (nayme[j][n2] == ' ') n2--; } if (nexus) fprintf(outfile, " "); for (k = 0; k <= n2; k++) if (nexus && (nayme[j][k] == ' ') && (k < n2)) putc('_', outfile); else putc(nayme[j][k], outfile); if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n "); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " "); } else { for (k = 1; k <= nmlngth; k++) putc(' ', outfile); } } if (nexus) for (k = 0; k < nmlngth+1-n2; k++) fprintf(outfile, " "); for (k = l - 1; k < m; k++) { if (permute && j + 1 == 1) sppermute(newerfactor[n]); /* we can assume chars not permuted */ for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (data == genefreqs) { if (n > 1 && (n & 7) == 1) fprintf(outfile, "\n "); x = nodef[sppord[newerfactor[charorder[j][n - 1]] - 1][j] - 1] [newerwhere[charorder[j][k]] + n2]; fprintf(outfile, "%8.5f", x); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); charstate = nodep[sppord[newerfactor[charorder[j][n - 1]] - 1] [j] - 1][newerwhere[charorder[j][k]] + n2]; putc(charstate, outfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfile); } } } if (!(bootstrap || jackknife || permute || ild || lockhart ) && xml) { fprintf(outfile, "\n \n"); } putc('\n', outfile); } if (interleaved) { if ((m <= newersites) && (newersites > 60)) putc('\n', outfile); l += 60; m += 60; } } while (interleaved && l <= newersites); if ((data == seqs) && (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) fprintf(outfile, "\n"); if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) fprintf(outfile, " ;\nEND;\n"); for (i = 0; i < (newergroups); i++) free(sppord[i]); free(sppord); } /* writedata */ void writeweights() { /* write out one set of post-bootstrapping weights */ long j, k, l, m, n, o; j = 0; l = 1; if (interleaved) m = 60; else m = sites; do { if(m > sites) m = sites; n = 0; for (k = l - 1; k < m; k++) { for(o = 0 ; o < how_many[k] ; o++){ if(oldweight[k]==0){ fprintf(outweightfile, "0"); j++; } else{ if (weight[k-j] < 10) fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); else fprintf(outweightfile, "%c", (char)('A'+weight[k-j]-10)); n++; if (!interleaved && n > 1 && n % 60 == 1) { fprintf(outweightfile, "\n"); if (n % 10 == 0 && n % 60 != 0) putc(' ', outweightfile); } } } } putc('\n', outweightfile); if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= sites); } /* 
writeweights */ void writecategories() { /* write out categories for the bootstrapped sequences */ long k, l, m, n, n2; Char charstate; if(justwts){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n=0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[k]; putc(charstate, outcatfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outcatfile, "\n"); return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[newerwhere[k] + n2]; putc(charstate, outcatfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outcatfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outcatfile, "\n"); } /* writecategories */ void writeauxdata(steptr auxdata, FILE *outauxfile) { /* write out auxiliary option data (mixtures, ancestors, ect) to appropriate file. Samples parralel to data, or just gives one output entry if justwts is true */ long k, l, m, n, n2; Char charstate; /* if we just output weights (justwts), and this is first set just output the data unsampled */ if(justwts){ if(firstrep){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[k]; putc(charstate, outauxfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outauxfile, "\n"); } return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[newerwhere[k] + n2]; putc(charstate, outauxfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outauxfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outauxfile, "\n"); } /* writeauxdata */ void writefactors(void) { long k, l, m, n, prevfact, writesites; char symbol; steptr wfactor; if(!justwts || firstrep){ if(justwts){ writesites = sites; wfactor = factorr; } else { writesites = newersites; wfactor = newerfactor; } prevfact = wfactor[0]; symbol = '+'; if (interleaved) m = 60; else m = writesites; l=1; do { if(m > writesites) m = writesites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outfactfile, "\n "); if(prevfact != wfactor[k]){ symbol = (symbol == '+') ? 
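/* Note: the factors file marks sites belonging to the same character with runs
   of a single symbol, toggling between '+' and '-' whenever the factor index
   changes; e.g. factors 1 1 2 2 2 3 are written as "++---+". */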
'-' : '+'; prevfact = wfactor[k]; } putc(symbol, outfactfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfactfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= writesites); fprintf(outfactfile, "\n"); } } /* writefactors */ void bootwrite() { /* does bootstrapping and writes out data sets */ long i, j, rr, repdiv10; if (!(bootstrap || jackknife || permute || ild || lockhart)) reps = 1; repdiv10 = reps / 10; if (repdiv10 < 1) repdiv10 = 1; if (progress) putchar('\n'); for (rr = 1; rr <= (reps); rr++) { for (i = 0; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = j; if(rr==1) firstrep = true; else firstrep = false; if (ild) { charpermute(0, maxnewsites); for (i = 1; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = charorder[0][j]; } if (lockhart) for (i = 0; i < spp; i++) charpermute(i, maxnewsites); bootweights(); if (!justwts || permute || ild || lockhart) writedata(); if (justwts && !(permute || ild || lockhart)) writeweights(); if (categories) writecategories(); if (factors) writefactors(); if (mixture) writeauxdata(mixdata, outmixfile); if (ancvar) writeauxdata(ancdata, outancfile); if (progress && (bootstrap || jackknife || permute || ild || lockhart) && ((reps < 10) || rr % repdiv10 == 0)) { printf("completed replicate number %4ld\n", rr); #ifdef WIN32 phyFillScreenColor(); #endif } } if (progress) { if (justwts) printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); else printf("\nOutput written to file \"%s\"\n\n", outfilename); } } /* bootwrite */ int main(int argc, Char *argv[]) { /* Read in sequences or frequencies and bootstrap or jackknife them */ #ifdef MAC argc = 1; /* macsetup("SeqBoot",""); */ argv[0] = "SeqBoot"; #endif init(argc,argv); emboss_getoptions("ffreqboot", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinput(argc, argv); bootwrite(); FClose(infile); if (weights) FClose(weightfile); if (categories) { FClose(catfile); FClose(outcatfile); } if(mixture) FClose(outmixfile); if(ancvar) FClose(outancfile); if (justwts && !permute) { FClose(outweightfile); } else FClose(outfile); #ifdef MAC fixmacfile(outfilename); if (justwts && !permute) fixmacfile(outweightfilename); if (categories) fixmacfile(outcatfilename); if (mixture) fixmacfile(outmixfilename); #endif if(progress) printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/dolpenny.c0000664000175000017500000004101511616234204013051 00000000000000#include "phylip.h" #include "disc.h" #include "dollo.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define maxtrees 1000 /* maximum number of trees to be printed out */ #define often 100 /* how often to notify how many trees examined */ #define many 1000 /* how many multiples of howoften before stop */ typedef long *treenumbers; typedef double *valptr; typedef long *placeptr; AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phyloweights = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void doinput(void); void preorder(node *); void evaluate(node *); void addtraverse(node *, node *, node *, placeptr, valptr, long *); void addit(long); void describe(void); void maketree(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], weightfilename[FNMLNGTH], ancfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node *root; long howmany, howoften, col, msets, ith; boolean weights, thresh, ancvar, questions, dollo, simple, trout, progress, treeprint, stepbox, ancseq, mulsets, firstset, justwts; boolean *ancone, *anczero, *ancone0, *anczero0; pointptr treenode; /* pointers to all nodes in tree */ double fracdone, fracinc; double threshold; double *threshwt; boolean *added; Char *guess; steptr numsteps, numsone, numszero; gbit *garbage; long **bestorders, **bestrees; /* Local variables for maketree, propagated globally for C version: */ long examined, mults; boolean firsttime, done; double like, bestyet; treenumbers current, order; long fullset; bitptr zeroanc, oneanc; bitptr stps; void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; AjPStr method = NULL; howoften = often; howmany = many; simple = true; thresh = false; threshold = spp; trout = true; weights = false; justwts = false; ancvar = false; dollo = true; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "d")) dollo = true; else dollo = false; phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; howmany = ajAcdGetInt("howmany"); howoften = ajAcdGetInt("howoften"); simple = ajAcdGetBoolean("simple"); thresh = ajAcdGetToggle("dothreshold"); if(thresh) ajAcdGetFloat("threshold"); printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nPenny algorithm for Dollo or polymorphism parsimony, version %s\n",VERSION); fprintf(outfile, " branch-and-bound to find all most parsimonious trees\n\n"); } /* emboss_getoptions */ void allocrest() { long i; extras = (long *)Malloc(chars*sizeof(long)); weight = (long 
*)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); guess = (Char *)Malloc(chars*sizeof(Char)); numsteps = (long *)Malloc(chars*sizeof(long)); numszero = (long *)Malloc(chars*sizeof(long)); numsone = (long *)Malloc(chars*sizeof(long)); bestorders = (long **)Malloc(maxtrees*sizeof(long *)); bestrees = (long **)Malloc(maxtrees*sizeof(long *)); for (i = 1; i <= maxtrees; i++) { bestorders[i - 1] = (long *)Malloc(spp*sizeof(long)); bestrees[i - 1] = (long *)Malloc(spp*sizeof(long)); } current = (treenumbers)Malloc(spp*sizeof(long)); order = (treenumbers)Malloc(spp*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); added = (boolean *)Malloc(nonodes*sizeof(boolean)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); words = chars / bits + 1; if (printdata) fprintf(outfile, "%2ld species, %3ld characters\n", spp, chars); alloctree(&treenode); setuptree(treenode); allocrest(); } /* doinit */ void inputoptions() { /* input the information on the options */ long i; if(justwts){ if(firstset){ if (ancvar) { inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); } } for (i = 0; i < (chars); i++) weight[i] = 1; inputweightsstr(phyloweights->Str[0], chars, weight, &weights); } else { if (!firstset) samenumspstate(phylostates[ith-1], &chars, ith); for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); if (weights) inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); } for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = false; } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } questions = false; if (!thresh) threshold = spp; for (i = 0; i < (chars); i++) { questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void doinput() { /* reads the input data */ inputoptions(); if(!justwts || firstset) disc_inputdata(phylostates[ith-1], treenode, dollo, printdata, outfile); } /* doinput */ void preorder(node *p) { /* go back up tree setting up and counting interior node states */ long i; if (!p->tip) { correct(p, fullset, dollo, zeroanc, treenode); preorder(p->next->back); preorder(p->next->next->back); } if (p->back == NULL) return; if (dollo) { for (i = 0; i < (words); i++) stps[i] = (treenode[p->back->index - 1]->stateone[i] & p->statezero[i] & zeroanc[i]) | (treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & fullset & (~zeroanc[i])); } else { for (i = 0; i < (words); i++) stps[i] = treenode[p->back->index - 1]->stateone[i] & treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & p->statezero[i]; } count(stps, zeroanc, numszero, numsone); } /* preorder */ void evaluate(node *r) { /* Determines the number of losses or polymorphisms needed for a tree. 
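The tree is scored twice, once with ancestral state 0 assumed for every character and once with state 1; for characters whose ancestral state is left unknown, the smaller of the two counts is used.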
This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } for (i = 0; i < (words); i++) zeroanc[i] = fullset; postorder(r); preorder(r); for (i = 0; i < (words); i++) zeroanc[i] = 0; postorder(r); preorder(r); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) sum += stepnum; else sum += threshwt[i]; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } if (examined == 0 && mults == 0) bestyet = -1.0; like = sum; } /* evaluate */ void addtraverse(node *a, node *b, node *c, placeptr place, valptr valyew, long *n) { /* traverse all places to add b */ if (done) return; add(a, b, c, &root, treenode); (*n)++; evaluate(root); examined++; if (examined == howoften) { examined = 0; mults++; if (mults == howmany) done = true; if (progress) { printf("%6ld",mults); if (bestyet >= 0) printf("%18.5f", bestyet); else printf(" - "); printf("%17ld%20.2f\n", nextree - 1, fracdone * 100); #ifdef WIN32 phyFillScreenColor(); #endif } } valyew[*n - 1] = like; place[*n - 1] = a->index; re_move(&b, &c, &root, treenode); if (!a->tip) { addtraverse(a->next->back, b, c, place,valyew,n); addtraverse(a->next->next->back, b, c, place,valyew,n); } } /* addtraverse */ void addit(long m) { /* adds the species one by one, recursively */ long n; valptr valyew; placeptr place; long i, j, n1, besttoadd = 0; valptr bestval; placeptr bestplace; double oldfrac, oldfdone, sum, bestsum; valyew = (valptr)Malloc(nonodes*sizeof(double)); bestval = (valptr)Malloc(nonodes*sizeof(double)); place = (placeptr)Malloc(nonodes*sizeof(long)); bestplace = (placeptr)Malloc(nonodes*sizeof(long)); if (simple && !firsttime) { n = 0; added[order[m - 1] - 1] = true; addtraverse(root, treenode[order[m - 1] - 1], treenode[spp + m - 2], place, valyew, &n); besttoadd = order[m - 1]; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } else { bestsum = -1.0; for (i = 1; i <= (spp); i++) { if (!added[i - 1]) { n = 0; added[i - 1] = true; addtraverse(root, treenode[i - 1], treenode[spp + m - 2], place, valyew, &n); added[i - 1] = false; sum = 0.0; for (j = 0; j < n; j++) sum += valyew[j]; if (sum > bestsum) { bestsum = sum; besttoadd = i; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } } } } order[m - 1] = besttoadd; memcpy(place, bestplace, nonodes*sizeof(long)); memcpy(valyew, bestval, nonodes*sizeof(double)); shellsort(valyew, place, n); oldfrac = fracinc; oldfdone = fracdone; n1 = 0; for (i = 0; i < (n); i++) { if (valyew[i] <= bestyet || bestyet < 0.0) n1++; } if (n1 > 0) fracinc /= n1; for (i = 0; i < n; i++) { if (valyew[i] <=bestyet ||bestyet < 0.0) { current[m - 1] = place[i]; add(treenode[place[i] - 1], treenode[besttoadd - 1], treenode[spp + m - 2], &root, treenode); added[besttoadd - 1] = true; if (m < spp) addit(m + 1); else { if (valyew[i] < bestyet || bestyet < 0.0) { nextree = 1; bestyet = valyew[i]; } if (nextree <= maxtrees) { memcpy(bestorders[nextree - 1], order, spp*sizeof(long)); memcpy(bestrees[nextree - 1], current, spp*sizeof(long)); } nextree++; firsttime = false; } 
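/* Note: branch and bound. A placement is explored (the valyew[i] <= bestyet
   test above) only if the partial tree is no longer than the best complete
   tree found so far; when a complete tree beats bestyet the list of saved
   trees is restarted at nextree = 1, so bestorders/bestrees end up holding
   only the shortest trees (up to maxtrees of them). */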
re_move(&treenode[besttoadd - 1], &treenode[spp + m - 2], &root, treenode); added[besttoadd - 1] = false; } fracdone += fracinc; } fracinc = oldfrac; fracdone = oldfdone; free(valyew); free(bestval); free(place); free(bestplace); } /* addit */ void describe() { /* prints ancestors, steps and table of numbers of steps in each character */ if (stepbox) { putc('\n', outfile); writesteps(weights, dollo, numsteps); } if (questions) guesstates(guess); if (ancseq) { hypstates(fullset, dollo, guess, treenode, root, garbage, zeroanc, oneanc); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout(root, nextree, &col, root); } } /* describe */ void maketree() { /* tree construction recursively by branch and bound */ long i, j, k; node *dummy; fullset = (1L << (bits + 1)) - (1L << 1); if (progress) { printf("\nHow many\n"); printf("trees looked Approximate\n"); printf("at so far Length of How many percentage\n"); printf("(multiples shortest tree trees this long searched\n"); printf("of %4ld): found so far found so far so far\n", howoften); printf("---------- ------------ ------------ ------------\n"); #ifdef WIN32 phyFillScreenColor(); #endif } done = false; mults = 0; examined = 0; nextree = 1; root = treenode[0]; firsttime = true; for (i = 0; i < (spp); i++) added[i] = false; added[0] = true; order[0] = 1; k = 2; fracdone = 0.0; fracinc = 1.0; bestyet = -1.0; stps = (bitptr)Malloc(words*sizeof(long)); addit(k); if (done) { if (progress) { printf("Search broken off! Not guaranteed to\n"); printf(" have found the most parsimonious trees.\n"); } if (treeprint) { fprintf(outfile, "Search broken off! Not guaranteed to\n"); fprintf(outfile, " have found the most parsimonious\n"); fprintf(outfile, " trees, but here is what we found:\n"); } } if (treeprint) { fprintf(outfile, "\nrequires a total of %18.3f\n\n", bestyet); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%5ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i < (spp); i++) added[i] = true; for (i = 0; i <= (nextree - 2); i++) { for (j = k; j <= (spp); j++) add(treenode[bestrees[i][j - 1] - 1], treenode[bestorders[i][j - 1] - 1], treenode[spp + j - 2], &root, treenode); evaluate(root); printree(1.0, treeprint, root); describe(); for (j = k - 1; j < (spp); j++) re_move(&treenode[bestorders[i][j] - 1], &dummy, &root, treenode); } if (progress) { printf("\nOutput written to file \"%s\"\n\n", outfilename); if (trout) printf("Trees also written onto file \"%s\"\n\n", outtreename); } free(stps); if (ancseq) freegarbage(&garbage); } /* maketree */ int main(int argc, Char *argv[]) { /* branch-and-bound method for Dollo, polymorphism parsimony */ /* Reads in the number of species, number of characters, options and data. Then finds all most parsimonious trees */ #ifdef MAC argc = 1; /* macsetup("Dolpenny",""); */ argv[0] = "Dolpenny"; #endif init(argc, argv); emboss_getoptions("fdolpenny", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; garbage = NULL; firstset = true; bits = 8*sizeof(long) - 1; doinit(); if(ancvar) fprintf(outfile,"%s parsimony method\n\n",dollo ? 
"Dollo" : "Polymorphism"); for (ith = 1; ith <= msets; ith++) { doinput(); if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } if (justwts){ fprintf(outfile, "Weights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata){ if (weights || justwts) printweights(outfile, 0, chars, weight, "Characters"); if (ancvar) printancestors(outfile, anczero, ancone); } if (ith == 1) firstset = false; maketree(); } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* branch-and-bound method for Dollo, polymorphism parsimony */ PHYLIPNEW-3.69.650/src/factor.c0000664000175000017500000003613711305225544012512 00000000000000 #include "phylip.h" /* version 3.6. (c) Copyright 1988-2004 by the University of Washington. A program to factor multistate character trees. Originally version 29 May 1983 by C. A. Meacham, Botany Department, University of Georgia Additional code by Joe Felsenstein, 1988-1991 C version code by Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define maxstates 20 /* Maximum number of states in multi chars */ #define maxoutput 80 /* Maximum length of output line */ #define sizearray 5000 /* Size of symbarray; must be >= the sum of */ /* squares of the number of states in each multi*/ /* char to be factored */ #define factchar ':' /* character to indicate state connections */ #define unkchar '?' /* input character to indicate state unknown */ typedef struct statenode { /* Node of multifurcating tree */ struct statenode *ancstr, *sibling, *descendant; Char state; /* Symbol of character state */ long edge; /* Number of subtending edge */ } statenode; AjPStr rdline = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void readtree(void); void attachnodes(statenode *, Char *); void maketree(statenode *, Char *); void construct(void); void numberedges(statenode *, long *); void factortree(void); void dotrees(void); void writech(Char ch, long *, FILE *outauxfile); void writefactors(long *); void writeancestor(long *); void doeu(long *, long); void dodatamatrix(void); /* function prototypes */ #endif FILE *outfactfile, *outancfile; Char infilename[FNMLNGTH]; const char* outfilename; const char* outfactname; const char* outancname; AjPFile inputfile; AjPFile embossoutfile; AjPFile embossoutfact; AjPFile embossoutanc; long neus, nchars, charindex, lastindex; Char ch; boolean ancstrrequest, factorrequest, rooted, progress; Char symbarray[sizearray]; /* Holds multi symbols and their factored equivs */ long *charnum; /* Multis */ long *chstart; /* Position of each */ long *numstates; /* Number of states */ Char *ancsymbol; /* Ancestral state */ /* local variables for dotrees, propagated to global level. 
*/ long npairs, offset, charnumber, nstates; statenode *root; Char pair[maxstates][2]; statenode *nodes[maxstates]; void emboss_getoptions(char *pgm, int argc, char *argv[]) { ibmpc = IBMCRT; ansi = ANSICRT; progress = true; factorrequest = false; ancstrrequest = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); inputfile = ajAcdGetInfile("infile"); factorrequest = ajAcdGetBoolean("factors"); ancstrrequest = ajAcdGetBoolean("anc"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(factorrequest) { embossoutfact = ajAcdGetOutfile("outfactorfile"); emboss_openfile(embossoutfact, &outfactfile, &outfactname); } if(ancstrrequest) { embossoutanc = ajAcdGetOutfile("outancfile"); emboss_openfile(embossoutanc, &outancfile, &outfactname); } } /* emboss_getoptions */ void readtree() { /* Reads a single character-state tree; puts adjacent symbol pairs into array 'pairs' */ int npairs = 0; const char* cp; cp = ajStrGetPtr(rdline); while (*cp && isspace((int)*cp)) cp++; while (*cp && isdigit((int)*cp)) cp++; while (*cp) { while (*cp && isspace((int)*cp)) cp++; ch = *cp++; npairs++; pair[npairs - 1][0] = ch; while (*cp && isspace((int)*cp)) cp++; ch = *cp++; if (!(*cp) || (ch != factchar)) { printf("\n\nERROR: Character %d: bad character state tree format1\n\n", (int)(cp - ajStrGetPtr(rdline))); printf("\n\nch: %c\n", ch); embExitBad(); } while (*cp && isspace((int)*cp)) cp++; ch = *cp++; pair[npairs - 1][1] = ch; while (*cp && isspace((int)*cp)) cp++; if (pair[npairs - 1][1] == ' ') { printf("\n\nERROR: Character %d: bad character state tree format2\n\n", (int)(cp - ajStrGetPtr(rdline))); embExitBad(); } while (*cp && isspace((int)*cp)) cp++; } } /* readtree */ void attachnodes(statenode *poynter, Char *otherone) { /* Makes linked list of all nodes to which passed node is ancestral. First such node is 'descendant'; second such node is 'sibling' of first; third such node is sibling of second; etc. */ statenode *linker, *ptr; long i, j, k; linker = poynter; for (i = 0; i < (npairs); i++) { for (j = 1; j <= 2; j++) { if (poynter->state == pair[i][j - 1]) { if (j == 1) *otherone = pair[i][1]; else *otherone = pair[i][0]; if (*otherone != '.' 
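/* Note: '.' is the placeholder for the ancestral state in a rooted
   character-state tree, and the second test below skips the state this node
   was reached from, so attachnodes() only links states lying below poynter. */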
&& *otherone != poynter->ancstr->state) { k = offset + 1; while (*otherone != symbarray[k - 1]) k++; if (nodes[k - offset - 1] != NULL) embExitBad(); ptr = (statenode *)Malloc(sizeof(statenode)); ptr->ancstr = poynter; ptr->descendant = NULL; ptr->sibling = NULL; ptr->state = *otherone; if (linker == poynter) /* If not first */ poynter->descendant = ptr; /* If first */ else linker->sibling = ptr; nodes[k - offset - 1] = ptr; /* Save pntr to node */ linker = ptr; } } } } } /* attachnodes */ void maketree(statenode *poynter, Char *otherone) { /* Recursively attach nodes */ if (poynter == NULL) return; attachnodes(poynter, otherone); maketree(poynter->descendant, otherone); maketree(poynter->sibling, otherone); } /* maketree */ void construct() { /* Puts tree together from array 'pairs' */ Char rootstate; long i, j, k; boolean done; statenode *poynter; char otherone; rooted = false; ancsymbol[charindex - 1] = '?'; rootstate = pair[0][0]; nstates = 0; for (i = 0; i < (npairs); i++) { for (j = 1; j <= 2; j++) { k = 1; done = false; while (!done) { if (k > nstates) { done = true; break; } if (pair[i][j - 1] == symbarray[offset + k - 1]) done = true; else k++; } if (k > nstates) { if (pair[i][j - 1] == '.') { if (rooted) embExitBad(); rooted = true; ancsymbol[charindex - 1] = '0'; if (j == 1) rootstate = pair[i][1]; else rootstate = pair[i][0]; } else { nstates++; symbarray[offset + nstates - 1] = pair[i][j - 1]; } } } } if ((rooted && nstates != npairs) || (!rooted && nstates != npairs + 1)) embExitBad(); root = (statenode *)Malloc(sizeof(statenode)); root->state = ' '; root->descendant = (statenode *)Malloc(sizeof(statenode)); root->descendant->ancstr = root; root = root->descendant; root->descendant = NULL; root->sibling = NULL; root->state = rootstate; for (i = 0; i < (nstates); i++) nodes[i] = NULL; i = 1; while (symbarray[offset + i - 1] != rootstate) i++; nodes[i - 1] = root; maketree(root, &otherone); for (i = 0; i < (nstates); i++) { if (nodes[i] != root) { if (nodes[i] == NULL){ printf( "\n\nERROR: Character %ld: invalid character state tree description\n", charnumber); embExitBad();} else { poynter = nodes[i]->ancstr; while (poynter != root && poynter != nodes[i]) poynter = poynter->ancstr; if (poynter != root){ printf( "ERROR: Character %ld: invalid character state tree description\n\n", charnumber); embExitBad();} } } } } /* construct */ void numberedges(statenode *poynter, long *edgenum) { /* Assign to each node a number for the edge below it. The root is zero */ if (poynter == NULL) return; poynter->edge = *edgenum; (*edgenum)++; numberedges(poynter->descendant, edgenum); numberedges(poynter->sibling, edgenum); } /* numberedges */ void factortree() { /* Generate the string of 0's and 1's that will be substituted for each symbol of the multistate char. 
*/ long i, j, place, factoroffset; statenode *poynter; long edgenum=0; numberedges(root, &edgenum); factoroffset = offset + nstates; for (i = 0; i < (nstates); i++) { place = factoroffset + (nstates - 1) * i; for (j = place; j <= (place + nstates - 2); j++) symbarray[j] = '0'; poynter = nodes[i]; while (poynter != root) { symbarray[place + poynter->edge - 1] = '1'; poynter = poynter->ancstr; } } } /* factortree */ void dotrees() { /* Process character-state trees */ long lastchar; ajint ival=0; charindex = 0; lastchar = 0; offset = 0; charnumber = 0; ajReadlineTrim(inputfile, &rdline); if(ajFmtScanS(rdline, "%d", &ival) != 1) { printf("Invalid input file!\n"); embExitBad(); } charnumber = ival; while (charnumber < 999) { if (charnumber < lastchar) { printf("\n\nERROR: Character state tree"); printf(" for character %ld: out of order\n\n", charnumber); embExitBad(); } charindex++; lastindex = charindex; readtree(); /* Process character-state tree */ if (npairs > 0) { construct(); /* Link tree together */ factortree(); } else { nstates = 0; ancsymbol[charindex - 1] = '?'; } lastchar = charnumber; charnum[charindex - 1] = charnumber; chstart[charindex - 1] = offset; numstates[charindex - 1] = nstates; offset += nstates * nstates; ajReadlineTrim(inputfile, &rdline); ajFmtScanS(rdline, "%d", &ival); charnumber = ival; } /* each multistate character */ /* symbol */ } /* dotrees */ void writech(Char ch, long *chposition, FILE *outauxfile) { /* Writes a single character to output */ if (*chposition > maxoutput) { putc('\n', outauxfile); *chposition = 1; } putc(ch, outauxfile); (*chposition)++; } /* writech */ void writefactors(long *chposition) { /* Writes 'FACTORS' line */ long i, charindex; Char symbol; *chposition = 11; symbol = '-'; for (charindex = 0; charindex < (lastindex); charindex++) { if (symbol == '-') symbol = '+'; else symbol = '-'; if (numstates[charindex] == 0) writech(symbol, chposition, outfactfile); else { for (i = 1; i < (numstates[charindex]); i++) writech(symbol, chposition, outfactfile); } } putc('\n', outfactfile); } /* writefactors */ void writeancestor(long *chposition) { /* Writes 'ANCESTOR' line */ long i, charindex; charindex = 1; while (ancsymbol[charindex - 1] == '?') charindex++; if (charindex > lastindex) return; *chposition = 11; for (charindex = 0; charindex < (lastindex); charindex++) { if (numstates[charindex] == 0) writech(ancsymbol[charindex], chposition, outancfile); else { for (i = 1; i < (numstates[charindex]); i++) writech(ancsymbol[charindex], chposition, outancfile); } } putc('\n', outancfile); } /* writeancestor */ void doeu(long *chposition, long eu) { /* Writes factored data for a single species */ long i, charindex, place; Char *multichar; const char* cp; ajReadlineTrim(inputfile, &rdline); cp = ajStrGetPtr(rdline); for (i = 1; i <= nmlngth; i++) { ch = *cp++; putc(ch, outfile); if ((ch == '(') || (ch == ')') || (ch == ':') || (ch == ',') || (ch == ';') || (ch == '[') || (ch == ']')) { printf( "\n\nERROR: Species name may not contain characters ( ) : ; , [ ] \n"); printf(" In name of species number %ld there is character %c\n\n", i+1, ch); embExitBad(); } } multichar = (Char *)Malloc(nchars*sizeof(Char)); *chposition = 11; for (i = 0; i < (nchars); i++) { ch = *cp++; while (isspace((int)ch)) { ch = *cp++; if (!*cp) { ajReadlineTrim(inputfile, &rdline); cp = ajStrGetPtr(rdline); ch = *cp++; } } multichar[i] = ch; } for (charindex = 0; charindex < (lastindex); charindex++) { if (numstates[charindex] == 0) writech(multichar[charnum[charindex] - 1], 
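/* Note: characters whose state tree was empty (numstates == 0) are copied
   through unchanged here; for factored characters the unknown symbol '?' is
   expanded below into nstates-1 '?' marks, and any other undocumented state
   is reported as an error. */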
chposition, outfile); else { i = 1; while (symbarray[chstart[charindex] + i - 1] != multichar[charnum[charindex] - 1] && i <= numstates[charindex]) i++; if (i > numstates[charindex]) { if( multichar[charnum[charindex] - 1] == unkchar){ for (i = 1; i < (numstates[charindex]); i++) writech('?', chposition, outfile); } else { putc('\n', outfile); printf("\n\nERROR: In species %ld, multistate character %ld: ", eu, charnum[charindex]); printf("'%c' is not a documented state\n\n", multichar[charnum[charindex] - 1]); embExitBad(); } } else { place = chstart[charindex] + numstates[charindex] + (numstates[charindex] - 1) * (i - 1); for (i = 0; i <= (numstates[charindex] - 2); i++) writech(symbarray[place + i], chposition, outfile); } } } putc('\n', outfile); free(multichar); } /* doeu */ void dodatamatrix() { /* Reads species information and write factored data set */ long charindex, totalfactors, eu, chposition; totalfactors = 0; for (charindex = 0; charindex < (lastindex); charindex++) { if (numstates[charindex] == 0) totalfactors++; else totalfactors += numstates[charindex] - 1; } if (rooted && ancstrrequest) fprintf(outfile, "%5ld %4ld\n", neus + 1, totalfactors); else fprintf(outfile, "%5ld %4ld\n", neus, totalfactors); if (factorrequest) writefactors(&chposition); if (ancstrrequest) writeancestor(&chposition); eu = 1; while (eu <= neus) { eu++; doeu(&chposition, eu); } if (progress) printf("\nData matrix written on file \"%s\"\n\n", outfilename); } /* dodatamatrix */ int main(int argc, Char *argv[]) { #ifdef MAC argc = 1; /* macsetup("Factor",""); */ argv[0] = "Factor"; #endif init(argc,argv); emboss_getoptions("ffactor", argc, argv); ajReadlineTrim(inputfile, &rdline); sscanf(ajStrGetPtr(rdline), "%ld%ld", &neus, &nchars); charnum = (long *)Malloc(nchars*sizeof(long)); chstart = (long *)Malloc(nchars*sizeof(long)); numstates = (long *)Malloc(nchars*sizeof(long)); ancsymbol = (Char *)Malloc(nchars*sizeof(Char)); dotrees(); /* Read and factor character-state trees */ dodatamatrix(); ajFileClose(&inputfile); FClose(outfile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* factor */ PHYLIPNEW-3.69.650/src/seqboot.c0000664000175000017500000007612611616234204012710 00000000000000/* version 3.6. (c) Copyright 1993-2005 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Doug Buxton. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include "phylip.h" #include "seq.h" typedef enum { seqs, morphology, restsites, genefreqs } datatype; typedef enum { dna, rna, protein } seqtype; AjPSeqset seqset = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void seqboot_inputnumbersseq(AjPSeqset); void inputoptions(void); char **matrix_char_new(long rows, long cols); void matrix_char_delete(char **mat, long rows); double **matrix_double_new(long rows, long cols); void matrix_double_delete(double **mat, long rows); void seqboot_inputdataseq(AjPSeqset); void allocrest(void); void freerest(void); void allocnew(void); void freenew(void); void allocnewer(long newergroups, long newersites); void doinput(int argc, Char *argv[]); void bootweights(void); void permute_vec(long *a, long n); void sppermute(long); void charpermute(long, long); void writedata(void); void writeweights(void); void writecategories(void); void writeauxdata(steptr, FILE*); void writefactors(void); void bootwrite(void); void seqboot_inputaux(steptr, FILE*); void freenewer(void); /* function prototypes */ #endif /*** Config vars ***/ /* Mutually exclusive booleans for boostrap type */ boolean bootstrap, jackknife; boolean permute; /* permute char order */ boolean ild; /* permute species for each char */ boolean lockhart; /* permute chars within species */ boolean rewrite; boolean factors = false; /* Use factors (only with morph data) */ /* Bootstrap/jackknife sample frequency */ boolean regular = true; /* Use 50% sampling with bootstrap/jackknife */ double fracsample = 0.5; /* ...or user-defined sample freq, [0..inf) */ /* Output format: mutually exclusive, none indicates PHYLIP */ boolean xml = false; boolean nexus = false; boolean weights = false;/* Read weights file */ boolean categories = false;/* Use categories (permuted with dataset) */ boolean enzymes; boolean all; /* All alleles present in infile? */ boolean justwts = false; /* Write boot'd/jack'd weights, no datasets */ boolean mixture; boolean ancvar; boolean progress = true; /* Enable progress indications */ boolean firstrep; /* TODO Must this be global? 
*/ longer seed; /* Filehandles and paths */ /* Usual suspects declared in phylip.c/h */ FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH], mixfilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; const char* outweightfilename; AjPFile embossoutweightfile; const char* outmixfilename; AjPFile embossoutmixfile; const char* outancfilename; AjPFile embossoutancfile; const char* outcatfilename; AjPFile embossoutcatfile; const char* outfactfilename; AjPFile embossoutfactfile; long sites, loci, maxalleles, groups, nenzymes, reps, ws, blocksize, categs, maxnewsites; datatype data; seqtype seq; steptr oldweight, where, how_many, mixdata, ancdata; /* Original dataset */ /* [0..spp-1][0..sites-1] */ Char **nodep = NULL; /* molecular or morph data */ double **nodef = NULL; /* gene freqs */ Char *factor = NULL; /* factor[sites] - direct read-in of factors file */ long *factorr = NULL; /* [0..sites-1] => nondecreasing [1..groups] */ long *alleles = NULL; /* Mapping with read-in weights eliminated * Allocated once in allocnew() */ long newsites; long newgroups; long *newwhere = NULL; /* Map [0..newgroups-1] => [1..newsites] */ long *newhowmany = NULL; /* Number of chars for each [0..newgroups-1] */ /* Mapping with bootstrapped weights applied */ /* (re)allocated by allocnewer() */ long newersites, newergroups; long *newerfactor = NULL; /* Map [0..newersites-1] => [1..newergroups] */ long *newerwhere = NULL; /* Map [0..newergroups-1] => [1..newersites] */ long *newerhowmany = NULL; /* Number of chars for each [0..newergroups-1] */ long **charorder = NULL; /* Permutation [0..spp-1][0..newergroups-1] */ long **sppord = NULL; /* Permutation [0..newergroups-1][0..spp-1] */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr test = NULL; AjPStr outputformat = NULL; AjPStr typeofseq = NULL; AjPStr justweights = NULL; AjBool rewrite = false; long inseed, inseed0; data = seqs; seq = dna; bootstrap = false; jackknife = false; permute = false; ild = false; lockhart = false; blocksize = 1; regular = true; fracsample = 1.0; all = false; reps = 100; weights = false; mixture = false; ancvar = false; categories = false; justwts = false; printdata = false; dotdiff = true; progress = true; interleaved = true; xml = false; nexus = false; factors = false; enzymes = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqset = ajAcdGetSeqset("sequence"); test = ajAcdGetListSingle("test"); if(ajStrMatchC(test, "b")) { bootstrap = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 1.0; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } blocksize = ajAcdGetInt("blocksize"); } else if(ajStrMatchC(test, "j")) { jackknife = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 0.5; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } } else if(ajStrMatchC(test, "c")) permute = true; else if(ajStrMatchC(test, "o")) ild = true; else if(ajStrMatchC(test, "s")) lockhart = true; else if(ajStrMatchC(test, "r")) rewrite = true; if(rewrite) { outputformat = ajAcdGetListSingle("rewriteformat"); if(ajStrMatchC(outputformat, "n")) nexus = true; else if(ajStrMatchC(outputformat, "x")) xml = true; if( (nexus) || (xml) ) { typeofseq = ajAcdGetListSingle("seqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else 
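/* Note: the sequence-type choice only matters when rewriting to NEXUS or XML;
   for NEXUS output writedata() uses it to pick the DATATYPE (DNA, RNA or
   protein) and the matching missing/gap symbols in the header it writes. */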
if(ajStrMatchC(typeofseq, "p")) seq = protein; } } else{ reps = ajAcdGetInt("reps"); inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); if(jackknife || bootstrap || permute) { phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; phyloratecat = ajAcdGetProperties("categories"); if(phyloratecat) categories = true; if(!permute) { justweights = ajAcdGetListSingle("justweights"); if(ajStrMatchC(justweights, "j")) justwts = true; } } } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void seqboot_inputnumbersseq(AjPSeqset seqset) { /* read numbers of species and of sites */ spp = ajSeqsetGetSize(seqset); sites = ajSeqsetGetLen(seqset); loci = sites; maxalleles = 1; } /* seqboot_inputnumbersseq */ void inputoptions() { /* input the information on the options */ long weightsum, maxfactsize, i, j, k, l, m; if (data == genefreqs) { k = 0; l = 0; for (i = 0; i < (loci); i++) { m = alleles[i]; k++; for (j = 1; j <= m; j++) { l++; factorr[l - 1] = k; } } } else { for (i = 1; i <= (sites); i++) factorr[i - 1] = i; } for (i = 0; i < (sites); i++) oldweight[i] = 1; if (weights) inputweightsstr2(phyloweights->Str[0],0, sites, &weightsum, oldweight, &weights, "seqboot"); if (factors && printdata) { for(i = 0; i < sites; i++) factor[i] = (char)('0' + (factorr[i]%10)); printfactors(outfile, sites, factor, " (least significant digit)"); } if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); for (i = 0; i < (loci); i++) how_many[i] = 0; for (i = 0; i < (loci); i++) where[i] = 0; for (i = 1; i <= (sites); i++) { how_many[factorr[i - 1] - 1]++; if (where[factorr[i - 1] - 1] == 0) where[factorr[i - 1] - 1] = i; } groups = factorr[sites - 1]; newgroups = 0; newsites = 0; maxfactsize = 0; for(i = 0 ; i < loci ; i++){ if(how_many[i] > maxfactsize){ maxfactsize = how_many[i]; } } maxnewsites = groups * maxfactsize; allocnew(); for (i = 0; i < groups; i++) { if (oldweight[where[i] - 1] > 0) { newgroups++; newsites += how_many[i]; newwhere[newgroups - 1] = where[i]; newhowmany[newgroups - 1] = how_many[i]; } } } /* inputoptions */ char **matrix_char_new(long rows, long cols) { char **mat; long i; assert(rows > 0); assert(cols > 0); mat = (char **)Malloc(rows*sizeof(char *)); for (i = 0; i < rows; i++) mat[i] = (char *)Malloc(cols*sizeof(char)); return mat; } void matrix_char_delete(char **mat, long rows) { long i; assert(mat != NULL); for (i = 0; i < rows; i++) free(mat[i]); free(mat); } double **matrix_double_new(long rows, long cols) { double **mat; long i; assert(rows > 0); assert(cols > 0); mat = (double **)Malloc(rows*sizeof(double *)); for (i = 0; i < rows; i++) mat[i] = (double *)Malloc(cols*sizeof(double)); return mat; } void matrix_double_delete(double **mat, long rows) { long i; assert(mat != NULL); for (i = 0; i < rows; i++) free(mat[i]); free(mat); } void seqboot_inputdataseq(AjPSeqset seqset) { /* input the names and sequences for each species */ long i, j, k, l, m, n; Char charstate; boolean allread, done; const AjPStr str; nodep = matrix_char_new(spp, sites); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, 
"Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); fprintf(outfile, " site\n\n"); } else { if (ild) { fprintf(outfile, "Site"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) fprintf(outfile, "Site"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, ", spp); fprintf(outfile, "%3ld sites\n\n", sites); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } allread = false; while (!allread) { i = 1; while (i <= spp) { initnameseq(seqset, i-1); str = ajSeqGetSeqS(ajSeqsetGetseqSeq(seqset, i-1)); j=0; done = false; while (!done) { while (j < sites ) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); j++; if (charstate == '.') charstate = nodep[0][j-1]; nodep[i-1][j-1] = charstate; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (!printdata) return; m = (sites - 1) / 60 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; n = (i - 1) * 60; for (k = n; k < l; k++) { if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) charstate = '.'; else charstate = nodep[j][k]; putc(charstate, outfile); if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdataseq */ void allocrest() { /* allocate memory for bookkeeping arrays */ oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); if (categories) category = (steptr)Malloc(sites*sizeof(long)); if (mixture) mixdata = (steptr)Malloc(sites*sizeof(long)); if (ancvar) ancdata = (steptr)Malloc(sites*sizeof(long)); where = (steptr)Malloc(loci*sizeof(long)); how_many = (steptr)Malloc(loci*sizeof(long)); factor = (Char *)Malloc(sites*sizeof(Char)); factorr = (steptr)Malloc(sites*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void freerest() { /* Free bookkeeping arrays */ if (alleles) free(alleles); free(oldweight); free(weight); if (categories) free(category); if (mixture) free(mixdata); if (ancvar) free(ancdata); free(where); free(how_many); free(factor); free(factorr); free(nayme); } void allocnew(void) { /* allocate memory for arrays that depend on the lenght of the output sequence*/ /* Only call this function once */ assert(newwhere == NULL && newhowmany == NULL); newwhere = (steptr)Malloc(loci*sizeof(long)); newhowmany = (steptr)Malloc(loci*sizeof(long)); } void freenew(void) { /* free arrays allocated by allocnew() */ /* Only call this function once */ assert(newwhere != NULL); assert(newhowmany != NULL); free(newwhere); free(newhowmany); } void allocnewer(long newergroups, long newersites) { /* allocate memory for arrays that depend on the length of the bootstrapped output sequence */ /* Assumes that spp remains constant */ static long 
curnewergroups = 0; static long curnewersites = 0; long i; if (newerwhere != NULL) { if (newergroups > curnewergroups) { free(newerwhere); free(newerhowmany); for (i = 0; i < spp; i++) free(charorder[i]); newerwhere = NULL; } if (newersites > curnewersites) { free(newerfactor); newerfactor = NULL; } } if (charorder == NULL) charorder = (steptr *)Malloc(spp*sizeof(steptr)); /* Malloc() will fail if either is 0, so add a dummy element */ if (newergroups == 0) newergroups++; if (newersites == 0) newersites++; if (newerwhere == NULL) { newerwhere = (steptr)Malloc(newergroups*sizeof(long)); newerhowmany = (steptr)Malloc(newergroups*sizeof(long)); for (i = 0; i < spp; i++) charorder[i] = (steptr)Malloc(newergroups*sizeof(long)); curnewergroups = newergroups; } if (newerfactor == NULL) { newerfactor = (steptr)Malloc(newersites*sizeof(long)); curnewersites = newersites; } } void freenewer() { /* Free memory allocated by allocnewer() */ /* spp must be the same as when allocnewer was called */ long i; if (newerwhere) { free(newerwhere); free(newerhowmany); free(newerfactor); for (i = 0; i < spp; i++) free(charorder[i]); free(charorder); } } void doinput(int argc, Char *argv[]) { /* reads the input data */ seqboot_inputnumbersseq(seqset); allocrest(); inputoptions(); seqboot_inputdataseq(seqset); } /* doinput */ void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; long grp = 0, site = 0; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; if (jackknife) { if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) { if (randum(seed) < (newgroups*fracsample - (long)(newgroups*fracsample)) /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) q = (long)(newgroups*fracsample)+1; else q = (long)(newgroups*fracsample); } else q = (long)(newgroups*fracsample+0.5); r = newgroups; p = q / r; ws = 0; for (i = 0; i < (newgroups); i++) { if (randum(seed) < p) { weight[i]++; ws++; q--; } r--; if (i + 1 < newgroups) p = q / r; } } else if (permute) { for (i = 0; i < (newgroups); i++) weight[i] = 1; } else if (bootstrap) { blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } } else /* case of rewriting data */ for (i = 0; i < (newgroups); i++) weight[i] = 1; /* Count number of replicated groups */ newergroups = 0; newersites = 0; for (i = 0; i < newgroups; i++) { newergroups += weight[i]; newersites += newhowmany[i] * weight[i]; } if (newergroups < 1) { fprintf(stdout, "ERROR: sampling frequency or number of sites is too small\n"); exxit(-1); } /* reallocate "newer" arrays, sized by output groups: * newerfactor, newerwhere, newerhowmany, and charorder */ allocnewer(newergroups, newersites); /* Replicate each group i weight[i] times */ grp = 0; site = 0; for (i = 0; i < newgroups; i++) { for (j = 0; j < weight[i]; j++) { for (k = 0; k < newhowmany[i]; k++) { newerfactor[site] = grp + 1; site++; } newerwhere[grp] = newwhere[i]; newerhowmany[grp] = newhowmany[i]; grp++; } } } /* bootweights */ void permute_vec(long *a, long n) { long i, j, k; for (i = 1; i < n; i++) { k = (long)((i+1) * randum(seed)); j = a[i]; a[i] = a[k]; a[k] = j; } } void sppermute(long n) { /* permute the species order as given in array sppord */ permute_vec(sppord[n-1], spp); } /* sppermute */ void charpermute(long m, long n) { /* permute the n+1 characters of species m+1 */ permute_vec(charorder[m], n); } /* 
charpermute */ void writedata() { /* write out one set of bootstrapped sequences */ long i, j, k, l, m, n, n2=0; double x; Char charstate; sppord = (long **)Malloc(newergroups*sizeof(long *)); for (i = 0; i < (newergroups); i++) sppord[i] = (long *)Malloc(spp*sizeof(long)); for (j = 1; j <= spp; j++) sppord[0][j - 1] = j; for (i = 1; i < newergroups; i++) { for (j = 1; j <= (spp); j++) sppord[i][j - 1] = sppord[i - 1][j - 1]; } if (!justwts || permute) { if (data == restsites && enzymes) fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); else if (data == genefreqs) fprintf(outfile, "%5ld %5ld\n", spp, newergroups); else { if ((data == seqs) && rewrite && xml) fprintf(outfile, "\n"); else if (rewrite && nexus) { fprintf(outfile, "#NEXUS\n"); fprintf(outfile, "BEGIN DATA;\n"); fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", spp, newersites); fprintf(outfile, " FORMAT"); if (interleaved) fprintf(outfile, " interleave=yes"); else fprintf(outfile, " interleave=no"); fprintf(outfile, " DATATYPE="); if (data == seqs) { switch (seq) { case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; case (rna): fprintf(outfile, "RNA missing=N gap=-"); break; case (protein): fprintf(outfile, "protein missing=? gap=-"); break; } } if (data == morphology) fprintf(outfile, "STANDARD"); fprintf(outfile, ";\n MATRIX\n"); } else fprintf(outfile, "%5ld %5ld\n", spp, newersites); } if (data == genefreqs) { for (i = 0; i < (newergroups); i++) fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); putc('\n', outfile); } } l = 1; if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) && ((data == seqs) || (data == restsites))) { interleaved = !interleaved; if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) interleaved = false; } m = interleaved ? 
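/* ---------------------------------------------------------------------------
 * Illustrative aside, not part of the original fseqboot source.  The rewrite
 * branch above emits the header of a NEXUS DATA block before the resampled
 * matrix.  A condensed standalone sketch of that header layout follows; the
 * helper name is hypothetical and the datatype/missing/gap string is supplied
 * by the caller, matching the strings used in the code above:
 */
#include <stdio.h>

static void example_nexus_header(FILE *fp, long ntax, long nchar,
                                 int interleaved, const char *datatype)
{
  fprintf(fp, "#NEXUS\n");
  fprintf(fp, "BEGIN DATA;\n");
  fprintf(fp, "  DIMENSIONS NTAX=%ld NCHAR=%ld;\n", ntax, nchar);
  fprintf(fp, "  FORMAT interleave=%s DATATYPE=%s;\n",
          interleaved ? "yes" : "no", datatype);
  fprintf(fp, "  MATRIX\n");   /* sequence rows follow, then ";" and "END;" */
}
/* usage (sketch): example_nexus_header(outfile, spp, newersites, interleaved,
 *                                      "DNA missing=N gap=-");
 * end of aside ------------------------------------------------------------ */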
60 : newergroups; do { if (m > newergroups) m = newergroups; for (j = 0; j < spp; j++) { n = 0; if ((l == 1) || (interleaved && nexus)) { if (rewrite && xml) { fprintf(outfile, " \n"); fprintf(outfile, " "); } n2 = nmlngth; if (rewrite && (xml || nexus)) { while (nayme[j][n2-1] == ' ') n2--; } if (nexus) fprintf(outfile, " "); for (k = 0; k < n2; k++) if (nexus && (nayme[j][k] == ' ') && (k < n2)) putc('_', outfile); else putc(nayme[j][k], outfile); if (rewrite && xml) fprintf(outfile, "\n "); } else { if (rewrite && xml) { fprintf(outfile, " "); } } if (!xml) { for (k = 0; k < nmlngth-n2; k++) fprintf(outfile, " "); fprintf(outfile, " "); } for (k = l - 1; k < m; k++) { if (permute && j + 1 == 1) sppermute(newerfactor[n]); /* we can assume chars not permuted */ for (n2 = -1; n2 <= (newerhowmany[charorder[j][k]] - 2); n2++) { n++; if (data == genefreqs) { if (n > 1 && (n & 7) == 1) fprintf(outfile, "\n "); x = nodef[sppord[charorder[j][k]][j] - 1] [newerwhere[charorder[j][k]] + n2]; fprintf(outfile, "%8.5f", x); } else { if (rewrite && xml && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); charstate = nodep[sppord[charorder[j][k]][j] - 1] [newerwhere[charorder[j][k]] + n2]; putc(charstate, outfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfile); } } } if (rewrite && xml) { fprintf(outfile, "\n \n"); } putc('\n', outfile); } if (interleaved) { if ((m <= newersites) && (newersites > 60)) putc('\n', outfile); l += 60; m += 60; } } while (interleaved && l <= newersites); if ((data == seqs) && (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) fprintf(outfile, "\n"); if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) fprintf(outfile, " ;\nEND;\n"); for (i = 0; i < (newergroups); i++) free(sppord[i]); free(sppord); } /* writedata */ void writeweights() { /* write out one set of post-bootstrapping weights */ long j, k, l, m, n, o; j = 0; l = 1; if (interleaved) m = 60; else m = sites; do { if(m > sites) m = sites; n = 0; for (k = l - 1; k < m; k++) { for(o = 0 ; o < how_many[k] ; o++){ if(oldweight[k]==0){ fprintf(outweightfile, "0"); j++; } else{ if (weight[k-j] < 10) fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); else fprintf(outweightfile, "%c", (char)('A'+weight[k-j]-10)); n++; if (!interleaved && n > 1 && n % 60 == 1) { fprintf(outweightfile, "\n"); if (n % 10 == 0 && n % 60 != 0) putc(' ', outweightfile); } } } } putc('\n', outweightfile); if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= sites); } /* writeweights */ void writecategories() { /* write out categories for the bootstrapped sequences */ long k, l, m, n, n2; Char charstate; if(justwts){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n=0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[k]; putc(charstate, outcatfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outcatfile, "\n"); return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[newerwhere[k] + n2]; putc(charstate, outcatfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outcatfile); } } if (interleaved) { l += 60; m += 60; } } while 
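/* ---------------------------------------------------------------------------
 * Illustrative aside, not part of the original fseqboot source.
 * writeweights() above encodes each resampled site weight as a single
 * character: 0-9 are written directly, and weights of 10, 11, ... are written
 * as 'A', 'B', ... .  A self-contained pair of helpers showing the same
 * encoding (hypothetical names; weights above 35 would need a wider scheme
 * and are not handled here):
 */
static char example_weight_to_char(long w)
{
  return (char)(w < 10 ? '0' + w : 'A' + (w - 10));
}

static long example_char_to_weight(char c)
{
  return (c >= '0' && c <= '9') ? c - '0' : c - 'A' + 10;
}
/* end of aside ------------------------------------------------------------ */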
(interleaved && l <= newersites); fprintf(outcatfile, "\n"); } /* writecategories */ void writeauxdata(steptr auxdata, FILE *outauxfile) { /* write out auxiliary option data (mixtures, ancestors, etc.) to appropriate file. Samples parralel to data, or just gives one output entry if justwts is true */ long k, l, m, n, n2; Char charstate; /* if we just output weights (justwts), and this is first set just output the data unsampled */ if(justwts){ if(firstrep){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[k]; putc(charstate, outauxfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outauxfile, "\n"); } return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[newerwhere[k] + n2]; putc(charstate, outauxfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outauxfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outauxfile, "\n"); } /* writeauxdata */ void writefactors(void) { long i, k, l, m, n, writesites; char symbol; /*steptr wfactor;*/ long grp; if(!justwts || firstrep){ if(justwts){ writesites = sites; /*wfactor = factorr;*/ } else { writesites = newergroups; /*wfactor = newerfactor;*/ } symbol = '+'; if (interleaved) m = 60; else m = writesites; l=1; do { if(m > writesites) m = writesites; n = 0; for(k=l-1 ; k < m ; k++){ grp = charorder[0][k]; for(i = 0; i < newerhowmany[grp]; i++) { putc(symbol, outfactfile); n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outfactfile, "\n "); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfactfile); } symbol = (symbol == '+') ? 
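/* ---------------------------------------------------------------------------
 * Illustrative aside, not part of the original fseqboot source.  The writers
 * above (writeweights, writecategories, writeauxdata, writefactors) all share
 * one layout: characters are printed 60 per line with a blank after every
 * block of 10.  A stripped-down sketch of that layout loop for a plain
 * character array (hypothetical helper; the real routines additionally expand
 * each group by its newhowmany[] length and handle the interleaved case):
 */
#include <stdio.h>

static void example_print_blocks(FILE *fp, const char *data, long n)
{
  long i;

  for (i = 0; i < n; i++) {
    putc(data[i], fp);
    if ((i + 1) % 60 == 0)        /* end of a 60-character line           */
      putc('\n', fp);
    else if ((i + 1) % 10 == 0)   /* blank between 10-character columns   */
      putc(' ', fp);
  }
  if (n % 60 != 0)
    putc('\n', fp);
}
/* end of aside ------------------------------------------------------------ */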
'-' : '+'; } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= writesites); fprintf(outfactfile, "\n"); } } /* writefactors */ void bootwrite() { /* does bootstrapping and writes out data sets */ long i, j, rr, repdiv10; if (rewrite) reps = 1; repdiv10 = reps / 10; if (repdiv10 < 1) repdiv10 = 1; if (progress) putchar('\n'); firstrep = true; for (rr = 1; rr <= (reps); rr++) { bootweights(); for (i = 0; i < spp; i++) for (j = 0; j < newergroups; j++) charorder[i][j] = j; if (ild) { charpermute(0, newergroups); for (i = 1; i < spp; i++) for (j = 0; j < newergroups; j++) charorder[i][j] = charorder[0][j]; } if (lockhart) for (i = 0; i < spp; i++) charpermute(i, newergroups); if (!justwts || permute || ild || lockhart) writedata(); if (justwts && !(permute || ild || lockhart)) writeweights(); if (categories) writecategories(); if (factors) writefactors(); if (mixture) writeauxdata(mixdata, outmixfile); if (ancvar) writeauxdata(ancdata, outancfile); if (progress && !rewrite && ((reps < 10) || rr % repdiv10 == 0)) { printf("completed replicate number %4ld\n", rr); #ifdef WIN32 phyFillScreenColor(); #endif firstrep = false; } } if (progress) { if (justwts) printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); else printf("\nOutput written to file \"%s\"\n\n", outfilename); } } /* bootwrite */ int main(int argc, Char *argv[]) { /* Read in sequences or frequencies and bootstrap or jackknife them */ #ifdef MAC argc = 1; /* macsetup("SeqBoot",""); */ argv[0] = "SeqBoot"; #endif init(argc,argv); emboss_getoptions("fseqboot", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinput(argc, argv); bootwrite(); freenewer(); freenew(); freerest(); if (nodep) matrix_char_delete(nodep, spp); if (nodef) matrix_double_delete(nodef, spp); FClose(infile); if (factors) { FClose(factfile); FClose(outfactfile); } if (weights) FClose(weightfile); if (categories) { FClose(catfile); FClose(outcatfile); } if(mixture) FClose(outmixfile); if(ancvar) FClose(outancfile); if (justwts && !permute) { FClose(outweightfile); } else FClose(outfile); #ifdef MAC fixmacfile(outfilename); if (justwts && !permute) fixmacfile(outweightfilename); if (categories) fixmacfile(outcatfilename); if (mixture) fixmacfile(outmixfilename); #endif if(progress) printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/promlk.c0000664000175000017500000025364311616234204012541 00000000000000/* PHYLIP version 3.6. (c) Copyright 1986-2007 by the University of * Washington and by Joseph Felsenstein. Written by Joseph * Felsenstein. Permission is granted to copy and use this program * provided no fee is charged for it and provided that this copyright * notice is not removed. 
*/ #include "phylip.h" #include "seq.h" #include "mlclock.h" #include "printree.h" #define epsilon 0.0001 /* used in makenewv, getthree, update */ #define over 60 typedef long vall[maxcategs]; typedef double contribarr[maxcategs]; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ void init_protmats(void); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void initmemrates(void); void makeprotfreqs(void); void allocrest(void); void doinit(void); void inputoptions(void); void input_protdata(AjPSeqset, long); void makeweights(void); void prot_makevalues(long, pointarray, long, long, sequence, steptr); void getinput(void); void prot_inittable(void); void alloc_pmatrix(long); void make_pmatrix(double **, double **, double **, long, double, double, double *, double **); boolean prot_nuview(node *); void getthree(node *p, double thigh, double tlow); void update(node *); void smooth(node *); void promlk_add(node *, node *, node *, boolean); void promlk_re_move(node **, node **, boolean); double prot_evaluate(node *); void tryadd(node *, node **, node **); void addpreorder(node *, node *, node *, boolean); void restoradd(node *, node *, node *, double); void tryrearr(node *, boolean *); void repreorder(node *, boolean *); void rearrange(node **); void nodeinit(node *); void initrav(node *); void travinit(node *); void travsp(node *); void treevaluate(void); void prot_reconstr(node *, long); void rectrav(node *, long, long); void summarize(void); void promlk_treeout(node *); void initpromlnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void tymetrav(node *, double *); void free_all_protx(long, pointarray); void maketree(void); void clean_up(void); void reallocsites(void); void prot_freetable(void); void free_pmatrix(long sib); void invalidate_traverse(node *p); void invalidate_tyme(node *p); /* function prototypes */ #endif /* OLDC */ extern sequence y; double **tbl; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; Char infilename[FNMLNGTH], intreename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH]; double *rrate; long sites, weightsum, categs, datasets, ith, njumble, jumb, numtrees, shimotrees; /* sites = number of sites in actual sequences numtrees = number of user-defined trees */ long inseed, inseed0, mx, mx0, mx1; boolean global, jumble, trout, usertree, weights, rctgry, ctgry, auto_, progress, mulsets, firstset, hypstate, smoothit, polishing, justwts, gama, invar, usejtt, usepmb, usepam; boolean lengthsopt = false; /* Use lengths in user tree option */ boolean lngths = false; /* Actually use lengths (depends on each input tree) */ tree curtree, bestree, bestree2; node *qwhere, *grbg; double sumrates, cv, alpha, lambda, lambda1, invarfrac; long *enterorder; steptr aliasweight; double *rate; longer seed; double *probcat; contribarr *contribution; char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-"; char *progname; long rcategs, nonodes2; boolean mnv_success = false; /* Local variables for maketree, propagated globally for C version: */ long k, maxwhich, col; double like, bestyet, tdelta, lnlike, slope, curv, maxlogl; boolean lastsp, smoothed, succeeded; double *l0gl; double x[3], lnl[3]; double expon1i[maxcategs], expon1v[maxcategs], expon2i[maxcategs], expon2v[maxcategs]; node *there; double **l0gf; Char 
ch, ch2; vall *mp; /* Variables introduced to allow for protein probability calculations */ long max_num_sibs; /* maximum number of siblings used in a */ /* nuview calculation. determines size */ /* final size of pmatrices */ double *eigmat; /* eig matrix variable */ double **probmat; /* prob matrix variable */ double ****dpmatrix; /* derivative of pmatrix */ double ****ddpmatrix; /* derivative of xpmatrix */ double *****pmatrices; /* matrix of probabilities of protien */ /* conversion. The 5 subscripts refer */ /* to sibs, rcategs, categs, final and */ /* initial states, respectively. */ double freqaa[20]; /* amino acid frequencies */ /* this jtt matrix decomposition due to Elisabeth Tillier */ static double jtteigmat[] = {+0.00000000000000,-1.81721720738768,-1.87965834528616,-1.61403121885431, -1.53896608443751,-1.40486966367848,-1.30995061286931,-1.24668414819041, -1.17179756521289,-0.31033320987464,-0.34602837857034,-1.06031718484613, -0.99900602987105,-0.45576774888948,-0.86014403434677,-0.54569432735296, -0.76866956571861,-0.60593589295327,-0.65119724379348,-0.70249806480753}; static double jttprobmat[20][20] = {{+0.07686196156903,+0.05105697447152,+0.04254597872702,+0.05126897436552, +0.02027898986051,+0.04106097946952,+0.06181996909002,+0.07471396264303, +0.02298298850851,+0.05256897371552,+0.09111095444453,+0.05949797025102, +0.02341398829301,+0.04052997973502,+0.05053197473402,+0.06822496588753, +0.05851797074102,+0.01433599283201,+0.03230298384851,+0.06637396681302}, {-0.04445795120462,-0.01557336502860,-0.09314817363516,+0.04411372100382, -0.00511178725134,+0.00188472427522,-0.02176250428454,-0.01330231089224, +0.01004072641973,+0.02707838224285,-0.00785039050721,+0.02238829876349, +0.00257470703483,-0.00510311699563,-0.01727154263346,+0.20074235330882, -0.07236268502973,-0.00012690116016,-0.00215974664431,-0.01059243778174}, {+0.09480046389131,+0.00082658405814,+0.01530023104155,-0.00639909042723, +0.00160605602061,+0.00035896642912,+0.00199161318384,-0.00220482855717, -0.00112601328033,+0.14840201765438,-0.00344295714983,-0.00123976286718, -0.00439399942758,+0.00032478785709,-0.00104270266394,-0.02596605592109, -0.05645800566901,+0.00022319903170,-0.00022792271829,-0.16133258048606}, {-0.06924141195400,-0.01816245289173,-0.08104005811201,+0.08985697111009, +0.00279659017898,+0.01083740322821,-0.06449599336038,+0.01794514261221, +0.01036809141699,+0.04283504450449,+0.00634472273784,+0.02339134834111, -0.01748667848380,+0.00161859106290,+0.00622486432503,-0.05854130195643, +0.15083728660504,+0.00030733757661,-0.00143739522173,-0.05295810171941}, {-0.14637948915627,+0.02029296323583,+0.02615316895036,-0.10311538564943, -0.00183412744544,-0.02589124656591,+0.11073673851935,+0.00848581728407, +0.00106057791901,+0.05530240732939,-0.00031533506946,-0.03124002869407, -0.01533984125301,-0.00288717337278,+0.00272787410643,+0.06300929916280, +0.07920438311152,-0.00041335282410,-0.00011648873397,-0.03944076085434}, {-0.05558229086909,+0.08935293782491,+0.04869509588770,+0.04856877988810, -0.00253836047720,+0.07651693957635,-0.06342453535092,-0.00777376246014, -0.08570270266807,+0.01943016473512,-0.00599516526932,-0.09157595008575, -0.00397735155663,-0.00440093863690,-0.00232998056918,+0.02979967701162, -0.00477299485901,-0.00144011795333,+0.01795114942404,-0.00080059359232}, {+0.05807741644682,+0.14654292420341,-0.06724975334073,+0.02159062346633, -0.00339085518294,-0.06829036785575,+0.03520631903157,-0.02766062718318, 
+0.03485632707432,-0.02436836692465,-0.00397566003573,-0.10095488644404, +0.02456887654357,+0.00381764117077,-0.00906261340247,-0.01043058066362, +0.01651199513994,-0.00210417220821,-0.00872508520963,-0.01495915462580}, {+0.02564617106907,+0.02960554611436,-0.00052356748770,+0.00989267817318, -0.00044034172141,-0.02279910634723,-0.00363768356471,-0.01086345665971, +0.01229721799572,+0.02633650142592,+0.06282966783922,-0.00734486499924, -0.13863936313277,-0.00993891943390,-0.00655309682350,-0.00245191788287, -0.02431633805559,-0.00068554031525,-0.00121383858869,+0.06280025239509}, {+0.11362428251792,-0.02080375718488,-0.08802750967213,-0.06531316372189, -0.00166626058292,+0.06846081717224,+0.07007301248407,-0.01713112936632, -0.05900588794853,-0.04497159138485,+0.04222484636983,+0.00129043178508, -0.01550337251561,-0.01553102163852,-0.04363429852047,+0.01600063777880, +0.05787328925647,-0.00008265841118,+0.02870014572813,-0.02657681214523}, {+0.01840541226842,+0.00610159018805,+0.01368080422265,+0.02383751807012, -0.00923516894192,+0.01209943150832,+0.02906782189141,+0.01992384905334, +0.00197323568330,+0.00017531415423,-0.01796698381949,+0.01887083962858, -0.00063335886734,-0.02365277334702,+0.01209445088200,+0.01308086447947, +0.01286727242301,-0.11420358975688,-0.01886991700613,+0.00238338728588}, {-0.01100105031759,-0.04250695864938,-0.02554356700969,-0.05473632078607, +0.00725906469946,-0.03003724918191,-0.07051526125013,-0.06939439879112, -0.00285883056088,+0.05334304124753,+0.12839241846919,-0.05883473754222, +0.02424304967487,+0.09134510778469,-0.00226003347193,-0.01280041778462, -0.00207988305627,-0.02957493909199,+0.05290385686789,+0.05465710875015}, {-0.01421274522011,+0.02074863337778,-0.01006411985628,+0.03319995456446, -0.00005371699269,-0.12266046460835,+0.02419847062899,-0.00441168706583, -0.08299118738167,-0.00323230913482,+0.02954035119881,+0.09212856795583, +0.00718635627257,-0.02706936115539,+0.04473173279913,-0.01274357634785, -0.01395862740618,-0.00071538848681,+0.04767640012830,-0.00729728326990}, {-0.03797680968123,+0.01280286509478,-0.08614616553187,-0.01781049963160, +0.00674319990083,+0.04208667754694,+0.05991325707583,+0.03581015660092, -0.01529816709967,+0.06885987924922,-0.11719120476535,-0.00014333663810, +0.00074336784254,+0.02893416406249,+0.07466151360134,-0.08182016471377, -0.06581536577662,-0.00018195976501,+0.00167443595008,+0.09015415667825}, {+0.03577726799591,-0.02139253448219,-0.01137813538175,-0.01954939202830, -0.04028242801611,-0.01777500032351,-0.02106862264440,+0.00465199658293, -0.02824805812709,+0.06618860061778,+0.08437791757537,-0.02533125946051, +0.02806344654855,-0.06970805797879,+0.02328376968627,+0.00692992333282, +0.02751392122018,+0.01148722812804,-0.11130404325078,+0.07776346000559}, {-0.06014297925310,-0.00711674355952,-0.02424493472566,+0.00032464353156, +0.00321221847573,+0.03257969053884,+0.01072805771161,+0.06892027923996, +0.03326534127710,-0.01558838623875,+0.13794237677194,-0.04292623056646, +0.01375763233229,-0.11125153774789,+0.03510076081639,-0.04531670712549, -0.06170413486351,-0.00182023682123,+0.05979891871679,-0.02551802851059}, {-0.03515069991501,+0.02310847227710,+0.00474493548551,+0.02787717003457, -0.12038329679812,+0.03178473522077,+0.04445111601130,-0.05334957493090, +0.01290386678474,-0.00376064171612,+0.03996642737967,+0.04777677295520, +0.00233689200639,+0.03917715404594,-0.01755598277531,-0.03389088626433, -0.02180780263389,+0.00473402043911,+0.01964539477020,-0.01260807237680}, 
{-0.04120428254254,+0.00062717164978,-0.01688703578637,+0.01685776910152, +0.02102702093943,+0.01295781834163,+0.03541815979495,+0.03968150445315, -0.02073122710938,-0.06932247350110,+0.11696314241296,-0.00322523765776, -0.01280515661402,+0.08717664266126,+0.06297225078802,-0.01290501780488, -0.04693925076877,-0.00177653675449,-0.08407812137852,-0.08380714022487}, {+0.03138655228534,-0.09052573757196,+0.00874202219428,+0.06060593729292, -0.03426076652151,-0.04832468257386,+0.04735628794421,+0.14504653737383, -0.01709111334001,-0.00278794215381,-0.03513813820550,-0.11690294831883, -0.00836264902624,+0.03270980973180,-0.02587764129811,+0.01638786059073, +0.00485499822497,+0.00305477087025,+0.02295754527195,+0.00616929722958}, {-0.04898722042023,-0.01460879656586,+0.00508708857036,+0.07730497806331, +0.04252420017435,+0.00484232580349,+0.09861807969412,-0.05169447907187, -0.00917820907880,+0.03679081047330,+0.04998537112655,+0.00769330211980, +0.01805447683564,-0.00498723245027,-0.14148416183376,-0.05170281760262, -0.03230723310784,-0.00032890672639,-0.02363523071957,+0.03801365471627}, {-0.02047562162108,+0.06933781779590,-0.02101117884731,-0.06841945874842, -0.00860967572716,-0.00886650271590,-0.07185241332269,+0.16703684361030, -0.00635847581692,+0.00811478913823,+0.01847205842216,+0.06700967948643, +0.00596607376199,+0.02318239240593,-0.10552958537847,-0.01980199747773, -0.02003785382406,-0.00593392430159,-0.00965391033612,+0.00743094349652}}; /* dcmut version of PAM model from http://www.ebi.ac.uk/goldman-srv/dayhoff/ */ static double pameigmat[] = {0,-1.93321786301018,-2.20904642493621,-1.74835983874903, -1.64854548332072,-1.54505559488222,-1.33859384676989,-1.29786201193594, -0.235548517495575,-0.266951066089808,-0.28965813670665,-1.10505826965282, -1.04323310568532,-0.430423720979904,-0.541719761016713,-0.879636093986914, -0.711249353378695,-0.725050487280602,-0.776855937389452,-0.808735559461343}; static double pamprobmat[20][20] ={ {0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 0.08048095976, 0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764}, {0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875}, {-0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643}, {0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618}, {0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, 
-0.1063484263}, {-0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, -0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976}, {-0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261}, {-0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, -0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439}, {0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151}, {-0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656}, {-0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605}, {0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363}, {0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355}, {0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309}, {0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361}, {0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116}, {0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, 
-0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802}, {0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243}, {-0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, -0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609}, {-0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, -0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059}}; /* this pmb matrix decomposition due to Elisabeth Tillier */ static double pmbeigmat[20] = {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; static double pmbprobmat[20][20] = {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, 0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, {0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, 
-0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, -0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, -0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, -0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, 
0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, 0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, -0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} ; void init_protmats(void) { long l; eigmat = (double *) Malloc (20 * sizeof(double)); for (l = 0; l <= 19; l++) if (usejtt) eigmat[l] = jtteigmat[l]; /** changed from jtteigmat*100. **/ else { if (usepmb) eigmat[l] = pmbeigmat[l]; else eigmat[l] = pameigmat[l]; /** changed from pameigmat*100. 
**/ } probmat = (double **) Malloc (20 * sizeof(double *)); for (l = 0; l <= 19; l++) { if (usejtt) { probmat[l] = jttprobmat[l]; } else { if (usepmb) probmat[l] = pmbprobmat[l]; else probmat[l] = pamprobmat[l]; } } } /* init_protmats */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { long i; double probsum; AjPStr model = NULL; AjPStr gammamethod = NULL; AjPFloat hmmrates; AjPFloat hmmprob; AjPFloat arrayval; auto_ = false; ctgry = false; rctgry = false; categs = 1; rcategs = 1; gama = false; global = false; hypstate = false; invar = false; jumble = false; njumble = 1; lambda = 1.0; lambda1 = 0.0; lngths = false; trout = true; usepam = false; usepmb = false; usejtt = false; usertree = false; weights = false; printdata = false; progress = true; treeprint = true; interleaved = true; datasets = 1; mulsets = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; lngths = ajAcdGetBoolean("lengths"); } numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } model = ajAcdGetListSingle("model"); if(ajStrMatchC(model, "j")) usejtt = true; if(ajStrMatchC(model, "h")) usepmb = true; if(ajStrMatchC(model, "d")) usepam = true; categs = ajAcdGetInt("ncategories"); if (categs > 1) { ctgry = true; rate = (double *) Malloc(categs * sizeof(double)); arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } else{ rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } phyloratecat = ajAcdGetProperties("categories"); gammamethod = ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "n")) { rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); rrate[0] = 1.0; probcat[0] = 1.0; } else { rctgry = true; auto_ = ajAcdGetBoolean("adjsite"); if(auto_) { lambda = ajAcdGetFloat("lambda"); lambda = 1 / lambda; lambda1 = 1.0 - lambda; } } if(ajStrMatchC(gammamethod, "g")) { gama = true; rcategs = ajAcdGetInt("ngammacat"); cv = ajAcdGetFloat("gammacoefficient"); alpha = 1.0 / (cv*cv); initmemrates(); initgammacat(rcategs, alpha, rrate, probcat); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; rcategs = ajAcdGetInt("ninvarcat"); cv = ajAcdGetFloat("invarcoefficient"); alpha = 1.0 / (cv*cv); invarfrac = ajAcdGetFloat("invarfrac"); initmemrates(); initgammacat(rcategs-1, alpha, rrate, probcat); for (i=0; i < rcategs-1 ; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else if(ajStrMatchC(gammamethod, "h")) { rcategs = ajAcdGetInt("nhmmcategories"); initmemrates(); hmmrates = ajAcdGetArray("hmmrates"); emboss_initcategs(hmmrates, rcategs,rrate); hmmprob = ajAcdGetArray("hmmprobabilities"); for (i=0; i < rcategs; i++){ probcat[i] = ajFloatGet(hmmprob, i); probsum += probcat[i]; } } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; if(!usertree) { global = ajAcdGetBoolean("global"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = 
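/* ---------------------------------------------------------------------------
 * Illustrative aside, not part of the original promlk source.  The option
 * handling above converts the user-supplied coefficient of variation into the
 * gamma shape parameter, alpha = 1/(cv*cv), and for the invariant-sites
 * choice rescales the gamma category probabilities by (1 - invarfrac) and
 * appends a zero-rate class with probability invarfrac.  A standalone sketch
 * of that last bookkeeping step (hypothetical name; the gamma categories are
 * assumed to have been filled in already, as initgammacat() does above):
 */
static void example_add_invariant_class(double *rates, double *probs,
                                        long ncat, double invarfrac)
{
  long i;

  /* ncat counts all classes, including the invariant one appended last */
  for (i = 0; i < ncat - 1; i++)
    probs[i] *= (1.0 - invarfrac);   /* shrink the variable-rate classes */
  probs[ncat - 1] = invarfrac;       /* invariant sites ...              */
  rates[ncat - 1] = 0.0;             /* ... never change                 */
}
/* end of aside ------------------------------------------------------------ */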
ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); hypstate = ajAcdGetBoolean("hypstate"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nAmino acid sequence\n"); fprintf(outfile, " Maximum Likelihood method with molecular "); fprintf(outfile, "clock, version %s\n\n", VERSION); init_protmats(); } /* emboss_getoptions */ void initmemrates(void) { probcat = (double *) Malloc(rcategs * sizeof(double)); rrate = (double *) Malloc(rcategs * sizeof(double)); } void makeprotfreqs(void) { /* calculate amino acid frequencies based on eigmat */ long i, mineig; mineig = 0; for (i = 0; i <= 19; i++) if (fabs(eigmat[i]) < fabs(eigmat[mineig])) mineig = i; memcpy(freqaa, probmat[mineig], 20 * sizeof(double)); for (i = 0; i <= 19; i++) freqaa[i] = fabs(freqaa[i]); } /* makeprotfreqs */ void reallocsites(void) { long i; for (i = 0; i < spp; i++) y[i] = (char *)Malloc(sites * sizeof(char)); enterorder = (long *)Malloc(spp*sizeof(long)); weight = (long *)Malloc(sites*sizeof(long)); category = (long *)Malloc(sites*sizeof(long)); alias = (long *)Malloc(sites*sizeof(long)); aliasweight = (long *)Malloc(sites*sizeof(long)); ally = (long *)Malloc(sites*sizeof(long)); location = (long *)Malloc(sites*sizeof(long)); for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; makeweights(); } /* reallocsites */ void allocrest(void) { long i; y = (Char **)Malloc(spp*sizeof(Char *)); nayme = (naym *)Malloc(spp*sizeof(naym)); for (i = 0; i < spp; i++) y[i] = (char *)Malloc((sites+1) * sizeof(char)); enterorder = (long *)Malloc(spp*sizeof(long)); weight = (long *)Malloc(sites*sizeof(long)); category = (long *)Malloc(sites*sizeof(long)); alias = (long *)Malloc(sites*sizeof(long)); aliasweight = (long *)Malloc(sites*sizeof(long)); ally = (long *)Malloc(sites*sizeof(long)); location = (long *)Malloc(sites*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &sites, &nonodes, 1); makeprotfreqs(); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); alloctree(&curtree.nodep, nonodes, usertree); allocrest(); if (usertree) return; alloctree(&bestree.nodep, nonodes, 0); if (njumble <= 1) return; alloctree(&bestree2.nodep, nonodes, 0); } /* doinit */ void inputoptions() { long i; if (!firstset) { samenumspseq(seqsets[ith-1], &sites, ith); reallocsites(); } if (firstset) { for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; } if (justwts || weights) inputweightsstr(phyloweights->Str[ith-1], sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if ((ctgry && categs > 1) && (firstset || !justwts)) { inputcategsstr(phyloratecat->Str[0], 0, sites, category, categs, "ProMLK"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) printweights(outfile, 0, sites, weight, "Sites"); fprintf(outfile, "%s model of amino acid change\n\n", (usejtt ? "Jones-Taylor-Thornton" : usepmb ? 
"Henikoff/Tillier PMB" : "Dayhoff PAM")); } /* inputoptions */ void input_protdata(AjPSeqset seqset, long chars) { /* input the names and sequences for each species */ /* used by proml */ long i, j, k, l; Char charstate; if (printdata) headings(chars, "Sequences", "---------"); for(i=0;i chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && y[j - 1][k - 1] == y[0][k - 1]) charstate = '.'; else charstate = y[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* input_protdata */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2(sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { ally[alias[i - 1] - 1] = alias[i - 1]; location[alias[i - 1] - 1] = i; } mp = (vall *) Malloc(sites*sizeof(vall)); contribution = (contribarr *) Malloc( endsite*sizeof(contribarr)); } /* makeweights */ void prot_makevalues(long categs, pointarray treenode, long endsite, long spp, sequence y, steptr alias) { /* set up fractional likelihoods at tips */ /* a version of makevalues2 found in seq.c */ /* used by proml */ long i, j, k, l; long b; for (k = 0; k < endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (l = 0; l < categs; l++) { memset(treenode[i]->protx[k][l], 0, sizeof(double)*20); switch (y[i][j - 1]) { case 'A': treenode[i]->protx[k][l][0] = 1.0; break; case 'R': treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0; break; case 'N': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; break; case 'D': treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'C': treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0; break; case 'Q': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; break; case 'E': treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; break; case 'G': treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0; break; case 'H': treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0; break; case 'I': treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0; break; case 'L': treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0; break; case 'K': treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0; break; case 'M': treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0; break; case 'F': treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0; break; case 'P': treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0; break; case 'S': treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0; break; case 'T': treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0; break; case 'W': treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0; break; case 'Y': treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0; break; case 'V': treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0; break; case 'B': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'Z': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; 
break; case 'X': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '?': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '*': /* stop codon symbol */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '-': /* deletion event-absent data or aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; } } } } } /* prot_makevalues */ void getinput() { long grcategs; /* reads the input data */ if (!justwts || firstset) inputoptions(); if (!justwts || firstset) input_protdata(seqsets[ith-1],sites); makeweights(); setuptree2(&curtree); if (!usertree) { setuptree2(&bestree); if (njumble > 1) setuptree2(&bestree2); } grcategs = (categs > rcategs) ? categs : rcategs; prot_allocx(nonodes, grcategs, curtree.nodep, usertree); if (!usertree) { prot_allocx(nonodes, grcategs, bestree.nodep, 0); if (njumble > 1) prot_allocx(nonodes, grcategs, bestree2.nodep, 0); } prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias); } /* getinput */ void prot_freetable(void) { long i,j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(ddpmatrix[j][k][l]); free(ddpmatrix[j][k]); } free(ddpmatrix[j]); } free(ddpmatrix); for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(dpmatrix[j][k][l]); free(dpmatrix[j][k]); } free(dpmatrix[j]); } free(dpmatrix); for (j = 0; j < rcategs; j++) free(tbl[j]); free(tbl); for ( i = 0 ; i < max_num_sibs ; i++ ) free_pmatrix(i); free(pmatrices); } /* prot_freetable */ void prot_inittable() { /* Define a lookup table. Precompute values and print them out in tables */ /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */ long i, j, k, l; double sumrates; /* Allocate memory for pmatrices, the array of pointers to pmatrices */ pmatrices = (double *****) Malloc (spp * sizeof(double ****)); /* Allocate memory for the first 2 pmatrices, the matrix of conversion */ /* probabilities, but only once per run (aka not on the second jumble. 
*/ alloc_pmatrix(0); alloc_pmatrix(1); /* Allocate memory for one dpmatrix, the first derivative matrix */ dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory for one ddpmatrix, the second derivative matrix */ ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory and assign values to tbl, the matrix of possible rates*/ tbl = (double **) Malloc( rcategs * sizeof(double *)); for (j = 0; j < rcategs; j++) tbl[j] = (double *) Malloc( categs * sizeof(double)); for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) tbl[j][k] = rrate[j]*rate[k]; sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]; } sumrates /= (double)sites; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) { tbl[j][k] /= sumrates; } if(jumb > 1) return; if (gama || invar) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } if (rcategs > 1) { fprintf(outfile, "\nState in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) { fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (k = 0; k < categs; k++) fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]); fprintf(outfile, "\n\n"); } } /* prot_inittable */ void free_pmatrix(long sib) { long j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(pmatrices[sib][j][k][l]); free(pmatrices[sib][j][k]); } free(pmatrices[sib][j]); } free(pmatrices[sib]); } /* free_pmatrix */ void alloc_pmatrix(long sib) { /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */ long j, k, l; double ****temp_matrix; temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **)); for (k = 0; k < categs; k++) { temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *)); for (l = 0; l < 20; l++) temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double)); } } pmatrices[sib] = temp_matrix; max_num_sibs++; } /* alloc_pmatrix */ void make_pmatrix(double **matrix, double **dmat, double **ddmat, long derivative, double lz, double rat, double *eigmat, double **probmat) { /* Computes the R matrix such that matrix[m][l] is the joint probability */ /* of m and l. 
*/ /* Computes a P matrix such that matrix[m][l] is the conditional */ /* probability of m given l. This is accomplished by dividing all terms */ /* in the R matrix by freqaa[m], the frequency of l. */ long k, l, m; /* (l) original character state */ /* (m) final character state */ /* (k) lambda counter */ double p0, p1, p2, q; double elambdat[20], delambdat[20], ddelambdat[20]; /* exponential term for matrix */ /* and both derivative matrices */ for (k = 0; k <= 19; k++) { elambdat[k] = exp(lz * rat * eigmat[k]); if(derivative != 0) { delambdat[k] = (elambdat[k] * rat * eigmat[k]); ddelambdat[k] = (delambdat[k] * rat * eigmat[k]); } } for (m = 0; m <= 19; m++) { for (l = 0; l <= 19; l++) { p0 = 0.0; p1 = 0.0; p2 = 0.0; for (k = 0; k <= 19; k++) { q = probmat[k][m] * probmat[k][l]; p0 += (q * elambdat[k]); if(derivative !=0) { p1 += (q * delambdat[k]); p2 += (q * ddelambdat[k]); } } matrix[m][l] = p0 / freqaa[m]; if(derivative != 0) { dmat[m][l] = p1 / freqaa[m]; ddmat[m][l] = p2 / freqaa[m]; } } } } /* make_pmatrix */ boolean prot_nuview(node *p) { /* Recursively update summary data for subtree rooted at p. Returns true if * view has changed. */ long i, j, k, l, num_sibs = 0, sib_index; long b, m; node *q; node *sib_ptr, *sib_back_ptr; psitelike prot_xx, x2; double prod7; double **pmat; double lw; double correction; double maxx; if ( p == NULL ) return false; if ( p->tip ) return false; /* Tips do not need to be initialized */ for (q = p->next; q != p; q = q->next) { num_sibs++; if ( q->back != NULL && !q->tip) { if ( prot_nuview(q->back) ) p->initialized = false; } } if ( p->initialized ) return false; /* Make sure pmatrices is large enough for all siblings */ for (i = 0; i < num_sibs; i++) if (pmatrices[i] == NULL) alloc_pmatrix(i); /* Make pmatrices for all possible combinations of category, rcateg */ /* and sib */ sib_ptr = p; /* return to p */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (sib_back_ptr != NULL) lw = fabs(p->tyme - sib_back_ptr->tyme); else lw = 0.0; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw, tbl[j][k], eigmat, probmat); } for (i = 0; i < endsite; i++) { correction = 0; maxx = 0; k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { /* initialize to 1 all values of prot_xx */ for (m = 0; m <= 19; m++) prot_xx[m] = 1; sib_ptr = p; /* return to p */ /* loop through all sibs and calculate likelihoods for all possible*/ /* amino acid combinations */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (sib_back_ptr != NULL) { memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike)); if ( j == 0 ) correction += sib_back_ptr->underflows[i]; } else for (b = 0; b <= 19; b++) x2[b] = 1.0; pmat = pmatrices[sib_index][j][k]; for (m = 0; m <= 19; m++) { prod7 = 0; for (l = 0; l <= 19; l++) prod7 += (pmat[m][l] * x2[l]); prot_xx[m] *= prod7; if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1 )) maxx = prot_xx[m]; } } /* And the final point of this whole function: */ memcpy(p->protx[i][j], prot_xx, sizeof(psitelike)); } p->underflows[i] = 0; if ( maxx < MIN_DOUBLE ) fix_protx(p,i,maxx,rcategs); p->underflows[i] += correction; } p->initialized = true; return true; } /* prot_nuview */ void update(node *p) { node *sib_ptr, *sib_back_ptr; long i, num_sibs; /* improve time and recompute views at a node */ if (p == NULL) return; if (p->back != NULL) { if (!p->back->tip && 
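/* ---------------------------------------------------------------------------
 * Illustrative aside, not part of the original promlk source.  The core of
 * prot_nuview() above is one step of Felsenstein's pruning algorithm: for
 * every parent state m, the contribution of a child branch is
 * sum over l of P[m][l] * child[l], and the contributions of all children are
 * multiplied together.  A standalone sketch for a node with two children and
 * a single rate class (hypothetical helper; P1 and P2 would come from
 * make_pmatrix() above, one per child branch length):
 */
static void example_prune_two_children(const double P1[20][20],
                                       const double child1[20],
                                       const double P2[20][20],
                                       const double child2[20],
                                       double parent[20])
{
  int m, l;
  double s1, s2;

  for (m = 0; m < 20; m++) {
    s1 = 0.0;
    s2 = 0.0;
    for (l = 0; l < 20; l++) {
      s1 += P1[m][l] * child1[l];   /* branch to the first child   */
      s2 += P2[m][l] * child2[l];   /* branch to the second child  */
    }
    parent[m] = s1 * s2;            /* children independent given m */
  }
}
/* end of aside ------------------------------------------------------------ */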
!p->back->initialized) prot_nuview(p->back); } sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (sib_back_ptr != NULL) { if (!sib_back_ptr->tip && !sib_back_ptr->initialized) prot_nuview(sib_back_ptr); } } if ( (!usertree) || (usertree && !lngths) ) { mnv_success = makenewv(p) || mnv_success; return; } prot_nuview(p); sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; prot_nuview(sib_ptr); } } /* update */ void smooth(node *p) { node *q; if (p == NULL) return; if (p->tip) return; /* optimize tyme here */ update(p); if (smoothit || polishing) { for (q = p->next; q != p; q = q->next) { if (!q->back->tip) { /* smooth subtrees */ smooth(q->back); /* optimize tyme again after each subtree */ update(p); } } } } /* smooth */ void promlk_add(node *below, node *newtip, node *newfork, boolean tempadd) { /* inserts the nodes newfork and its descendant, newtip, into the tree. */ long i; boolean done; node *p; double newtyme; /* Get parent nodelets */ below = curtree.nodep[below->index - 1]; newfork = curtree.nodep[newfork->index-1]; newtip = curtree.nodep[newtip->index-1]; /* Join above node to newfork */ if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; /* Join below to newfork->next->next */ below->back = newfork->next->next; newfork->next->next->back = below; /* Join newtip to newfork->next */ newfork->next->back = newtip; newtip->back = newfork->next; /* assign newfork minimum child tyme */ if (newtip->tyme < below->tyme) p = newtip; else p = below; newtyme = p->tyme; setnodetymes(newfork,newtyme); /* Move root if inserting there */ if (curtree.root == below) curtree.root = newfork; /* If not at root, set newfork tyme to average below/above */ if (newfork->back != NULL) { if (p->tyme > newfork->back->tyme) newtyme = (p->tyme + newfork->back->tyme) / 2.0; else newtyme = p->tyme - INSERT_MIN_TYME; setnodetymes(newfork, newtyme); /* Now move from p to root, setting parent tymes older than children * by at least INSERT_MIN_TYME */ do { p = curtree.nodep[p->back->index - 1]; done = (p == curtree.root); if (!done) done = (curtree.nodep[p->back->index - 1]->tyme < p->tyme - INSERT_MIN_TYME); if (!done) { setnodetymes(curtree.nodep[p->back->index - 1], p->tyme - INSERT_MIN_TYME); } } while (!done); } else { /* root == newfork */ /* make root 2x older */ setnodetymes(newfork, newfork->tyme - 2*INSERT_MIN_TYME); } /* This is needed to prevent negative lengths */ all_tymes_valid(curtree.root, 0.0, true); /* Invalidate views */ inittrav(newtip); inittrav(newtip->back); /* Adjust branch lengths throughout */ for ( i = 1; i < smoothings; i++ ) { smoothed = true; smooth(newfork); smooth(newfork->back); if ( smoothed ) break; } } /* promlk_add */ void promlk_re_move(node **item, node **fork, boolean tempadd) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). 
Also returns pointers to the deleted nodes, item and fork */ node *p, *q; long i; if ((*item)->back == NULL) { *fork = NULL; return; } *item = curtree.nodep[(*item)->index-1]; *fork = curtree.nodep[(*item)->back->index - 1]; if (curtree.root == *fork) { if (*item == (*fork)->next->back) curtree.root = (*fork)->next->next->back; else curtree.root = (*fork)->next->back; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; inittrav(p); inittrav(q); if (tempadd) return; /* This is needed to prevent negative lengths */ all_tymes_valid(curtree.root, 0.0, true); i = 1; while (i <= smoothings) { smooth(q); if (smoothit) smooth(q->back); i++; } } /* promlk_re_move */ double prot_evaluate(node *p) { /* Evaluate and return the log likelihood of the current tree * as seen from the branch from p to p->back. If p is the root node, * the first child branch is used instead. Views are updated as needed. */ contribarr tterm; static contribarr like, nulike, clai; double sum, sum2, sumc=0, y, prod4, prodl, frexm, sumterm, lterm; double **pmat; long i, j, k, l, m, lai; node *q, *r; psitelike x1, x2; sum = 0.0; if (p == curtree.root) { p = p->next; } r = p; q = p->back; prot_nuview (r); prot_nuview (q); y = fabs(r->tyme - q->tyme); for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat); for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { memcpy(x1, r->protx[i][j], sizeof(psitelike)); memcpy(x2, q->protx[i][j], sizeof(psitelike)); prod4 = 0.0; pmat = pmatrices[0][j][k]; for (m = 0; m <= 19; m++) { prodl = 0.0; for (l = 0; l <= 19; l++) prodl += (pmat[m][l] * x2[l]); frexm = x1[m] * freqaa[m]; prod4 += (prodl * frexm); } tterm[j] = prod4; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; if (sumterm < 0.0) sumterm = 0.00000001; /* ??? */ lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, rcategs * sizeof(double)); if (!auto_ && usertree && (which <= shimotrees)) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } if (auto_) { for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, rcategs * sizeof(double)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * like[i]; sum += log(sum2); } /* FIXME check sum for -inf or nan * (sometimes occurs with short branches) */ assert( sum - sum == 0.0 ); curtree.likelihood = sum; if (auto_ || !usertree) return sum; if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum; } /* prot_evaluate */ void tryadd(node *p, node **item, node **nufork) { /* temporarily adds one fork and one tip to the tree. 
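The candidate position is scored with prot_evaluate() (which also applies the HMM-of-rates correction when auto_ is set), and promlk_re_move() detaches the fork and tip again before returning, so only the best position found so far is remembered in 'there'.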
if the location where they are added yields greater likelihood than other locations tested up to that time, then keeps that location as there */ long grcategs; grcategs = (categs > rcategs) ? categs : rcategs; promlk_add(p, *item, *nufork, true); like = prot_evaluate(p); if (lastsp) { if (like >= bestyet || bestyet == UNDEFINED) prot_copy_(&curtree, &bestree, nonodes, grcategs); } if (like > bestyet || bestyet == UNDEFINED) { bestyet = like; there = p; } promlk_re_move(item, nufork, true); } /* tryadd */ void addpreorder(node *p, node *item, node *nufork, boolean contin) { /* traverses a binary tree, calling function tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p, &item, &nufork); if ((!p->tip) && contin) { addpreorder(p->next->back, item, nufork, contin); addpreorder(p->next->next->back, item, nufork, contin); } } /* addpreorder */ void restoradd(node *below, node *newtip, node *newfork, double prevtyme) { /* restore "new" tip and fork to place "below". restore tymes */ /* assumes bifurcation */ hookup(newfork, below->back); hookup(newfork->next, below); hookup(newtip, newfork->next->next); curtree.nodep[newfork->index-1] = newfork; newfork->tyme = prevtyme; /* assumes bifurcations */ newfork->next->tyme = prevtyme; newfork->next->next->tyme = prevtyme; } /* restoradd */ void tryrearr(node *p, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater likelihood than the old one sets success = TRUE and keeps the new tree. otherwise, restores the old tree */ node *frombelow, *whereto, *forknode; double oldlike, prevtyme; boolean wasonleft; if (p == curtree.root) return; forknode = curtree.nodep[p->back->index - 1]; if (forknode == curtree.root) return; oldlike = bestyet; prevtyme = forknode->tyme; /* the following statement presumes bifurcating tree */ if (forknode->next->back == p) { frombelow = forknode->next->next->back; wasonleft = true; } else { frombelow = forknode->next->back; wasonleft = false; } whereto = curtree.nodep[forknode->back->index - 1]; promlk_re_move(&p, &forknode, true); promlk_add(whereto, p, forknode, true); like = prot_evaluate(p); if (like - oldlike > LIKE_EPSILON || oldlike == UNDEFINED) { (*success) = true; bestyet = like; } else { promlk_re_move(&p, &forknode, true); restoradd(frombelow, p, forknode, prevtyme); if (wasonleft && (forknode->next->next->back == p)) { hookup (forknode->next->back, forknode->next->next); hookup (forknode->next, p); } curtree.likelihood = oldlike; /* assumes bifurcation */ inittrav(forknode); inittrav(forknode->next); inittrav(forknode->next->next); } } /* tryrearr */ void repreorder(node *p, boolean *success) { /* traverses a binary tree, calling function tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p, success); if (p->tip) return; if (!(*success)) repreorder(p->next->back, success); if (!(*success)) repreorder(p->next->next->back, success); } /* repreorder */ void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which increases the likelihood. 
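Each pass calls tryrearr() at every node in preorder via repreorder(); passes are repeated until a complete traversal produces no further improvement.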
if traversal succeeds in increasing the tree's likelihood, function rearrange runs traversal again */ boolean success; success = true; while (success) { success = false; repreorder(*r, &success); } } /* rearrange */ void nodeinit(node *p) { /* set up times at one node */ node *sib_ptr, *sib_back_ptr; long i, num_sibs; double lowertyme; sib_ptr = p; num_sibs = count_sibs(p); /* lowertyme = lowest of children's times */ lowertyme = p->next->back->tyme; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (sib_back_ptr->tyme < lowertyme) lowertyme = sib_back_ptr->tyme; } p->tyme = lowertyme - 0.1; sib_ptr = p; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; sib_ptr->tyme = p->tyme; sib_back_ptr->v = sib_back_ptr->tyme - p->tyme; sib_ptr->v = sib_back_ptr->v; } } /* nodeinit */ void invalidate_traverse(node *p) { /* Invalidates p's view and all views looking toward p from p->back * on out. */ node *q; if (p == NULL) return; if (p->tip) return; p->initialized = false; q = p->back; if ( q == NULL ) return; if ( q->tip ) return; /* Call ourselves on p->back's sibs */ for ( q = q->next ; q != p->back ; q = q->next) { invalidate_traverse(q); } } /* invalidate_traverse */ void invalidate_tyme(node *p) { /* Must be called on a node after changing its tyme, and before calling * evaluate on any other node. */ node *q; if ( p == NULL ) return; invalidate_traverse(p); if ( p->tip ) return; for ( q = p->next; q != p; q = q->next ) { invalidate_traverse(q); } } /* invalidate_tyme */ void initrav(node *p) { long i, num_sibs; node *sib_ptr, *sib_back_ptr; /* traverse to set up times throughout tree */ if (p->tip) return; sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; initrav(sib_back_ptr); } nodeinit(p); } /* initrav */ void travinit(node *p) { long i, num_sibs; node *sib_ptr, *sib_back_ptr; /* traverse to set up initial values */ if (p == NULL) return; if (p->tip) return; if (p->initialized) return; sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; travinit(sib_back_ptr); } prot_nuview(p); p->initialized = true; } /* travinit */ void travsp(node *p) { long i, num_sibs; node *sib_ptr, *sib_back_ptr; /* traverse to find tips */ if (p == curtree.root) travinit(p); if (p->tip) travinit(p->back); else { sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; travsp(sib_back_ptr); } } } /* travsp */ void treevaluate() { /* evaluate likelihood of tree, after iterating branch lengths */ long i, j, num_sibs; node *sib_ptr; polishing = true; smoothit = true; for (i = 0; i < spp; i++) curtree.nodep[i]->initialized = false; for (i = spp; i < nonodes; i++) { sib_ptr = curtree.nodep[i]; sib_ptr->initialized = false; num_sibs = count_sibs(sib_ptr); for (j=0 ; j < num_sibs; j++) { sib_ptr = sib_ptr->next; sib_ptr->initialized = false; } } if (!lngths) initrav(curtree.root); travsp(curtree.root); i = 0; do { mnv_success = false; smooth(curtree.root); i++; } while (mnv_success); prot_evaluate(curtree.root); } /* treevaluate */ void prot_reconstr(node *p, long n) { /* reconstruct and print out acid at site n+1 at node p */ long i, j, k, first, num_sibs = 0; double f, sum, xx[20]; node *q = NULL; if (p->tip) putc(y[p->index-1][n], outfile); else { num_sibs = count_sibs(p); if ((ally[n] == 0) || (location[ally[n]-1] == 0)) putc('.', 
outfile); else { j = location[ally[n]-1] - 1; sum = 0; for (i = 0; i <= 19; i++) { f = p->protx[j][mx-1][i]; if (!p->tip) { q = p; for (k = 0; k < num_sibs; k++) { q = q->next; f *= q->protx[j][mx-1][i]; } } f = sqrt(f); xx[i] = f * freqaa[i]; sum += xx[i]; } for (i = 0; i <= 19; i++) xx[i] /= sum; first = 0; for (i = 0; i <= 19; i++) if (xx[i] > xx[first]) first = i; if (xx[first] > 0.95) putc(aachar[first], outfile); else putc(tolower((int)aachar[first]), outfile); if (rctgry && rcategs > 1) mx = mp[n][mx - 1]; else mx = 1; } } } /* prot_reconstr */ void rectrav(node *p, long m, long n) { /* print out segment of reconstructed sequence for one branch */ long num_sibs, i; node *sib_ptr; putc(' ', outfile); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, " "); mx = mx0; for (i = m; i <= n; i++) { if ((i % 10 == 0) && (i != m)) putc(' ', outfile); prot_reconstr(p, i); } putc('\n', outfile); if (!p->tip) { num_sibs = count_sibs(p); sib_ptr = p; for (i = 0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; rectrav(sib_ptr->back, m, n); } } mx1 = mx; } /* rectrav */ void summarize() { long i, j, mm=0; double mode, sum; double like[maxcategs], nulike[maxcategs]; double **marginal; long **mp; mp = (long **)Malloc(sites * sizeof(long *)); for (i = 0; i <= sites-1; ++i) mp[i] = (long *)Malloc(sizeof(long)*rcategs); fprintf(outfile, "\nLn Likelihood = %11.5f\n\n", curtree.likelihood); fprintf(outfile, " Ancestor Node Node Height Length\n"); fprintf(outfile, " -------- ---- ---- ------ ------\n"); mlk_describe(outfile, &curtree, 1.0); putc('\n', outfile); if (rctgry && rcategs > 1) { for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; mp[i][j] = j + 1; for (k = 1; k <= rcategs; k++) { if (k != j + 1) { if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { nulike[j] = lambda * probcat[k - 1] * like[k - 1]; mp[i][j] = k; } } } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); } mode = 0.0; mx = 1; for (i = 1; i <= rcategs; i++) { if (probcat[i - 1] * like[i - 1] > mode) { mx = i; mode = probcat[i - 1] * like[i - 1]; } } mx0 = mx; fprintf(outfile, "Combination of categories that contributes the most to the likelihood:\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= sites; i++) { fprintf(outfile, "%ld", mx); if (i % 10 == 0) putc(' ', outfile); if (i % 60 == 0 && i != sites) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } mx = mp[i - 1][mx - 1]; } fprintf(outfile, "\n\n"); marginal = (double **) Malloc( sites*sizeof(double *)); for (i = 0; i < sites; i++) marginal[i] = (double *) Malloc( rcategs*sizeof(double)); for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) { nulike[j] /= sum; marginal[i][j] = nulike[j]; } memcpy(like, nulike, rcategs * sizeof(double)); } for (i = 0; i < 
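/* Posterior weights of the rate categories: the pass above (sites processed from last to first) stores scaled backward values for each site and category in marginal[][]; the pass below runs forward over the sites, multiplies in the forward values and the category priors, and renormalizes each site's row, leaving in marginal[i][j] the posterior weight of category j at site i. A category is then reported for a site only when this weight exceeds 0.95. */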
rcategs; i++) like[i] = 1.0; for (i = 0; i < sites; i++) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } marginal[i][j] *= like[j] * probcat[j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); sum = 0.0; for (j = 0; j < rcategs; j++) sum += marginal[i][j]; for (j = 0; j < rcategs; j++) marginal[i][j] /= sum; } fprintf(outfile, "Most probable category at each site if > 0.95"); fprintf(outfile, " probability (\".\" otherwise)\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 0; i < sites; i++) { sum = 0.0; mm = 0; for (j = 0; j < rcategs; j++) if (marginal[i][j] > sum) { sum = marginal[i][j]; mm = j; } if (sum >= 0.95) fprintf(outfile, "%ld", mm+1); else putc('.', outfile); if ((i+1) % 60 == 0) { if (i != 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } } else if ((i+1) % 10 == 0) putc(' ', outfile); } putc('\n', outfile); for (i = 0; i < sites; i++) free(marginal[i]); free(marginal); } for (i = 0; i <= sites-1; ++i) free(mp[i]); free(mp); putc('\n', outfile); putc('\n', outfile); putc('\n', outfile); if (hypstate) { fprintf(outfile, "Probable sequences at interior nodes:\n\n"); fprintf(outfile, " node "); for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) putc(' ', outfile); fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); if (!rctgry || (rcategs == 1)) mx0 = 1; for (i = 0; i < sites; i += 60) { k = i + 59; if (k >= sites) k = sites - 1; rectrav(curtree.root, i, k); putc('\n', outfile); mx0 = mx1; } } } /* summarize */ void promlk_treeout(node *p) { /* write out file with representation of final tree */ node *sib_ptr; long i, n, w, num_sibs; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { sib_ptr = p; num_sibs = count_sibs(p); putc('(', outtree); col++; for (i=0; i < (num_sibs - 1); i++) { sib_ptr = sib_ptr->next; promlk_treeout(sib_ptr->back); putc(',', outtree); col++; if (col > 55) { putc('\n', outtree); col = 0; } } sib_ptr = sib_ptr->next; promlk_treeout(sib_ptr->back); putc(')', outtree); col++; } if (p == curtree.root) { fprintf(outtree, ";\n"); return; } x = (p->tyme - curtree.nodep[p->back->index - 1]->tyme); if (x > 0.0) w = (long)(0.4342944822 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.4342944822 * log(-x)) + 1; if (w < 0) w = 0; fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } /* promlk_treeout */ void initpromlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; malloc_ppheno((*p), endsite, rcategs); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); malloc_ppheno(*p, endsite, rcategs); (*p)->index = nodei; break; case tip: match_names_to_data(str, nodep, p, spp); break; case iter: (*p)->initialized = false; (*p)->v = initialv; (*p)->iter = true; if ((*p)->back != NULL) (*p)->back->iter = true; break; case length: processlength(&valyew, &divisor, ch, &minusread, 
treestr, parens); (*p)->v = valyew / divisor; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hsnolength: if (usertree && lngths) { printf("Warning: one or more lengths not defined in user tree number %ld.\n", which); printf("PROMLK will attempt to optimize all branch lengths.\n\n"); lngths = false; } break; case unittrwt: curtree.nodep[spp]->iter = false; break; default: /* cases hslength, treewt */ break; /* should never occur */ } } /* initpromlnode */ void tymetrav(node *p, double *x) { /* set up times of nodes */ node *sib_ptr, *q; long i, num_sibs; double xmax; xmax = 0.0; if (!p->tip) { sib_ptr = p; num_sibs = count_sibs(p); for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; tymetrav(sib_ptr->back, x); if (xmax > (*x)) xmax = (*x); } } else (*x) = 0.0; p->tyme = xmax; if (!p->tip) { q = p; while (q->next != p) { q = q->next; q->tyme = p->tyme; } } (*x) = p->tyme - p->v; } /* tymetrav */ void free_all_protx (long nonodes, pointarray treenode) { /* used in proml */ long i, j, k; node *p; /* Zero thru spp are tips, */ for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->protx[j]); free(treenode[i]->protx); } /* The rest are rings (i.e. triads) */ for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]; for (j = 1; j <= 3; j++) { for (k = 0; k < endsite; k++) free(p->protx[k]); free(p->protx); p = p->next; } } } } /* free_all_protx */ void maketree() { /* constructs a binary tree from the pointers in curtree.nodep, adds each node at location which yields highest likelihood then rearranges the tree for greatest likelihood */ long i, j; long numtrees = 0; double bestlike, gotlike, x; node *item, *nufork, *dummy, *q, *root=NULL; boolean dummy_haslengths, dummy_first, goteof; long max_nonodes; /* Maximum number of nodes required to * express all species in a bifurcating tree * */ long nextnode; long grcategs; pointarray dummy_treenode=NULL; char *treestr; grcategs = (categs > rcategs) ? 
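/* maketree(): with user trees, each supplied tree is read and its node times and likelihood are evaluated; otherwise species are added one at a time in enterorder, addpreorder() finds the best insertion point, rearrange() tries local rearrangements, and optionally a final round of global rearrangements removes and re-inserts every subtree. */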
categs : rcategs; prot_inittable(); if (!usertree) { for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); curtree.root = curtree.nodep[spp]; curtree.root->back = NULL; for (i = 0; i < spp; i++) curtree.nodep[i]->back = NULL; for (i = spp; i < nonodes; i++) { q = curtree.nodep[i]; q->back = NULL; while ((q = q->next) != curtree.nodep[i]) q->back = NULL; } polishing = false; promlk_add(curtree.nodep[enterorder[0]-1], curtree.nodep[enterorder[1]-1], curtree.nodep[spp], false); if (progress) { printf("\nAdding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastsp = false; smoothit = false; for (i = 3; i <= spp; i++) { bestree.likelihood = UNDEFINED; bestyet = UNDEFINED; there = curtree.root; item = curtree.nodep[enterorder[i - 1] - 1]; nufork = curtree.nodep[spp + i - 2]; lastsp = (i == spp); addpreorder(curtree.root, item, nufork, true); promlk_add(there, item, nufork, false); like = prot_evaluate(curtree.root); rearrange(&curtree.root); if (curtree.likelihood > bestree.likelihood) { prot_copy_(&curtree, &bestree, nonodes, grcategs); } if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (lastsp && global) { if (progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = 1; j <= nonodes; j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('-'); fprintf(outfile, "!\n"); } bestlike = bestyet; do { if (progress) printf(" "); gotlike = bestlike; for (j = 0; j < nonodes; j++) { bestyet = UNDEFINED; item = curtree.nodep[j]; if (item != curtree.root) { nufork = curtree.nodep[curtree.nodep[j]->back->index - 1]; promlk_re_move(&item, &nufork, false); there = curtree.root; addpreorder(curtree.root, item, nufork, true); promlk_add(there, item, nufork, false); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) putchar('\n'); } while (bestlike < gotlike); } } if (njumble > 1 && lastsp) { for (i = 0; i < spp; i++ ) promlk_re_move(&curtree.nodep[i], &dummy, false); if (jumb == 1 || bestree2.likelihood < bestree.likelihood) prot_copy_(&bestree, &bestree2, nonodes, grcategs); } if (jumb == njumble) { if (njumble > 1) prot_copy_(&bestree2, &curtree, nonodes, grcategs); else prot_copy_(&bestree, &curtree, nonodes, grcategs); fprintf(outfile, "\n\n"); treevaluate(); curtree.likelihood = prot_evaluate(curtree.root); if (treeprint) mlk_printree(outfile, &curtree); summarize(); if (trout) { col = 0; promlk_treeout(curtree.root); } } } else{ if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); l0gl = (double *)Malloc(shimotrees * sizeof(double)); l0gf = (double **)Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *)Malloc(endsite * sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } fprintf(outfile, "\n\n"); which = 1; max_nonodes = nonodes; while (which <= numtrees) { /* These initializations required each time through the loop since multiple trees require re-initialization */ dummy_haslengths = true; nextnode = 0; dummy_first = true; goteof = false; lngths = lengthsopt; nonodes = max_nonodes; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, dummy_treenode, &goteof, &dummy_first, curtree.nodep, &nextnode, &dummy_haslengths, &grbg, initpromlnode, false, nonodes); nonodes = 
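/* Each user tree is parsed by treeread() from the EMBOSS tree object; node times are taken from the given lengths when lngths is set (tymetrav), branch times are then re-optimized by treevaluate(), and with several trees the Kishino-Hasegawa / Shimodaira-Hasegawa comparison is applied afterwards via standev2(). */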
nextnode; root = curtree.nodep[root->index - 1]; curtree.root = root; if (lngths) tymetrav(curtree.root, &x); if (goteof && (which <= numtrees)) { /* if we hit the end of the file prematurely */ fprintf (outfile, "\n"); fprintf (outfile, "ERROR: trees missing at end of file.\n"); fprintf (outfile, "\tExpected number of trees:\t\t%ld\n", numtrees); fprintf (outfile, "\tNumber of trees actually in file:\t%ld.\n\n", which - 1); embExitBad(); } curtree.start = curtree.nodep[0]->back; treevaluate(); if (treeprint) mlk_printree(outfile, &curtree); summarize(); if (trout) { col = 0; promlk_treeout(curtree.root); } if(which < numtrees){ prot_freex_notip(nonodes, curtree.nodep); gdispose(curtree.root, &grbg, curtree.nodep); } which++; } FClose(intree); if (!auto_ && numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite, maxlogl, l0gl, l0gf, aliasweight, seed); } if (usertree) { free(l0gl); for (i=0; i < shimotrees; i++) free(l0gf[i]); free(l0gf); } prot_freetable(); if (jumb < njumble) return; free(contribution); free(mp); free_all_protx(nonodes2, curtree.nodep); if (!usertree) { free_all_protx(nonodes2, bestree.nodep); if (njumble > 1) free_all_protx(nonodes2, bestree2.nodep); } if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } free(root); } /* maketree */ void clean_up() { /* Free and/or close stuff */ long i; free (rrate); free (probcat); free (rate); /* Seems to require freeing every time... */ for (i = 0; i < spp; i++) { free (y[i]); } free (y); free (nayme); free (enterorder); free (category); free (weight); free (alias); free (ally); free (location); free (aliasweight); free (probmat); free (eigmat); /* FIXME jumble should never be enabled with usertree * * also -- freetree2 was making memory leak. Since that's * broken and we're currently not bothering to free our * other trees, it makes more sense to me to not free * bestree2. We should fix all of them properly. Doing * that will require a freetree variant that specifically * handles nodes made with prot_allocx if (!usertree && njumble > 1) freetree2(bestree2.nodep, nonodes2); */ FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif } /* clean_up */ int main(int argc, Char *argv[]) { /* Protein Maximum Likelihood with molecular clock */ /* Initialize mlclock.c */ mlclock_init(&curtree, &prot_evaluate); #ifdef MAC argc = 1; /* macsetup("Promlk", ""); */ argv[0] = "Promlk"; #endif init(argc,argv); emboss_getoptions("fpromlk", argc, argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); /* Data set loop */ for ( ith = 1; ith <= datasets; ith++ ) { if ( datasets > 1 ) { fprintf(outfile, "Data set # %ld:\n\n", ith); if ( progress ) printf("\nData set # %ld:\n", ith); } getinput(); if ( ith == 1 ) firstset = false; /* Jumble loop */ if (usertree) { max_num_sibs = 0; maketree(); } else for ( jumb = 1; jumb <= njumble; jumb++ ) { max_num_sibs = 0; maketree(); } } clean_up(); printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Protein Maximum Likelihood with molecular clock */ PHYLIPNEW-3.69.650/src/dnacomp.c0000664000175000017500000007132011305225544012646 00000000000000 #include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. 
Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define maxtrees 100 /* maximum number of tied trees stored */ AjPSeqset* seqsets = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; typedef boolean *boolptr; #ifndef OLDC /* function prototypes */ void emboss_getoptions(char *pgm, int argc, char *argv[]); //void getoptions(void); void allocrest(void); void deallocrest(void); void doinit(void); void initdnacompnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void makeweights(void); void doinput(void); void mincomp(long ); void evaluate(node *); void localsavetree(void); void tryadd(node *, node *, node *); void addpreorder(node *, node *, node *); void tryrearr(node *, boolean *); void repreorder(node *, boolean *); void rearrange(node **); void describe(void); void initboolnames(node *, boolean *); void maketree(void); void freerest(void); void standev3(long, long, long, double, double *, long **, longer); void reallocchars(void); /* function prototypes */ #endif extern sequence y; Char infilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node *root, *p; long chars, col, ith, njumble, jumb, msets, numtrees; long inseed, inseed0; boolean jumble, usertree, trout, weights, progress, stepbox, ancseq, firstset, mulsets, justwts; steptr oldweight, necsteps; pointarray treenode; /* pointers to all nodes in tree */ long *enterorder; Char basechar[32]="ACMGRSVTWYHKDBNO???????????????"; bestelm *bestrees; boolean dummy; longer seed; gbases *garbage; Char ch; Char progname[20]; long *zeros; /* Local variables for maketree, propagated globally for C version: */ long maxwhich; double like, maxsteps, bestyet, bestlike, bstlike2; boolean lastrearr, recompute; double nsteps[maxuser]; long **fsteps; node *there; long *place; boolptr in_tree; baseptr nothing; node *temp, *temp1; node *grbg; void emboss_getoptions(char *pgm, int argc, char *argv[]) { jumble = false; njumble = 1; outgrno = 1; outgropt = false; trout = true; usertree = false; weights = false; justwts = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; numtrees = 0; numwts = 0; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { while (phylotrees[numtrees]) numtrees++; usertree = true; } phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); printf("numwts: %d\n", numwts); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); if(!usertree) { njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } embossoutfile = 
ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nDNA compatibility algorithm, version %s\n\n",VERSION); } /* emboss_getoptions */ void reallocchars(void) {/* The amount of chars can change between runs this function reallocates all the variables whose size depends on the amount of chars */ long i; for (i = 0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(chars*sizeof(Char)); } free(weight); free(oldweight); free(enterorder); free(necsteps); free(alias); free(ally); free(location); free(in_tree); weight = (steptr)Malloc(chars*sizeof(long)); oldweight = (steptr)Malloc(chars*sizeof(long)); enterorder = (long *)Malloc(spp*sizeof(long)); necsteps = (steptr)Malloc(chars*sizeof(long)); alias = (steptr)Malloc(chars*sizeof(long)); ally = (steptr)Malloc(chars*sizeof(long)); location = (steptr)Malloc(chars*sizeof(long)); in_tree = (boolptr)Malloc(chars*sizeof(boolean)); } void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc(chars*sizeof(Char)); bestrees = (bestelm *) Malloc(maxtrees*sizeof(bestelm)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1].btree = (long *)Malloc(nonodes*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); weight = (steptr)Malloc(chars*sizeof(long)); oldweight = (steptr)Malloc(chars*sizeof(long)); enterorder = (long *)Malloc(spp*sizeof(long)); necsteps = (steptr)Malloc(chars*sizeof(long)); alias = (steptr)Malloc(chars*sizeof(long)); ally = (steptr)Malloc(chars*sizeof(long)); location = (steptr)Malloc(chars*sizeof(long)); place = (long *)Malloc((2*spp-1)*sizeof(long)); in_tree = (boolptr)Malloc(spp*sizeof(boolean)); } /* allocrest */ void deallocrest() { long i; for (i = 0; i < spp; i++) free(y[i]); free(y); for (i = 0; i < maxtrees; i++) free(bestrees[i].btree); free(bestrees); free(nayme); free(weight); free(oldweight); free(enterorder); free(necsteps); free(alias); free(ally); free(location); free(place); free(in_tree); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &chars, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, chars); alloctree(&treenode, nonodes, usertree); allocrest(); } /* doinit */ void initdnacompnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, endsite, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, endsite, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); /* process and discard lengths */ default: break; } } /* initdnacompnode */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= chars; i++) { alias[i - 1] = i; oldweight[i - 1] = weight[i - 1]; ally[i - 1] = i; } sitesort(chars, weight); sitecombine(chars); sitescrunch(chars); endsite = 0; for (i = 1; i <= chars; i++) { if (ally[i - 1] == i) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; zeros = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) zeros[i] = 0; } /* makeweights */ void 
doinput() { /* reads the input data */ long i; if (justwts) { if (firstset) seq_inputdata(seqsets[ith-1], chars); for (i = 0; i < chars; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } else { if (!firstset){ samenumspseq(seqsets[ith-1], &chars, ith); reallocchars(); } seq_inputdata(seqsets[ith-1], chars); for (i = 0; i < chars; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[0], chars, weight, &weights); if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } } makeweights(); makevalues(treenode, zeros, usertree); allocnode(&temp, zeros, endsite); allocnode(&temp1, zeros, endsite); } /* doinput */ void mincomp(long n) { /* computes for each site the minimum number of steps necessary to accomodate those species already in the analysis, adding in species n */ long i, j, k, l, m; bases b; long s; boolean allowable, deleted; in_tree[n - 1] = true; for (i = 0; i < endsite; i++) necsteps[i] = 3; for (m = 0; m <= 31; m++) { s = 0; l = -1; k = m; for (b = A; (long)b <= (long)O; b = (bases)((long)b + 1)) { if ((k & 1) == 1) { s |= 1L << ((long)b); l++; } k /= 2; } for (j = 0; j < endsite; j++) { allowable = true; i = 1; while (allowable && i <= spp) { if (in_tree[i - 1] && treenode[i - 1]->base[j] != 0) { if ((treenode[i - 1]->base[j] & s) == 0) allowable = false; } i++; } if (allowable) { if (l < necsteps[j]) necsteps[j] = l; } } } for (j = 0; j < endsite; j++) { deleted = false; for (i = 0; i < spp; i++) { if (in_tree[i] && treenode[i]->base[j] == 0) deleted = true; } if (deleted) necsteps[j]++; } for (i = 0; i < endsite; i++) necsteps[i] *= weight[i]; } /* mincomp */ void evaluate(node *r) { /* determines the number of steps needed for a tree. this is the minimum number of steps needed to evolve sequences on this tree */ long i, term; double sum; sum = 0.0; for (i = 0; i < endsite; i++) { if (r->numsteps[i] == necsteps[i]) term = weight[i]; else term = 0; sum += term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { maxwhich = 1; maxsteps = sum; } else if (sum > maxsteps) { maxwhich = which; maxsteps = sum; } } like = sum; } /* evaluate */ void localsavetree() { /* record in place where each species has to be added to reconstruct this tree */ long i, j; node *p; boolean done; reroot(treenode[outgrno - 1], root); savetraverse(root); for (i = 0; i < nonodes; i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= spp; i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; while (!p->bottom) p = p->next; p = p->back; } if (i > 1) { place[i - 1] = place[p->index - 1]; j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = spp + i - 1; while (!p->bottom) p = p->next; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); } } } } /* localsavetree */ void tryadd(node *p, node *item, node *nufork) { /* temporarily adds one fork and one tip to the tree. 
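The placement is scored on the scratch nodes temp and temp1 with fillin() and evaluate(), so the tree itself is modified (via add/re_move) only while a new best or tied tree is being recorded in bestrees.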
if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; boolean found; node *rute, *q; if (p == root) fillin(temp, item, p); else { fillin(temp1, item, p); fillin(temp, temp1, p->back); } evaluate(temp); if (lastrearr) { if (like < bestlike) { if (item == nufork->next->next->back) { q = nufork->next; nufork->next = nufork->next->next; nufork->next->next = q; q->next = nufork; } } else if (like >= bstlike2) { recompute = false; add(p, item, nufork, &root, recompute, treenode, &grbg, zeros); rute = root->next->back; localsavetree(); reroot(rute, root); if (like > bstlike2) { bestlike = bstlike2 = like; pos = 1; nextree = 1; addtree(pos, &nextree, dummy, place, bestrees); } else { pos = 0; findtree(&found, &pos, nextree, place, bestrees); if (!found) { if (nextree <= maxtrees) addtree(pos, &nextree, dummy, place, bestrees); } } re_move(item, &nufork, &root, recompute, treenode, &grbg, zeros); recompute = true; } } if (like > bestyet) { bestyet = like; there = p; } } /* tryadd */ void addpreorder(node *p, node *item, node *nufork) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p, item, nufork); if (!p->tip) { addpreorder(p->next->back, item, nufork); addpreorder(p->next->next->back, item, nufork); } } /* addpreorder */ void tryrearr(node *p, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success := TRUE and keeps the new tree. otherwise, restores the old tree */ node *frombelow, *whereto, *forknode, *q; double oldlike; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (forknode->back == NULL) return; oldlike = bestyet; if (p->back->next->next == forknode) frombelow = forknode->next->next->back; else frombelow = forknode->next->back; whereto = treenode[forknode->back->index - 1]; if (whereto->next->back == forknode) q = whereto->next->next->back; else q = whereto->next->back; fillin(temp1, frombelow, q); fillin(temp, temp1, p); fillin(temp1, temp, whereto->back); evaluate(temp1); if (like <= oldlike + LIKE_EPSILON) { if (p != forknode->next->next->back) return; q = forknode->next; forknode->next = forknode->next->next; forknode->next->next = q; q->next = forknode; return; } recompute = false; re_move(p, &forknode, &root, recompute, treenode, &grbg, zeros); fillin(whereto, whereto->next->back, whereto->next->next->back); recompute = true; add(whereto, p, forknode, &root, recompute, treenode, &grbg, zeros); *success = true; bestyet = like; } /* tryrearr */ void repreorder(node *p, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p,success); if (!p->tip) { repreorder(p->next->back,success); repreorder(p->next->next->back,success); } } /* repreorder */ void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. 
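(Here the "likelihood" being maximized is the weighted number of sites compatible with the tree, as computed by evaluate().)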
if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ boolean success=true; while (success) { success = false; repreorder(*r,&success); } } /* rearrange */ void describe() { /* prints ancestors, steps and table of numbers of steps in each site and table of compatibilities */ long i, j, k; if (treeprint) { fprintf(outfile, "\ntotal number of compatible sites is "); fprintf(outfile, "%10.1f\n", like); } if (stepbox) { writesteps(chars, weights, oldweight, root); fprintf(outfile, "\n compatibility (Y or N) of each site with this tree:\n\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%ld", i); fprintf(outfile, "\n *----------\n"); for (i = 0; i <= (chars / 10); i++) { putc(' ', outfile); fprintf(outfile, "%3ld !", i * 10); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k > 0 && k <= chars) { if (root->numsteps[location[ally[k - 1] - 1] - 1] == necsteps[location[ally[k - 1] - 1] - 1]) { if (oldweight[k - 1] > 0) putc('Y', outfile); else putc('y', outfile); } else { if (oldweight[k - 1] > 0) putc('N', outfile); else putc('n', outfile); } } else putc(' ', outfile); } putc('\n', outfile); } } if (ancseq) { hypstates(chars, root, treenode, &garbage, basechar); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout(root, nextree, &col, root); } } /* describe */ void initboolnames(node *p, boolean *names) { /* sets BOOLEANs that indicate tips */ node *q; if (p->tip) { names[p->index - 1] = true; return; } q = p->next; while (q != p) { initboolnames(q->back, names); q = q->next; } } /* initboolnames */ void standev3(long chars, long numtrees, long maxwhich, double maxsteps, double *nsteps, long **fsteps, longer seed) { /* compute and write standard deviation of user trees */ long i, j, k; double wt, sumw, sum, sum2, sd; double temp; double **covar, *P, *f; #define SAMPLES 1000 /* ????? 
if numtrees too big for Shimo, truncate */ if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree Compatible Difference Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); which = 1; while (which <= numtrees) { fprintf(outfile, "%3ld %11.1f", which, nsteps[which - 1]); if (maxwhich == which) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (i = 0; i < chars; i++) { if (weight[i] > 0) { wt = weight[i]; sumw += wt; temp = (fsteps[which - 1][i] - fsteps[maxwhich - 1][i]); sum += temp; sum2 += temp * temp / wt; } } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, " %10.1f %11.4f", (maxsteps-nsteps[which - 1]), sd); if (sum > 1.95996 * sd) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } which++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); covar = (double **)Malloc(numtrees*sizeof(double *)); sumw = 0.0; for (i = 0; i < chars; i++) sumw += weight[i]; for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = nsteps[i]/sumw; for (j = 0; j <=i; j++) { sum2 = nsteps[j]/sumw; temp = 0.0; for (k = 0; k < chars; k++) { if (weight[k] > 0) { wt = weight[k]; temp = temp + wt*(fsteps[i][k]/wt-sum) *(fsteps[j][k]/wt-sum2); } } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-12) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; sum2 = nsteps[0]; /* sum2 will be largest # of compat. sites */ for (i = 1; i < numtrees; i++) if (sum2 < nsteps[i]) sum2 = nsteps[i]; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get max of vector */ if (f[j] > sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (sum2-nsteps[j] <= sum-f[j]) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree Compatible Difference P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld %10.1f", i+1, nsteps[i]); if ((maxwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %10.1f %10.3f", sum2-nsteps[i], P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < numtrees; i++) free(covar[i]); free(covar); } } /* standev */ void maketree() { /* constructs a binary tree from the pointers in treenode. 
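mincomp() is called before each addition so that necsteps[] reflects only the species already placed; after the last species, global rearrangements repeatedly remove and re-add every subtree until a pass gives no improvement, and the tied trees (up to maxtrees) saved in bestrees are rebuilt and printed.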
adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j, nextnode; boolean firsttree, goteof, haslengths; double gotlike; node *item, *nufork, *dummy; pointarray nodep; boolean *names; char* treestr; if (!usertree) { recompute = true; for (i = 0; i < spp; i++) in_tree[i] = false; for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); root = treenode[enterorder[0] - 1]; add(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], treenode[spp], &root, recompute, treenode, &grbg, zeros); if (progress) { printf("Adding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } in_tree[0] = true; in_tree[1] = true; lastrearr = false; for (i = 3; i <= spp; i++) { mincomp(i); bestyet = -350.0 * spp * chars; item = treenode[enterorder[i - 1] - 1]; nufork = treenode[spp + i - 2]; there = root; addpreorder(root, item, nufork); add(there, item, nufork, &root, recompute, treenode, &grbg, zeros); like = bestyet; rearrange(&root); if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = (i == spp); if (lastrearr) { if (progress) { printf("\nDoing global rearrangements\n"); printf(" !"); for (j = 1; j <= nonodes; j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } bestlike = bestyet; if (jumb == 1) { bstlike2 = bestlike; nextree = 1; } do { if (progress) printf(" "); gotlike = bestlike; for (j = 0; j < nonodes; j++) { bestyet = -10.0 * spp * chars; item = treenode[j]; there = root; if (item != root) { re_move(item, &nufork, &root, recompute, treenode, &grbg, zeros); there = root; addpreorder(root, item, nufork); add(there, item, nufork, &root, recompute, treenode, &grbg, zeros); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) putchar('\n'); } while (bestlike > gotlike); } } if (progress) putchar('\n'); for (i = spp - 1; i >= 1; i--) re_move(treenode[i], &dummy, &root, recompute, treenode, &grbg, zeros); if (jumb == njumble) { if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); recompute = false; for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], &root, recompute, treenode, &grbg, zeros); for (j = 3; j <= spp; j++) add(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], treenode[spp + j - 2], &root, recompute, treenode, &grbg, zeros); reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); printree(root, 1.0); describe(); for (j = 1; j < spp; j++) re_move(treenode[j], &dummy, &root, recompute, treenode, &grbg, zeros); } } } else { if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n"); } fsteps = (long **)Malloc(maxuser*sizeof(long *)); for (j = 1; j <= maxuser; j++) fsteps[j - 1] = (long *)Malloc(endsite*sizeof(long)); names = (boolean *)Malloc(spp*sizeof(boolean)); nodep = NULL; maxsteps = 0.0; which = 1; while (which <= numtrees) { firsttree = true; nextnode = 0; haslengths = true; treestr = 
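/* For each user tree: read it, mark which species it actually contains (initboolnames), recompute necsteps for that species set with mincomp(), optionally reroot at the outgroup, then evaluate and describe it; with more than one tree, standev3() afterwards applies the KHT test (two trees) or the SH test (more). */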
ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdnacompnode,false,nonodes); for (j = 0; j < spp; j++) names[j] = false; initboolnames(root, names); for (j = 0; j < spp; j++) in_tree[j] = names[j]; j = 1; while (!in_tree[j - 1]) j++; mincomp(j); ajUtilCatch(); if (outgropt) reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); printree(root, 1.0); describe(); which++; } FClose(intree); putc('\n', outfile); if (numtrees > 1 && chars > 1 ) { standev3(chars, numtrees, maxwhich, maxsteps, nsteps, fsteps, seed); } for (j = 1; j <= maxuser; j++) free(fsteps[j - 1]); free(fsteps); free(names); } if (jumb == njumble) { if (progress) { printf("Output written to file \"%s\"\n", outfilename); if (trout) printf("\nTrees also written onto file \"%s\"\n", outtreename); putchar('\n'); } } } /* maketree */ void freerest() { if (!usertree) { freenode(&temp); freenode(&temp1); } freegrbg(&grbg); if (ancseq) freegarbage(&garbage); free(zeros); freenodes(nonodes, treenode); } /* freerest */ int main(int argc, Char *argv[]) { /* DNA compatibility by uphill search */ /* reads in spp, chars, and the data. Then calls maketree to construct the tree */ #ifdef MAC argc = 1; /* macsetup("Dnacomp",""); */ argv[0]="Dnacomp"; #endif init(argc, argv); emboss_getoptions("fdnacomp", argc, argv); garbage = NULL; grbg = NULL; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= msets; ith++) { doinput(); if (ith == 1) firstset = false; if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n", ith); if (progress) printf("Data set # %ld:\n\n", ith); } for (jumb = 1; jumb <= njumble; jumb++) maketree(); freerest(); } freetree(nonodes, treenode); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif ajSeqsetDelarray(&seqsets); ajPhyloPropDel(&phyloweights); ajPhyloTreeDelarray(&phylotrees); ajFileClose(&embossoutfile); ajFileClose(&embossouttree); deallocrest(); embExit(); return 0; } /* DNA compatibility by uphill search */ PHYLIPNEW-3.69.650/src/moves.c0000664000175000017500000001375210775447511012375 00000000000000 #include "phylip.h" #include "moves.h" void inpnum(long *n, boolean *success) { /* used by dnamove, dolmove, move, & retree */ int fields; char line[100]; #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); getstryng(line); *n = atof(line); fields = sscanf(line,"%ld",n); *success = (fields == 1); } /* inpnum */ void prereverse(boolean ansi) { /* turn on reverse video */ printf(ansi ? "\033[7m": ""); } /* prereverse */ void postreverse(boolean ansi) { /* turn off reverse video */ printf(ansi ? 
"\033[0m" : ""); } /* postreverse */ void chwrite(Char ch, long num, long *pos, long leftedge, long screenwidth) { long i; for (i = 1; i <= num; i++) { if ((*pos) >= leftedge && (*pos) - leftedge + 1 < screenwidth) putchar(ch); (*pos)++; } } /* chwrite */ void nnwrite(long nodenum,long num,long *pos,long leftedge,long screenwidth) { long i, leftx; leftx = leftedge - (*pos); if ((*pos) >= leftedge && (*pos) - leftedge + num < screenwidth) printf("%*ld", (int)num, nodenum); else if (leftx > 0 && leftx < 3) for(i=0;i= leftedge && (*pos) - leftedge + 1 < screenwidth) printf("%*s", (int)length, s); (*pos) += length; } /* stwrite */ void help(const char *letters) { /* display help information */ char input[100]; printf("\n\nR Rearrange a tree by moving a node or group\n"); printf("# Show the states of the next %s that doesn't fit tree\n", letters); printf("+ Show the states of the next %s\n", letters); printf("- ... of the previous %s\n", letters); printf("S Show the states of a given %s\n", letters); printf(". redisplay the same tree again\n"); printf("T Try all possible positions of a node or group\n"); printf("U Undo the most recent rearrangement\n"); printf("W Write tree to a file\n"); printf("O select an Outgroup for the tree\n"); printf("F Flip (rotate) branches at a node\n"); printf("H Move viewing window to the left\n"); printf("J Move viewing window downward\n"); printf("K Move viewing window upward\n"); printf("L Move viewing window to the right\n"); printf("C show only one Clade (subtree) (useful if tree is too big)\n"); printf("? Help (this screen)\n"); printf("Q (Quit) Exit from program\n"); printf("X Exit from program\n\n\n"); printf("TO CONTINUE, PRESS ON THE Return OR Enter KEY"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); getstryng(input); } /* help */ void window(adjwindow action, long *leftedge, long *topedge, long hscroll, long vscroll, long treelines, long screenlines, long screenwidth, long farthest, boolean subtree) { /* move viewing window of tree */ switch (action) { case left: if (*leftedge != 1) *leftedge -= hscroll; break; case downn: /* The 'topedge + 6' is needed to allow downward scrolling when part of the tree is above the screen and only 1 or 2 lines are below it. */ if (treelines - *topedge + 6 >= screenlines) *topedge += vscroll; break; case upp: if (*topedge != 1) *topedge -= vscroll; break; case right: if ((farthest + 6 + nmlngth + ((subtree) ? 8 : 0)) > (*leftedge + screenwidth)) *leftedge += hscroll; break; } } /* window */ void pregraph(boolean ansi) { /* turn on graphic characters */ /* used in move & dolmove */ printf(ansi ? "\033(0" : ""); } /* pregraph */ void pregraph2(boolean ansi) { /* turn on graphic characters */ /* used in dnamove & retree */ if (ansi) { printf("\033(0"); printf("\033[10m"); } } /* pregraph2 */ void postgraph(boolean ansi) { /* turn off graphic characters */ /* used in move & dolmove */ printf(ansi ? 
"\033(B" : ""); } /* postgraph */ void postgraph2(boolean ansi) { /* turn off graphic characters */ /* used in dnamove & retree */ if (ansi) { printf("\033[11m"); printf("\033(B"); } } /* postgraph2 */ void nextinc(long *dispchar, long *dispword, long *dispbit, long chars, long bits, boolean *display, steptr numsteps, steptr weight) { /* show next incompatible character */ /* used in move & dolmove */ long disp0; boolean done; *display = true; disp0 = *dispchar; done = false; do { (*dispchar)++; if (*dispchar > chars) { *dispchar = 1; done = (disp0 == 0); } } while (!(numsteps[*dispchar - 1] > weight[*dispchar - 1] || *dispchar == disp0 || done)); *dispword = (*dispchar - 1) / bits + 1; *dispbit = (*dispchar - 1) % bits + 1; } /* nextinc */ void nextchar(long *dispchar, long *dispword, long *dispbit, long chars, long bits, boolean *display) { /* show next character */ /* used in move & dolmove */ *display = true; (*dispchar)++; if (*dispchar > chars) *dispchar = 1; *dispword = (*dispchar - 1) / bits + 1; *dispbit = (*dispchar - 1) % bits + 1; } /* nextchar */ void prevchar(long *dispchar, long *dispword, long *dispbit, long chars, long bits, boolean *display) { /* show previous character */ /* used in move & dolmove */ *display = true; (*dispchar)--; if (*dispchar < 1) *dispchar = chars; *dispword = (*dispchar - 1) / bits + 1; *dispbit = (*dispchar - 1) % bits + 1; } /* prevchar */ void show(long *dispchar, long *dispword, long *dispbit, long chars, long bits, boolean *display) { /* used in move & dolmove */ long i; boolean ok; do { printf("SHOW: (Character number or 0 to see none)? "); inpnum(&i, &ok); ok = (ok && (i == 0 || (i >= 1 && i <= chars))); if (ok && i != 0) { *display = true; *dispchar = i; *dispword = (i - 1) / bits + 1; *dispbit = (i - 1) % bits + 1; } if (ok && i == 0) *display = false; } while (!ok); } /* show */ PHYLIPNEW-3.69.650/src/move.c0000664000175000017500000011314611305225544012176 00000000000000 #include "phylip.h" #include "disc.h" #include "moves.h" #include "wagner.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define overr 4 #define which 1 AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylomix = NULL; AjPPhyloProp phylofact = NULL; AjPPhyloTree* phylotrees = NULL; typedef enum { horiz, vert, up, overt, upcorner, downcorner, onne, zerro, question } chartype; typedef enum { arb, use, spec } howtree; typedef enum { rearr, flipp, reroott, none } rearrtype; #ifndef OLDC /*function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void inputoptions(void); void allocrest(void); void doinput(void); void configure(void); void prefix(chartype); void postfix(chartype); void makechar(chartype); void move_fillin(node *); void move_postorder(node *); void evaluate(node *); void reroot(node *); void move_filltrav(node *); void move_hyptrav(node *); void move_hypstates(void); void grwrite(chartype, long, long *); void move_drawline(long, long); void move_printree(void); void arbitree(void); void yourtree(void); void initmovenode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void buildtree(void); void rearrange(void); void tryadd(node *, node *, node *, double *); void addpreorder(node *, node *, node *, double *); void try(void); void undo(void); void treewrite(boolean); void clade(void); void flip(void); void changeoutgroup(void); void redisplay(void); void treeconstruct(void); /*function prototypes */ #endif char infilename[FNMLNGTH],intreename[FNMLNGTH], weightfilename[FNMLNGTH], ancfilename[FNMLNGTH], mixfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outtreename; AjPFile embossouttree; node *root; long outgrno, screenlines, col, treelines, leftedge, topedge, vmargin, hscroll, vscroll, scrollinc, screenwidth, farthest; /* outgrno indicates outgroup */ boolean weights, outgropt, ancvar, questions, allsokal, allwagner, mixture, factors, noroot, waswritten; boolean *ancone, *anczero, *ancone0, *anczero0; Char *factor; pointptr treenode; /* pointers to all nodes in tree */ double threshold; double *threshwt; bitptr wagner, wagner0; unsigned char che[9]; boolean reversed[9]; boolean graphic[9]; howtree how; gbit *garbage; char* progname; Char ch; /* Variables for treeread */ boolean usertree, goteof, firsttree, haslengths; pointarray nodep; node *grbg; long *zeros; /* Local variables for treeconstruct, propagated globally for C vesion: */ long dispchar, dispword, dispbit, atwhat, what, fromwhere, towhere, oldoutgrno, compatible; double like, bestyet, gotlike; Char *guess; boolean display, newtree, changed, subtree, written, oldwritten, restoring, wasleft, oldleft, earlytree; boolean *in_tree; steptr numsteps, numsone, numszero; long fullset; bitptr steps, zeroanc, oneanc; node *nuroot; rearrtype lastop; boolean *names; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr initialtree = NULL; AjPStr method = NULL; how = arb; usertree = false; goteof = false; outgrno = 1; outgropt = false; weights = false; ancvar = false; allsokal = false; allwagner = true; mixture = false; factors = false; screenlines = 24; scrollinc = 20; screenwidth = 80; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; threshold = ajAcdGetFloat("threshold"); method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "w")) 
allwagner = true; else if(ajStrMatchC(method, "c")) allsokal = true; else if(ajStrMatchC(method, "m")) { mixture = allwagner = true; phylomix = ajAcdGetProperties("mixfile"); } initialtree = ajAcdGetListSingle("initialtree"); if(ajStrMatchC(initialtree, "a")) how = arb; if(ajStrMatchC(initialtree, "u")) how = use; if(ajStrMatchC(initialtree, "s")) { how = spec; phylotrees = ajAcdGetTree("intreefile"); usertree = true; } phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; phylofact = ajAcdGetProperties("factorfile"); if(phylofact) factors = true; screenwidth = ajAcdGetInt("screenwidth"); screenlines = ajAcdGetInt("screenlines"); if (scrollinc < screenwidth / 2.0) hscroll = scrollinc; else hscroll = screenwidth / 2; if (scrollinc < screenlines / 2.0) vscroll = scrollinc; else vscroll = screenlines / 2; embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } /*emboss_getoptions */ void inputoptions() { /* input the information on the options */ long i; for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); if (factors) { factor = (Char *)Malloc(chars*sizeof(Char)); inputfactorsstr(phylofact->Str[0], chars, factor, &factors); } if (mixture) inputmixturestr(phylomix->Str[0], wagner0); if (weights) inputweightsstr(phyloweights->Str[0], chars, weight, &weights); putchar('\n'); if (weights) printweights(stdout, 0, chars, weight, "Characters"); for (i = 0; i < (words); i++) { if (mixture) wagner[i] = wagner0[i]; else if (allsokal) wagner[i] = 0; else wagner[i] = (1L << (bits + 1)) - (1L << 1); } if (allsokal && !mixture) printf("Camin-Sokal parsimony method\n\n"); if (allwagner && !mixture) printf("Wagner parsimony method\n\n"); if (mixture) printmixture(stdout, wagner); for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = (((1L << (i % bits + 1)) & wagner[i / bits]) != 0); } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } if (factors) printfactors(stdout, chars, factor, ""); if (ancvar) printancestors(stdout, anczero, ancone); noroot = true; questions = false; for (i = 0; i < (chars); i++) { if (weight[i] > 0) { noroot = (noroot && ancone[i] && anczero[i] && ((((1L << (i % bits + 1)) & wagner[i / bits]) != 0) || threshold <= 2.0)); } questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void allocrest() { nayme = (naym *)Malloc(spp*sizeof(naym)); in_tree = (boolean *)Malloc(nonodes*sizeof(boolean)); extras = (steptr)Malloc(chars*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); numsteps = (steptr)Malloc(chars*sizeof(long)); numsone = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); guess = (Char *)Malloc(chars*sizeof(Char)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); wagner = (bitptr)Malloc(words*sizeof(long)); wagner0 = (bitptr)Malloc(words*sizeof(long)); steps = (bitptr)Malloc(words*sizeof(long)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); } /* allocrest */ void doinput() { /* reads the input data */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); words = chars / bits + 1; printf("%2ld species, %3ld characters\n", spp, chars); alloctree(&treenode); 
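/* State sets are bit-packed: each character occupies one bit of a long, with
   bits = 8*sizeof(long) - 1 usable bits per word (set in main()), hence the
   words = chars / bits + 1 words per set computed above.  For example, with
   64-bit longs (bits = 63) a 100-character data set needs two words, and
   character i is addressed as word i / bits with mask 1L << (i % bits + 1),
   as in move_hypstates() below. */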
setuptree(treenode); allocrest(); inputoptions(); disc_inputdata(phylostates[0], treenode, true, false, stdout); } /* doinput */ void configure() { /* configure to machine -- set up special characters */ chartype a; for (a = horiz; (long)a <= (long)question; a = (chartype)((long)a + 1)) reversed[(long)a] = false; for (a = horiz; (long)a <= (long)question; a = (chartype)((long)a + 1)) graphic[(long)a] = false; if (ibmpc) { che[(long)horiz] = 205; graphic[(long)horiz] = true; che[(long)vert] = 186; graphic[(long)vert] = true; che[(long)up] = 186; graphic[(long)up] = true; che[(long)overt] = 205; graphic[(long)overt] = true; che[(long)onne] = 219; reversed[(long)onne] = true; che[(long)zerro] = 176; graphic[(long)zerro] = true; che[(long)question] = 178; /* or try CHR(177) */ graphic[(long)question] = true; che[(long)upcorner] = 200; graphic[(long)upcorner] = true; che[(long)downcorner] = 201; graphic[(long)downcorner] = true; return; } if (ansi) { che[(long)onne] = ' '; reversed[(long)onne] = true; che[(long)horiz] = che[(long)onne]; reversed[(long)horiz] = true; che[(long)vert] = che[(long)onne]; reversed[(long)vert] = true; che[(long)up] = 'x'; graphic[(long)up] = true; che[(long)overt] = 'q'; graphic[(long)overt] = true; che[(long)zerro] = 'a'; graphic[(long)zerro] = true; reversed[(long)zerro] = true; che[(long)question] = '?'; reversed[(long)question] = true; che[(long)upcorner] = 'm'; graphic[(long)upcorner] = true; che[(long)downcorner] = 'l'; graphic[(long)downcorner] = true; return; } che[(long)horiz] = '='; che[(long)vert] = ' '; che[(long)up] = '!'; che[(long)overt] = '-'; che[(long)onne] = '*'; che[(long)zerro] = '='; che[(long)question] = '.'; che[(long)upcorner] = '`'; che[(long)downcorner] = ','; } /* configure */ void prefix(chartype a) { /* give prefix appropriate for this character */ if (reversed[(long)a]) prereverse(ansi); if (graphic[(long)a]) pregraph(ansi); } /* prefix */ void postfix(chartype a) { /* give postfix appropriate for this character */ if (reversed[(long)a]) postreverse(ansi); if (graphic[(long)a]) postgraph(ansi); } /* postfix */ void makechar(chartype a) { /* print out a character with appropriate prefix or postfix */ prefix(a); putchar(che[(long)a]); postfix(a); } /* makechar */ void move_fillin(node *p) { /* Sets up for each node in the tree two statesets. stateone and statezero are the sets of character states that must be 1 or must be 0, respectively, in a most parsimonious reconstruction, based on the information at or above this node. Note that this state assignment may change based on information further down the tree. If a character is in both sets it is in state "P". If in neither, it is "?". */ long i; long l0, l1, r0, r1, st, wa, za, oa; for (i = 0; i < (words); i++) { l0 = p->next->back->statezero[i]; l1 = p->next->back->stateone[i]; r0 = p->next->next->back->statezero[i]; r1 = p->next->next->back->stateone[i]; wa = wagner[i]; za = zeroanc[i]; oa = oneanc[i]; st = (l1 & r0) | (l0 & r1); steps[i] = st; p->stateone[i] = (l1 | r1) & (~(st & (wa | za))); p->statezero[i] = (l0 | r0) & (~(st & (wa | oa))); } } /* move_fillin */ void move_postorder(node *p) { /* traverses a binary tree, calling function fillin at a node's descendants before calling fillin at the node */ if (p->tip) return; move_postorder(p->next->back); move_postorder(p->next->next->back); move_fillin(p); count(steps, zeroanc, numszero, numsone); } /* move_postorder */ void evaluate(node *r) { /* Determines the number of steps needed for a tree. 
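It does this with two move_postorder() passes, one with zeroanc set to fullset (ancestral state 0 for every character) and one with oneanc set to fullset (ancestral state 1), then keeps the smaller of numszero and numsone for each character whose corresponding ancestral state is allowed by anczero/ancone, capping each character's contribution to the total at its threshold weight threshwt.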
This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum; boolean nextcompat, thiscompat, done; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } for (i = 0; i < (words); i++) { zeroanc[i] = fullset; oneanc[i] = 0; } compatible = 0; nextcompat = true; move_postorder(r); count(r->stateone, zeroanc, numszero, numsone); for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = fullset; } move_postorder(r); count(r->statezero, zeroanc, numszero, numsone); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) sum += stepnum; else sum += threshwt[i]; thiscompat = (stepnum <= weight[i]); if (factors) { done = (i + 1 == chars); if (!done) done = (factor[i + 1] != factor[i]); nextcompat = (nextcompat && thiscompat); if (done) { if (nextcompat) compatible += weight[i]; nextcompat = true; } } else if (thiscompat) compatible += weight[i]; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } like = -sum; } /* evaluate */ void reroot(node *outgroup) { /* reorients tree, putting outgroup in desired position. */ node *p, *q, *newbottom, *oldbottom; boolean onleft; if (outgroup->back->index == root->index) return; newbottom = outgroup->back; p = treenode[newbottom->index - 1]->back; while (p->index != root->index) { oldbottom = treenode[p->index - 1]; treenode[p->index - 1] = p; p = oldbottom->back; } onleft = (p == root->next); if (restoring) if (!onleft && wasleft){ p = root->next->next; q = root->next; } else { p = root->next; q = root->next->next; } else { if (onleft) oldoutgrno = root->next->next->back->index; else oldoutgrno = root->next->back->index; wasleft = onleft; p = root->next; q = root->next->next; } p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; if (restoring) { if (!onleft && wasleft) { outgroup->back->back = root->next; outgroup->back = root->next->next; } else { outgroup->back->back = root->next->next; outgroup->back = root->next; } } else { outgroup->back->back = root->next->next; outgroup->back = root->next; } treenode[newbottom->index - 1] = newbottom; } /* reroot */ void move_filltrav(node *r) { /* traverse to fill in interior node states */ if (r->tip) return; move_filltrav(r->next->back); move_filltrav(r->next->next->back); move_fillin(r); } /* move_filltrav */ void move_hyptrav(node *r) { /* compute states at one interior node */ long i; boolean bottom; long l0, l1, r0, r1, s0, s1, a0, a1, temp, wa; gbit *zerobelow = NULL, *onebelow = NULL; disc_gnu(&zerobelow, &garbage); disc_gnu(&onebelow, &garbage); bottom = (r->back == NULL); if (bottom) { memcpy(zerobelow->bits_, zeroanc, words*sizeof(long)); memcpy(onebelow->bits_, oneanc, words*sizeof(long)); } else { memcpy(zerobelow->bits_, treenode[r->back->index - 1]->statezero, words*sizeof(long)); memcpy(onebelow->bits_, treenode[r->back->index - 1]->stateone, words*sizeof(long)); } for (i = 0; i < (words); i++) { s0 = r->statezero[i]; s1 = r->stateone[i]; a0 = zerobelow->bits_[i]; a1 = onebelow->bits_[i]; if (!r->tip) { wa = wagner[i]; l0 = r->next->back->statezero[i]; l1 = r->next->back->stateone[i]; r0 = r->next->next->back->statezero[i]; r1 = 
r->next->next->back->stateone[i]; s0 = (wa & ((a0 & l0) | (a0 & r0) | (l0 & r0))) | (fullset & (~wa) & s0); s1 = (wa & ((a1 & l1) | (a1 & r1) | (l1 & r1))) | (fullset & (~wa) & s1); temp = fullset & (~(s0 | s1 | l1 | l0 | r1 | r0)); s0 |= temp & a0; s1 |= temp & a1; r->statezero[i] = s0; r->stateone[i] = s1; } } if (((1L << dispbit) & r->stateone[dispword - 1]) != 0) { if (((1L << dispbit) & r->statezero[dispword - 1]) != 0) r->state = '?'; else r->state = '1'; } else { if (((1L << dispbit) & r->statezero[dispword - 1]) != 0) r->state = '0'; else r->state = '?'; } if (!r->tip) { move_hyptrav(r->next->back); move_hyptrav(r->next->next->back); } disc_chuck(zerobelow, &garbage); disc_chuck(onebelow, &garbage); } /* move_hyptrav */ void move_hypstates() { /* fill in and describe states at interior nodes */ long i, j, k; for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = 0; } for (i = 0; i < (chars); i++) { j = i / bits + 1; k = i % bits + 1; if (guess[i] == '0') zeroanc[j - 1] = ((long)zeroanc[j - 1]) | (1L << k); if (guess[i] == '1') oneanc[j - 1] = ((long)oneanc[j - 1]) | (1L << k); } move_filltrav(root); move_hyptrav(root); } /* move_hypstates */ void grwrite(chartype c, long num, long *pos) { long i; prefix(c); for (i = 1; i <= num; i++) { if ((*pos) >= leftedge && (*pos) - leftedge + 1 < screenwidth) putchar(che[(long)c]); (*pos)++; } postfix(c); } /* grwrite */ void move_drawline(long i, long lastline) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j, pos; boolean extra, done; Char st; chartype c, d; pos = 1; p = nuroot; q = nuroot; extra = false; if (i == (long)p->ycoord && (p == root || subtree)) { extra = true; c = overt; if (display) { switch (p->state) { case '1': c = onne; break; case '0': c = zerro; break; case '?': c = question; break; } } if ((subtree)) stwrite("Subtree:", 8, &pos, leftedge, screenwidth); if (p->index >= 100) nnwrite(p->index, 3, &pos, leftedge, screenwidth); else if (p->index >= 10) { grwrite(c, 1, &pos); nnwrite(p->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(p->index, 1, &pos, leftedge, screenwidth); } } else { if (subtree) stwrite(" ", 10, &pos, leftedge, screenwidth); else stwrite(" ", 2, &pos, leftedge, screenwidth); } do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)p->xcoord - (long)q->xcoord; if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)q->ycoord > (long)p->ycoord) d = upcorner; else d = downcorner; c = overt; if (display) { switch (q->state) { case '1': c = onne; break; case '0': c = zerro; break; case '?': c = question; break; } d = c; } if (n > 1) { grwrite(d, 1, &pos); grwrite(c, n - 3, &pos); } if (q->index >= 100) nnwrite(q->index, 3, &pos, leftedge, screenwidth); else if (q->index >= 10) { grwrite(c, 1, &pos); nnwrite(q->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(q->index, 1, &pos, leftedge, screenwidth); } extra = true; } else if (!q->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { c = up; if (i < (long)p->ycoord) st = p->next->back->state; else st = p->next->next->back->state; if (display) { switch (st) { case '1': c = onne; break; case '0': c = zerro; break; case '?': 
c = question; break; } } grwrite(c, 1, &pos); chwrite(' ', n - 1, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { n = 0; for (j = 1; j <= nmlngth; j++) { if (nayme[p->index - 1][j - 1] != '\0') n = j; } chwrite(':', 1, &pos, leftedge, screenwidth); for (j = 0; j < n; j++) chwrite(nayme[p->index - 1][j], 1, &pos, leftedge, screenwidth); } putchar('\n'); } /* move_drawline */ void move_printree() { /* prints out diagram of the tree */ long tipy, i, dow; if (!subtree) nuroot = root; if (changed || newtree) evaluate(root); if (display) move_hypstates(); if (ansi || ibmpc) printf("\033[2J\033[H"); else putchar('\n'); tipy = 1; dow = down; if (spp * dow > screenlines && !subtree) { dow--; } if (noroot) printf("(unrooted)"); if (display) { printf(" "); makechar(onne); printf(":1 "); makechar(question); printf(":? "); makechar(zerro); printf(":0 "); } else printf(" "); if (!earlytree) { printf("%10.1f Steps", -like); } if (display) printf(" CHAR%3ld", dispchar); else printf(" "); if (!earlytree) { printf(" %3ld chars compatible\n", compatible); } printf(" "); if (changed && !earlytree) { if (-like < bestyet) { printf(" BEST YET!"); bestyet = -like; } else if (fabs(-like - bestyet) < 0.000001) printf(" (as good as best)"); else { if (-like < gotlike) printf(" better"); else if (-like > gotlike) printf(" worse!"); } } printf("\n"); farthest = 0; coordinates(nuroot, &tipy, 1.5, &farthest); vmargin = 4; treelines = tipy - dow; if (topedge != 1) { printf("** %ld lines above screen **\n", topedge - 1); vmargin++; } if ((treelines - topedge + 1) > (screenlines - vmargin)) vmargin++; for (i = 1; i <= treelines; i++) { if (i >= topedge && i < topedge + screenlines - vmargin) move_drawline(i, treelines); } if ((treelines - topedge + 1) > (screenlines - vmargin)) { printf("** %ld", treelines - (topedge - 1 + screenlines - vmargin)); printf(" lines below screen **\n"); } if (treelines - topedge + vmargin + 1 < screenlines) putchar('\n'); gotlike = -like; changed = false; } /* move_printree */ void arbitree() { long i; root = treenode[0]; add2(treenode[0], treenode[1], treenode[spp], &root, restoring, wasleft, treenode); for (i = 3; i <= (spp); i++) add2(treenode[spp+ i - 3], treenode[i - 1], treenode[spp + i - 2], &root, restoring, wasleft, treenode); for (i = 0; i < (nonodes); i++) in_tree[i] = true; } /* arbitree */ void yourtree() { long i, j; boolean ok; root = treenode[0]; add2(treenode[0], treenode[1], treenode[spp], &root, restoring, wasleft, treenode); i = 2; do { i++; move_printree(); printf("\nAdd species%3ld: \n", i); printf(" \n"); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); do { printf("\nbefore node (type number): "); inpnum(&j, &ok); ok = (ok && ((j >= 1 && j < i) || (j > spp && j < spp + i - 1))); if (!ok) printf("Impossible number. 
Please try again:\n"); } while (!ok); add2(treenode[j - 1], treenode[i - 1], treenode[spp + i - 2], &root, restoring, wasleft, treenode); } while (i != spp); for (i = 0; i < (nonodes); i++) in_tree[i] = true; } /* yourtree */ void initmovenode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ /* LM 7/27 I added this function and the commented lines around */ /* treeread() to get the program running, but all 4 move programs*/ /* are improperly integrated into the v4.0 support files. As is */ /* this is a patchwork function */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, chars, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, chars, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); /* process lengths and discard */ default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; } } /* initmovenode */ void buildtree() { long i, j, nextnode; node *p; char* treestr; changed = false; newtree = false; switch (how) { case arb: arbitree(); break; case use: names = (boolean *)Malloc(spp*sizeof(boolean)); firsttree = true; /**/ nodep = NULL; /**/ nextnode = 0; /**/ haslengths = 0; /**/ zeros = (long *)Malloc(chars*sizeof(long)); /**/ for (i = 0; i < chars; i++) /**/ zeros[i] = 0; /**/ treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initmovenode,false,nonodes); for (i = spp; i < (nonodes); i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->stateone = (bitptr)Malloc(words*sizeof(long)); p->statezero = (bitptr)Malloc(words*sizeof(long)); p = p->next; } } /* debug: see comment at initmovenode() */ /* treeread(which, ch, &root, treenode, names);*/ for (i = 0; i < (spp); i++) in_tree[i] = names[i]; free(names); FClose(intree); break; case spec: yourtree(); break; } if (!outgropt) outgrno = root->next->back->index; if (outgropt && in_tree[outgrno - 1]) reroot(treenode[outgrno - 1]); } /* buildtree */ void rearrange() { long i, j; boolean ok1, ok2; node *p, *q; printf("Remove everything to the right of which node? "); inpnum(&i, &ok1); ok1 = (ok1 && i >= 1 && i < spp * 2 && i != root->index); if (ok1) { printf("Add before which node? "); inpnum(&j, &ok2); ok2 = (ok2 && j >= 1 && j < spp * 2); if (ok2) { ok2 = (treenode[j - 1] != treenode[treenode[i - 1]->back->index - 1]); p = treenode[j - 1]; while (p != root) { ok2 = (ok2 && p != treenode[i - 1]); p = treenode[p->back->index - 1]; } if (ok1 && ok2) { what = i; q = treenode[treenode[i - 1]->back->index - 1]; if (q->next->back->index == i) fromwhere = q->next->next->back->index; else fromwhere = q->next->back->index; towhere = j; re_move2(&treenode[i - 1], &q, &root, &wasleft, treenode); add2(treenode[j - 1], treenode[i - 1], q, &root, restoring, wasleft, treenode); } lastop = rearr; } } changed = (ok1 && ok2); move_printree(); if (!(ok1 && ok2)) printf("Not a possible rearrangement. Try again: "); else { oldwritten =written; written = false; } } /* rearrange */ void tryadd(node *p, node *item, node *nufork, double *place) { /* temporarily adds one fork and one tip to the tree. 
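For each candidate attachment node p it grafts the item and its fork there with add2(), rescores the tree with evaluate(root), saves -like, and detaches them again with re_move2() before the next trial.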
Records scores in array place */ add2(p, item, nufork, &root, restoring, wasleft, treenode); evaluate(root); place[p->index - 1] = -like; re_move2(&item, &nufork, &root, &wasleft, treenode); } /* tryadd */ void addpreorder(node *p, node *item, node *nufork, double *place) { /* traverses a binary tree, calling function tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p,item,nufork,place); if (!p->tip) { addpreorder(p->next->back, item, nufork, place); addpreorder(p->next->next->back, item, nufork, place); } } /* addpreorder */ void try() { /* Remove node, try it in all possible places */ double *place; long i, j, oldcompat; double current; node *q, *dummy, *rute; boolean tied, better, ok; printf("Try other positions for which node? "); inpnum(&i, &ok); if (!(ok && i >= 1 && i <= nonodes && i != root->index)) { printf("Not a possible choice! "); return; } printf("WAIT ...\n"); place = (double *)Malloc(nonodes*sizeof(double)); for (j = 0; j < (nonodes); j++) place[j] = -1.0; evaluate(root); current = -like; oldcompat = compatible; what = i; q = treenode[treenode[i - 1]->back->index - 1]; if (q->next->back->index == i) fromwhere = q->next->next->back->index; else fromwhere = q->next->back->index; rute = root; if (root->index == treenode[i - 1]->back->index) { if (treenode[treenode[i - 1]->back->index - 1]->next->back == treenode[i - 1]) rute = treenode[treenode[i - 1]->back->index - 1]->next->next->back; else rute = treenode[treenode[i - 1]->back->index - 1]->next->back; } re_move2(&treenode[i - 1], &dummy, &root, &wasleft, treenode); oldleft = wasleft; root = rute; addpreorder(root, treenode[i - 1], dummy, place); wasleft =oldleft; restoring = true; add2(treenode[fromwhere - 1], treenode[what - 1],dummy, &root, restoring, wasleft, treenode); like = -current; compatible = oldcompat; restoring = false; better = false; printf(" BETTER: "); for (j = 1; j <= (nonodes); j++) { if (place[j - 1] < current && place[j - 1] >= 0.0) { printf("%3ld:%6.2f", j, place[j - 1]); better = true; } } if (!better) printf(" NONE"); printf("\n TIED: "); tied = false; for (j = 1; j <= (nonodes); j++) { if (fabs(place[j - 1] - current) < 1.0e-6 && j != fromwhere) { if (j < 10) printf("%2ld", j); else printf("%3ld", j); tied = true; } } if (tied) printf(":%6.2f\n", current); else printf("NONE\n"); changed = true; free(place); } /* try */ void undo() { /* restore to tree before last rearrangement */ long temp; boolean btemp; node *q; switch (lastop) { case rearr: restoring = true; oldleft = wasleft; re_move2(&treenode[what - 1], &q, &root, &wasleft, treenode); btemp = wasleft; wasleft = oldleft; add2(treenode[fromwhere - 1], treenode[what - 1],q, &root, restoring, wasleft, treenode); wasleft = btemp; restoring = false; temp = fromwhere; fromwhere = towhere; towhere = temp; changed = true; break; case flipp: q = treenode[atwhat - 1]->next->back; treenode[atwhat - 1]->next->back = treenode[atwhat - 1]->next->next->back; treenode[atwhat - 1]->next->next->back = q; treenode[atwhat - 1]->next->back->back = treenode[atwhat - 1]->next; treenode[atwhat - 1]->next->next->back->back = treenode[atwhat - 1]->next->next; break; case reroott: restoring = true; temp = oldoutgrno; oldoutgrno = outgrno; outgrno = temp; reroot(treenode[outgrno - 1]); restoring = false; break; case none: /* blank case */ break; } move_printree(); if (lastop == none) { printf("No operation to undo! 
\n"); return; } btemp = oldwritten; oldwritten = written; written = btemp; } /* undo */ void treewrite(boolean done) { /* write out tree to a file */ if (!done) move_printree(); if (waswritten && ch == 'N') return; col = 0; treeout(root, 1, &col, root); printf("\nTree written to file \"%s\"\n\n", outtreename); waswritten = true; written = true; FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif } /* treewrite */ void clade() { /* pick a subtree and show only that on screen */ long i; boolean ok; printf("Select subtree rooted at which node (0 for whole tree)? "); inpnum(&i, &ok); ok = (ok && (unsigned)(i <= nonodes)); if (ok) { subtree = (i > 0); if (subtree) nuroot = treenode[i - 1]; else nuroot = root; } move_printree(); if (!ok) printf("Not possible to use this node. "); } /* clade */ void flip() { /* flip at a node left-right */ long i; boolean ok; node *p; printf("Flip branches at which node? "); inpnum(&i, &ok); ok = (ok && i > spp && i <= nonodes); if (ok) { p = treenode[i - 1]->next->back; treenode[i - 1]->next->back = treenode[i - 1]->next->next->back; treenode[i - 1]->next->next->back = p; treenode[i - 1]->next->back->back = treenode[i - 1]->next; treenode[i - 1]->next->next->back->back = treenode[i - 1]->next->next; atwhat = i; lastop = flipp; } move_printree(); if (ok) { oldwritten = written; written = false; return; } if (i >= 1 && i <= spp) printf("Can't flip there. "); else printf("No such node. "); } /* flip */ void changeoutgroup() { long i; boolean ok; oldoutgrno = outgrno; do { printf("Which node should be the new outgroup? "); inpnum(&i, &ok); ok = (ok && in_tree[i - 1] && i >= 1 && i <= nonodes && i != root->index); if (ok) outgrno = i; } while (!ok); if (in_tree[outgrno - 1]) reroot(treenode[outgrno - 1]); changed = true; lastop = reroott; move_printree(); oldwritten = written; written = false; } /* changeoutgroup */ void redisplay() { boolean done=false; waswritten = false; do { fprintf(stderr, "NEXT? (R # + - S . T U W O F H J K L C ? X Q) "); fprintf(stderr, "(? 
for Help): "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); if (strchr("R#+-S.TUWOFHJKLC?XQ",ch) != NULL){ switch (ch) { case 'R': rearrange(); break; case '#': nextinc(&dispchar, &dispword, &dispbit, chars, bits, &display, numsteps, weight); move_printree(); break; case '+': nextchar(&dispchar, &dispword, &dispbit, chars, bits, &display); move_printree(); break; case '-': prevchar(&dispchar, &dispword, &dispbit, chars, bits, &display); move_printree(); break; case 'S': show(&dispchar, &dispword, &dispbit, chars, bits, &display); move_printree(); break; case '.': move_printree(); break; case 'T': try(); break; case 'U': undo(); break; case 'W': treewrite(done); break; case 'O': changeoutgroup(); break; case 'F': flip(); break; case 'H': window(left, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); move_printree(); break; case 'J': window(downn, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); move_printree(); break; case 'K': window(upp, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); move_printree(); break; case 'L': window(right, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); move_printree(); break; case 'C': clade(); break; case '?': help("character"); move_printree(); break; case 'X': done = true; break; case 'Q': done = true; break; } } } while (!done); if (!written) { do { fprintf(stderr,"Do you want to write out the tree to a file? (Y or N): "); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; } while (ch != 'Y' && ch != 'y' && ch != 'N' && ch != 'n'); } if (ch == 'Y' || ch == 'y') treewrite(done); } /* redisplay */ void treeconstruct() { /* constructs a binary tree from the pointers in treenode. */ restoring = false; subtree = false; display = false; dispchar = 0; fullset = (1L << (bits + 1)) - (1L << 1); earlytree = true; buildtree(); waswritten = false; printf("\nComputing steps needed for compatibility in characters...\n\n"); newtree = true; earlytree = false; move_printree(); bestyet = -like; gotlike = -like; lastop = none; newtree = false; written = false; redisplay(); } /* treeconstruct */ int main(int argc, Char *argv[]) { /* Interactive mixed parsimony */ /* reads in spp, chars, and the data. Then calls treeconstruct to */ /* construct the tree and query the user */ #ifdef MAC argc = 1; /* macsetup("Move",""); */ argv[0] = "Move"; #endif init(argc, argv); emboss_getoptions("fmove", argc, argv); progname = argv[0]; topedge = 1; leftedge = 1; ibmpc = IBMCRT; ansi = ANSICRT; root = NULL; bits = 8*sizeof(long) - 1; doinput(); configure(); treeconstruct(); FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Interactive mixed parsimony */ PHYLIPNEW-3.69.650/src/dollo.c0000664000175000017500000002503210775447511012347 00000000000000#include "phylip.h" #include "disc.h" #include "dollo.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ void correct(node *p, long fullset, boolean dollo, bitptr zeroanc, pointptr treenode) { /* get final states for intermediate nodes */ long i; long z0, z1, s0, s1, temp; if (p->tip) return; for (i = 0; i < (words); i++) { if (p->back == NULL) { s0 = zeroanc[i]; s1 = fullset & (~zeroanc[i]); } else { s0 = treenode[p->back->index - 1]->statezero[i]; s1 = treenode[p->back->index - 1]->stateone[i]; } z0 = (s0 & p->statezero[i]) | (p->next->back->statezero[i] & p->next->next->back->statezero[i]); z1 = (s1 & p->stateone[i]) | (p->next->back->stateone[i] & p->next->next->back->stateone[i]); if (dollo) { temp = z0 & (~(zeroanc[i] & z1)); z1 &= ~(fullset & (~zeroanc[i]) & z0); z0 = temp; } temp = fullset & (~z0) & (~z1); p->statezero[i] = z0 | (temp & s0 & (~s1)); p->stateone[i] = z1 | (temp & s1 & (~s0)); } } /* correct */ void fillin(node *p) { /* Sets up for each node in the tree two statesets. stateone and statezero are the sets of character states that must be 1 or must be 0, respectively, in a most parsimonious reconstruction, based on the information at or above this node. Note that this state assignment may change based on information further down the tree. If a character is in both sets it is in state "P". If in neither, it is "?". */ long i; for (i = 0; i < words; i++) { p->stateone[i] = p->next->back->stateone[i] | p->next->next->back->stateone[i]; p->statezero[i] = p->next->back->statezero[i] | p->next->next->back->statezero[i]; } } /* fillin */ void postorder(node *p) { /* traverses a binary tree, calling PROCEDURE fillin at a node's descendants before calling fillin at the node */ /* used in dollop, dolmove, & move */ if (p->tip) return; postorder(p->next->back); postorder(p->next->next->back); fillin(p); } /* postorder */ void count(long *stps, bitptr zeroanc, steptr numszero, steptr numsone) { /* counts the number of steps in a branch of the tree. The program spends much of its time in this PROCEDURE */ /* used in dolpenny & move */ long i, j, l; j = 1; l = 0; for (i = 0; i < (chars); i++) { l++; if (l > bits) { l = 1; j++; } if (((1L << l) & stps[j - 1]) != 0) { if (((1L << l) & zeroanc[j - 1]) != 0) numszero[i] += weight[i]; else numsone[i] += weight[i]; } } } /* count */ void filltrav(node *r) { /* traverse to fill in interior node states */ if (r->tip) return; filltrav(r->next->back); filltrav(r->next->next->back); fillin(r); } /* filltrav */ void hyprint(struct htrav_vars *Hyptrav, boolean *unknown, bitptr dohyp, Char *guess) { /* print out states at node */ long i, j, k; char l; boolean dot, a0, a1, s0, s1; if (Hyptrav->bottom) fprintf(outfile, "root "); else fprintf(outfile, "%3ld ", Hyptrav->r->back->index - spp); if (Hyptrav->r->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[Hyptrav->r->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", Hyptrav->r->index - spp); if (Hyptrav->nonzero) fprintf(outfile, " yes "); else if (*unknown) fprintf(outfile, " ? 
"); else fprintf(outfile, " no "); for (j = 1; j <= (chars); j++) { newline(outfile, j, 40, nmlngth + 17); k = (j - 1) / bits + 1; l = (j - 1) % bits + 1; dot = (((1L << l) & dohyp[k - 1]) == 0 && guess[j - 1] == '?'); s0 = (((1L << l) & Hyptrav->r->statezero[k - 1]) != 0); s1 = (((1L << l) & Hyptrav->r->stateone[k - 1]) != 0); a0 = (((1L << l) & Hyptrav->zerobelow->bits_[k - 1]) != 0); a1 = (((1L << l) & Hyptrav->onebelow->bits_[k - 1]) != 0); dot = (dot || (a1 == s1 && a0 == s0)); if (dot) putc('.', outfile); else { if (s0) { if (s1) putc('P', outfile); else putc('0', outfile); } else if (s1) putc('1', outfile); else putc('?', outfile); } if (j % 5 == 0) putc(' ', outfile); } putc('\n', outfile); } /* hyprint */ void hyptrav(node *r_, boolean *unknown, bitptr dohyp, long fullset, boolean dollo, Char *guess, pointptr treenode, gbit *garbage, bitptr zeroanc, bitptr oneanc) { /* compute, print out states at one interior node */ struct htrav_vars HypVars; long i; HypVars.r = r_; disc_gnu(&HypVars.zerobelow, &garbage); disc_gnu(&HypVars.onebelow, &garbage); if (!HypVars.r->tip) correct(HypVars.r, fullset, dollo, zeroanc, treenode); HypVars.bottom = (HypVars.r->back == NULL); HypVars.nonzero = false; if (HypVars.bottom) { memcpy(HypVars.zerobelow->bits_, zeroanc, words*sizeof(long)); memcpy(HypVars.onebelow->bits_, oneanc, words*sizeof(long)); } else { memcpy(HypVars.zerobelow->bits_, treenode[HypVars.r->back->index - 1]->statezero, words*sizeof(long)); memcpy(HypVars.onebelow->bits_, treenode[HypVars.r->back->index - 1]->stateone, words*sizeof(long)); } for (i = 0; i < (words); i++) HypVars.nonzero = (HypVars.nonzero || ((HypVars.r->stateone[i] & HypVars.zerobelow->bits_[i]) | (HypVars.r->statezero[i] & HypVars.onebelow->bits_[i])) != 0); hyprint(&HypVars,unknown,dohyp, guess); if (!HypVars.r->tip) { hyptrav(HypVars.r->next->back, unknown,dohyp, fullset, dollo, guess, treenode, garbage, zeroanc, oneanc); hyptrav(HypVars.r->next->next->back, unknown,dohyp, fullset, dollo, guess, treenode, garbage, zeroanc, oneanc); } disc_chuck(HypVars.zerobelow, &garbage); disc_chuck(HypVars.onebelow, &garbage); } /* hyptrav */ void hypstates(long fullset, boolean dollo, Char *guess, pointptr treenode, node *root, gbit *garbage, bitptr zeroanc, bitptr oneanc) { /* fill in and describe states at interior nodes */ /* used in dollop & dolpenny */ boolean unknown = false; bitptr dohyp; long i, j, k; for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = 0; } for (i = 0; i < (chars); i++) { j = i / bits + 1; k = i % bits + 1; if (guess[i] == '0') zeroanc[j - 1] = ((long)zeroanc[j - 1]) | (1L << k); if (guess[i] == '1') oneanc[j - 1] = ((long)oneanc[j - 1]) | (1L << k); unknown = (unknown || guess[i] == '?'); } dohyp = (bitptr)Malloc(words*sizeof(long)); for (i = 0; i < words; i++) dohyp[i] = zeroanc[i] | oneanc[i]; filltrav(root); fprintf(outfile, "From To Any Steps?"); fprintf(outfile, " State at upper node\n"); fprintf(outfile, " "); fprintf(outfile, " ( . 
means same as in the node below it on tree)\n\n"); hyptrav(root, &unknown,dohyp, fullset, dollo, guess, treenode, garbage, zeroanc, oneanc); free(dohyp); } /* hypstates */ void drawline(long i, double scale, node *root) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j; boolean extra, done; p = root; q = root; extra = false; if (i == (long)p->ycoord && p == root) { if (p->index - spp >= 10) fprintf(outfile, "-%2ld", p->index - spp); else fprintf(outfile, "--%ld", p->index - spp); extra = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)(scale * (p->xcoord - q->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { putc('+', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } } else { for (j = 1; j <= n; j++) putc(' ', outfile); } if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree(double f, boolean treeprint, node *root) { /* prints out diagram of the tree */ /* used in dollop & dolpenny */ long i, tipy, dummy; double scale; putc('\n', outfile); if (!treeprint) return; putc('\n', outfile); tipy = 1; dummy = 0; coordinates(root, &tipy, f, &dummy); scale = 1.5; putc('\n', outfile); for (i = 1; i <= (tipy - down); i++) drawline(i, scale, root); putc('\n', outfile); } /* printree */ void writesteps(boolean weights, boolean dollo, steptr numsteps) { /* write number of steps */ /* used in dollop & dolpenny */ long i, j, k; if (weights) fprintf(outfile, "weighted"); if (dollo) fprintf(outfile, " reversions "); else fprintf(outfile, " polymorphisms "); fprintf(outfile, "in each character:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%4ld", i); fprintf(outfile, "\n *-----------------------------------------\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld", i * 10); putc('!', outfile); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k == 0 || k > chars) fprintf(outfile, " "); else fprintf(outfile, "%4ld", numsteps[k - 1] + extras[k - 1]); } putc('\n', outfile); } putc('\n', outfile); } /* writesteps */ PHYLIPNEW-3.69.650/src/treedistpair.c0000664000175000017500000011113511605067345013731 00000000000000/* version 3.6. (c) Copyright 1993-2005 by the University of Washington. Written by Dan Fineman, Joseph Felsenstein, Mike Palczewski, Hisashi Horino, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include "phylip.h" #include "cons.h" typedef enum { PHYLIPSYMMETRIC, PHYLIPBSD } distance_type; /* The following extern's refer to things declared in cons.c */ extern int tree_pairing; extern Char intreename[FNMLNGTH], intree2name[FNMLNGTH], outtreename[FNMLNGTH]; extern node *root; const char* outfilename; AjPFile embossoutfile; long trees_in_1, trees_in_2; extern long numopts, outgrno, col; extern long maxgrp; /* max. no. of groups in all trees found */ extern boolean trout, firsttree, noroot, outgropt, didreroot, prntsets, progress, treeprint, goteof; extern pointarray treenode, nodep; extern group_type **grouping, **grping2, **group2;/* to store groups found */ extern long **order, **order2, lasti; extern group_type *fullset; extern node *grbg; extern long tipy; extern double **timesseen, **tmseen2, **times2; extern double trweight, ntrees; static distance_type dtype; static long output_scheme; AjPPhyloTree* phylotrees = NULL; AjPPhyloTree* phylomoretrees = NULL; #ifndef OLDC /* function prototpes */ void assign_tree(group_type **, pattern_elm ***, long, long *); boolean group_is_null(group_type **, long); void compute_distances(pattern_elm ***, long, long); void free_patterns(pattern_elm ***, long); void produce_square_matrix(long, long *); void produce_full_matrix(long, long, long *); void output_submenu(void); void pairing_submenu(void); void read_second_file(pattern_elm ***, long, long, AjPPhyloTree* trees); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void assign_lengths(double **lengths, pattern_elm ***pattern_array, long tree_index); void print_header(long trees_in_1, long trees_in_2); void output_distances(long trees_in_1, long trees_in_2); void output_long_distance(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_matrix_long(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_matrix_double(double diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_double_distance(double diffd, long tree1, long tree2, long trees_in_1, long trees_in_2); long symetric_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, long patternsz1, long patternsz2); double bsd_tree_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, double* lengths1, double *lengths2, long patternsz1, long patternsz2); void tree_diff(group_type **tree1, group_type **tree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2, long ntree1, long ntree2, long trees_in_1, long trees_in_2); void print_line_heading(long tree); int get_num_columns(void); void print_matrix_heading(long tree, long maxtree); /* function prototpes */ #endif void assign_lengths(double **lengths, pattern_elm ***pattern_array, long tree_index) { *lengths = pattern_array[0][tree_index]->length; } void assign_tree(group_type **treeN, pattern_elm ***pattern_array, long tree_index, long *pattern_size) { /* set treeN to be the tree_index-th tree in pattern_elm */ long i; for ( i = 0 ; i < setsz ; i++ ) { treeN[i] = pattern_array[i][tree_index]->apattern; } *pattern_size = *pattern_array[0][tree_index]->patternsize; } /* assign_tree */ boolean group_is_null(group_type **treeN, long index) { /* Check to see if a given index to a tree array points to an empty group */ long i; for ( i = 0 ; i < setsz ; i++ ) if (treeN[i][index] != (group_type) 0) return false; /* If we've gotten this far, then the index is to an empty group in the tree. 
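(Every one of the setsz group_type words stored for this index is zero.)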
*/ return true; } /* group_is_null */ double bsd_tree_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2) { /* Compute the difference between 2 given trees. Return that value as a double. */ long index1, index2; double return_value = 0; boolean match_found; long i; if ( group_is_null(tree1, 0) || group_is_null(tree2, 0) ) { printf ("Error computing tree difference between tree %ld and tree %ld\n", ntree1, ntree2); embExitBad(); } for ( index1 = 0; index1 < patternsz1; index1++ ) { if ( !group_is_null(tree1, index1) ) { if ( lengths1[index1] == -1 ) { printf( "Error: tree %ld is missing a length from at least one branch\n", ntree1); embExitBad(); } } } for ( index2 = 0; index2 < patternsz2; index2++ ) { if ( !group_is_null(tree2, index2) ) { if ( lengths2[index2] == -1 ) { printf( "Error: tree %ld is missing a length from at least one branch\n", ntree2); embExitBad(); } } } for ( index1 = 0 ; index1 < patternsz1; index1++ ) { /* For every element in the first tree, see if there's a match to it in the second tree. */ match_found = false; if ( group_is_null(tree1, index1) ) { /* When we've gone over all the elements in tree1, greater number of elements in tree2 will constitute that much more of a difference... */ while ( !group_is_null(tree2, index1) ) { return_value += pow(lengths1[index1], 2); index1++; } break; } for ( index2 = 0 ; index2 < patternsz2 ; index2++ ) { /* For every element in the second tree, see if any match the current element in the first tree. */ if ( group_is_null(tree2, index2) ) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for ( i = 0 ; i < setsz ; i++ ) { /* See if we've got a match, */ if ( tree1[i][index1] != tree2[i][index2] ) match_found = false; } if ( match_found == true ) { break; } } } if ( match_found == false ) { return_value += pow(lengths1[index1], 2); } } for ( index2 = 0 ; index2 < patternsz2 ; index2++ ) { /* For every element in the second tree, see if there's a match to it in the first tree. */ match_found = false; if ( group_is_null(tree2, index2) ) { /* When we've gone over all the elements in tree2, greater number of elements in tree1 will constitute that much more of a difference... */ while ( !group_is_null(tree1, index2) ) { return_value += pow(lengths2[index2], 2); index2++; } break; } for ( index1 = 0 ; index1 < patternsz1 ; index1++ ) { /* For every element in the first tree, see if any match the current element in the second tree. */ if ( group_is_null(tree1, index1) ) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for ( i = 0 ; i < setsz ; i++ ) { /* See if we've got a match, */ if ( tree1[i][index1] != tree2[i][index2] ) match_found = false; } if ( match_found == true ) { return_value += pow(lengths1[index1] - lengths2[index2], 2); break; } } } if ( match_found == false ) { return_value += pow(lengths2[index2], 2); } } if (return_value > 0.0) return_value = sqrt(return_value); else return_value = 0.0; return return_value; } long symetric_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, long patternsz1, long patternsz2) { /* Compute the symmetric difference between 2 given trees. Return that value as a long. 
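A single call only counts the groups of tree1 that have no match in tree2; tree_diff() calls it twice, once in each direction, and adds the two counts to obtain the full symmetric difference.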
*/ long index1, index2, return_value = 0; boolean match_found; long i; if (group_is_null (tree1, 0) || group_is_null (tree2, 0)) { printf ("Error computing tree difference.\n"); return 0; } for (index1 = 0 ; index1 < patternsz1 ; index1++) { /* For every element in the first tree, see if there's a match to it in the second tree. */ match_found = false; if (group_is_null (tree1, index1)) { /* When we've gone over all the elements in tree1, greater number of elements in tree2 will constitute that much more of a difference... */ while (! group_is_null (tree2, index1)) { return_value++; index1++; } break; } for (index2 = 0 ; index2 < patternsz2 ; index2++) { /* For every element in the second tree, see if any match the current element in the first tree. */ if (group_is_null (tree2, index2)) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for (i = 0 ; i < setsz ; i++) { /* See if we've got a match, */ if (tree1[i][index1] != tree2[i][index2]) match_found = false; } if (match_found == true) { /* If the previous loop ran from 0 to setsz without setting match_found to false, */ break; } } } if (match_found == false) { return_value++; } } return return_value; } /* symetric_diff */ void output_double_distance(double diffd, long tree1, long tree2, long trees_in_1, long trees_in_2) { switch (tree_pairing) { case ADJACENT_PAIRS: if (output_scheme == VERBOSE ) { fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd); } break; case ALL_IN_FIRST: if (output_scheme == VERBOSE) { fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd ); } else if (output_scheme == FULL_MATRIX) { output_matrix_double(diffd, tree1, tree2, trees_in_1, trees_in_2); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf (outfile, "Tree pair %ld: %e\n", tree1, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %e\n", tree1, diffd); } break; case ALL_IN_1_AND_2: if (output_scheme == VERBOSE ) fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); else if (output_scheme == SPARSE) fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd); else if (output_scheme == FULL_MATRIX) { output_matrix_double(diffd, tree1, tree2, trees_in_1, trees_in_2); } break; } } /* output_double_distance */ void print_matrix_heading(long tree, long maxtree) { long i; if ( tree_pairing == ALL_IN_1_AND_2 ) { fprintf(outfile, "\n\nFirst\\ Second tree file:\n"); fprintf(outfile, "tree \\\n"); fprintf(outfile, "file: \\"); } else fprintf(outfile, "\n\n "); for ( i = tree ; i <= maxtree ; i++ ) { if ( dtype == PHYLIPSYMMETRIC ) fprintf(outfile, "%5ld ", i); else fprintf(outfile, " %7ld ", i); } fprintf(outfile, "\n"); if ( tree_pairing == ALL_IN_1_AND_2 ) fprintf(outfile, " \\"); else fprintf(outfile, " \\"); for ( i = tree ; i <= maxtree ; i++ ) { if ( dtype == PHYLIPSYMMETRIC ) fprintf(outfile, "------"); else fprintf(outfile, "------------"); } } void print_line_heading(long tree) { if ( tree_pairing == ALL_IN_1_AND_2 ) fprintf(outfile, "\n%4ld |", tree); else fprintf(outfile, "\n%5ld |", tree); } void output_matrix_double(double diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { if ( tree1 == 1 && ((tree2 - 1) % get_num_columns() == 0 || tree2 == 1 )) { 
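/* First row of a new block of matrix columns: print a heading spanning at
   most get_num_columns() trees (10 columns for symmetric distances, 7 for
   branch score distances), truncated at the last tree in the file. */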
if ( (tree_pairing == ALL_IN_FIRST && tree2 + get_num_columns() - 1 < trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree2 + get_num_columns() - 1 < trees_in_2)) { print_matrix_heading(tree2, tree2 + get_num_columns() - 1); } else { if ( tree_pairing == ALL_IN_FIRST) print_matrix_heading(tree2, trees_in_1); else print_matrix_heading(tree2, trees_in_2); } } if ( (tree2 - 1) % get_num_columns() == 0 || tree2 == 1) { print_line_heading(tree1); } fprintf(outfile, " %9g ", diffl); if ((tree_pairing == ALL_IN_FIRST && tree1 == trees_in_1 && tree2 == trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree1 == trees_in_1 && tree2 == trees_in_2)) fprintf(outfile, "\n\n\n"); } /* output_matrix_double */ void output_matrix_long(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { if ( tree1 == 1 && ((tree2 - 1) % get_num_columns() == 0 || tree2 == 1 )) { if ( (tree_pairing == ALL_IN_FIRST && tree2 + get_num_columns() - 1 < trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree2 + get_num_columns() - 1 < trees_in_2)) { print_matrix_heading(tree2, tree2 + get_num_columns() - 1); } else { if ( tree_pairing == ALL_IN_FIRST) print_matrix_heading(tree2, trees_in_1); else print_matrix_heading(tree2, trees_in_2); } } if ( (tree2 - 1) % get_num_columns() == 0 || tree2 == 1) { print_line_heading(tree1); } fprintf(outfile, "%4ld ", diffl); if ((tree_pairing == ALL_IN_FIRST && tree1 == trees_in_1 && tree2 == trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree1 == trees_in_1 && tree2 == trees_in_2)) fprintf(outfile, "\n\n\n"); } /* output_matrix_long */ void output_long_distance(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { switch (tree_pairing) { case ADJACENT_PAIRS: if (output_scheme == VERBOSE ) { fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl); } break; case ALL_IN_FIRST: if (output_scheme == VERBOSE) { fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl ); } else if (output_scheme == FULL_MATRIX) { output_matrix_long(diffl, tree1, tree2, trees_in_1, trees_in_2); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf (outfile, "Tree pair %ld: %ld\n", tree1, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld\n", tree1, diffl); } break; case ALL_IN_1_AND_2: if (output_scheme == VERBOSE) fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); else if (output_scheme == SPARSE) fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl); else if (output_scheme == FULL_MATRIX ) { output_matrix_long(diffl, tree1, tree2, trees_in_1, trees_in_2); } break; } } void tree_diff(group_type **tree1, group_type **tree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2, long ntree1, long ntree2, long trees_in_1, long trees_in_2) { long diffl; double diffd; switch (dtype) { case PHYLIPSYMMETRIC: diffl = symetric_diff (tree1, tree2, ntree1, ntree2, patternsz1, patternsz2); diffl += symetric_diff (tree2, tree1, ntree1, ntree2, patternsz2, patternsz1); output_long_distance(diffl, ntree1, ntree2, trees_in_1, trees_in_2); break; case PHYLIPBSD: diffd = bsd_tree_diff(tree1, tree2, ntree1, ntree2, lengths1, lengths2, patternsz1, patternsz2); output_double_distance(diffd, ntree1, ntree2, trees_in_1, trees_in_2); break; } } /* tree_diff */ int get_num_columns(void) { if ( dtype == PHYLIPSYMMETRIC ) 
return 10; else return 7; } /* get_num_columns */ void compute_distances(pattern_elm ***pattern_array, long trees_in_1, long trees_in_2) { /* Compute symmetric distances between arrays of trees */ long tree_index, end_tree, index1, index2, index3; group_type **treeA, **treeB; long patternsz1, patternsz2; double *length1 = NULL, *length2 = NULL; int num_columns = get_num_columns(); index1 = 0; /* Put together space for treeA and treeB */ treeA = (group_type **) Malloc (setsz * sizeof (group_type *)); treeB = (group_type **) Malloc (setsz * sizeof (group_type *)); print_header(trees_in_1, trees_in_2); switch (tree_pairing) { case ADJACENT_PAIRS: /* For every tree, compute the distance between it and the tree at the next location; do this in both directions */ end_tree = trees_in_1 - 1; for (tree_index = 0 ; tree_index < end_tree ; tree_index += 2) { assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_tree (treeB, pattern_array, tree_index + 1, &patternsz2); assign_lengths(&length1, pattern_array, tree_index); assign_lengths(&length2, pattern_array, tree_index + 1); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index+1, tree_index+2, trees_in_1, trees_in_2); if (tree_index + 2 == end_tree) printf("\nWARNING: extra tree at the end of input tree file.\n"); } break; case ALL_IN_FIRST: /* For every tree, compute the distance between it and every other tree in that file. */ end_tree = trees_in_1; if ( output_scheme != FULL_MATRIX ) { /* verbose or sparse output */ for (index1 = 0 ; index1 < end_tree ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for (index2 = 0 ; index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 + 1, trees_in_1, trees_in_2); } } } else { /* full matrix output */ for ( index3 = 0 ; index3 < trees_in_1 ; index3 += num_columns) { for ( index1 = 0 ; index1 < trees_in_1 ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for ( index2 = index3 ; index2 < index3 + num_columns && index2 < trees_in_1 ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 + 1, trees_in_1, trees_in_2); } } } } break; case CORR_IN_1_AND_2: if (trees_in_1 != trees_in_2) { /* Set end tree to the smaller of the two totals. */ end_tree = trees_in_1 > trees_in_2 ? trees_in_2 : trees_in_1; /* Print something out to the outfile and to the terminal. */ fprintf(outfile, "\n\n" "*** Warning: differing number of trees in first and second\n" "*** tree files. Only computing %ld pairs.\n" "\n", end_tree ); printf( "\n" " *** Warning: differing number of trees in first and second\n" " *** tree files. Only computing %ld pairs.\n" "\n", end_tree ); } else end_tree = trees_in_1; for (tree_index = 0 ; tree_index < end_tree ; tree_index++) { /* For every tree, compute the distance between it and the tree at the parallel location in the other file; do this in both directions */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); /* (tree_index + trees_in_1) will be the corresponding tree in the second file. 
*/ assign_tree (treeB, pattern_array, tree_index + trees_in_1, &patternsz2); assign_lengths(&length2, pattern_array, tree_index + trees_in_1); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index + 1, 0, trees_in_1, trees_in_2); } break; case ALL_IN_1_AND_2: end_tree = trees_in_1 + trees_in_2; if ( output_scheme != FULL_MATRIX ) { for (tree_index = 0 ; tree_index < trees_in_1 ; tree_index++) { /* For every tree in the first file, compute the distance between it and every tree in the second file. */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); for (index2 = trees_in_1 ; index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff(treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index + 1 , index2 + 1, trees_in_1, trees_in_2); } } for ( ; tree_index < end_tree ; tree_index++) { /* For every tree in the second file, compute the distance between it and every tree in the first file. */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); for (index2 = 0 ; index2 < trees_in_1 ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2 , patternsz1, patternsz2, tree_index + 1, index2 + 1, trees_in_1, trees_in_2); } } } else { for ( index3 = trees_in_1 ; index3 < end_tree ; index3 += num_columns) { for ( index1 = 0 ; index1 < trees_in_1 ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for ( index2 = index3 ; index2 < index3 + num_columns && index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 - trees_in_1 + 1, trees_in_1, trees_in_2); } } } } break; } /* Free up treeA and treeB */ free (treeA); free (treeB); } /* compute_distances */ void free_patterns(pattern_elm ***pattern_array, long total_trees) { long i, j; /* Free each pattern array, */ for (i=0 ; i < setsz ; i++) { for (j = 0 ; j < total_trees ; j++) { free (pattern_array[i][j]->apattern); free (pattern_array[i][j]->patternsize); free (pattern_array[i][j]->length); free (pattern_array[i][j]); } free (pattern_array[i]); } free (pattern_array); } /* free_patterns */ void print_header(long trees_in_1, long trees_in_2) { /*long end_tree;*/ switch (tree_pairing) { case ADJACENT_PAIRS: /*end_tree = trees_in_1 - 1;*/ if (output_scheme == VERBOSE) { fprintf(outfile, "\n" "Tree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between adjacent pairs of trees:\n" "\n"); else fprintf (outfile, "Symmetric differences between adjacent pairs of trees:\n\n"); } else if ( output_scheme != SPARSE) printf ("Error -- cannot output adjacent pairs into a full matrix.\n"); break; case ALL_IN_FIRST: /*end_tree = trees_in_1;*/ if (output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between all pairs of trees in tree file\n\n"); else fprintf (outfile, "Symmetric differences between all pairs of trees in tree file:\n\n"); } else if (output_scheme == FULL_MATRIX) { fprintf(outfile, "\nTree distance program, 
version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between all pairs of trees in tree file:\n\n"); else fprintf (outfile, "Symmetric differences between all pairs of trees in tree file:\n\n"); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) { fprintf (outfile, "Branch score distances between corresponding pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } else { fprintf (outfile, "Symmetric differences between corresponding pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } } else if (output_scheme != SPARSE) printf ( "Error -- cannot output corresponding pairs into a full matrix.\n"); break; case (ALL_IN_1_AND_2) : if ( output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) { fprintf (outfile, "Branch score distances between all pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } else { fprintf(outfile,"Symmetric differences between all pairs of trees\n"); fprintf(outfile," from first and second tree files:\n\n"); } } else if ( output_scheme == FULL_MATRIX) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); } break; } } /* print_header */ void output_submenu() { /* this allows the user to select a different output of distances scheme. */ long loopcount; boolean done = false; Char ch; if (tree_pairing == NO_PAIRING) return; loopcount = 0; while (!done) { printf ("\nDistances output options:\n"); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) printf (" F Full matrix.\n"); printf (" V One pair per line, verbose.\n"); printf (" S One pair per line, sparse.\n"); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) printf ("\n Choose one: (F,V,S)\n"); else printf ("\n Choose one: (V,S)\n"); fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); if (strchr("FVS", ch) != NULL) { switch (ch) { case 'F': if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) output_scheme = FULL_MATRIX; else /* If this can't be a full matrix... */ continue; break; case 'V': output_scheme = VERBOSE; break; case 'S': output_scheme = SPARSE; break; } done = true; } countup(&loopcount, 10); } } /* output_submenu */ void pairing_submenu() { /* this allows the user to select a different tree pairing scheme. 
*/ long loopcount; boolean done = false; Char ch; loopcount = 0; while (!done) { cleerhome(); printf ("Tree Pairing Submenu:\n"); printf (" A Distances between adjacent pairs in tree file.\n"); printf (" P Distances between all possible pairs in tree file.\n"); printf (" C Distances between corresponding pairs in one tree file and another.\n"); printf (" L Distances between all pairs in one tree file and another.\n"); printf ("\n Choose one: (A,P,C,L)\n"); fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); if (strchr("APCL", ch) != NULL) { switch (ch) { case 'A': tree_pairing = ADJACENT_PAIRS; break; case 'P': tree_pairing = ALL_IN_FIRST; break; case 'C': tree_pairing = CORR_IN_1_AND_2; break; case 'L': tree_pairing = ALL_IN_1_AND_2; break; } output_submenu(); done = true; } countup(&loopcount, 10); } } /* pairing_submenu */ void read_second_file(pattern_elm ***pattern_array, long trees_in_1, long trees_in_2, AjPPhyloTree* treesource) { boolean firsttree2, haslengths; long nextnode, trees_read=0; long j; int itree=0; char *treestr; firsttree2 = false; while (treesource[itree]) { goteof = false; nextnode = 0; haslengths = false; treestr = ajStrGetuniquePtr(&treesource[itree++]->Tree); allocate_nodep(&nodep, treestr, &spp); treeread(&treestr, &root, treenode, &goteof, &firsttree2, nodep, &nextnode, &haslengths, &grbg, initconsnode, false, -1); missingname(root); reordertips(); if (goteof) continue; ntrees += trweight; if (noroot) { reroot(nodep[outgrno - 1], &nextnode); didreroot = outgropt; } accumulate(root); gdispose(root); for (j = 0; j < 2*(1 + spp); j++) nodep[j] = NULL; free(nodep); store_pattern (pattern_array, trees_in_1 + trees_read); trees_read++; } } /* read_second_file */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr disttype = NULL; AjPStr tree_p = NULL; AjPStr style = NULL; dtype = PHYLIPBSD; tree_pairing = ADJACENT_PAIRS; output_scheme = VERBOSE; ibmpc = IBMCRT; ansi = ANSICRT; didreroot = false; spp = 0; grbg = NULL; col = 0; noroot = true; numopts = 0; outgrno = 1; outgropt = false; progress = true; /* The following are not used by treedist, but may be used in functions in cons.c, so we set them here. 
*/ treeprint = false; trout = false; prntsets = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylotrees = ajAcdGetTree("intreefile"); trees_in_1 = 0; while (phylotrees[trees_in_1]) trees_in_1++; phylomoretrees = ajAcdGetTree("bintreefile"); trees_in_2 = 0; while (phylomoretrees[trees_in_2]) trees_in_2++; progress = ajAcdGetBoolean("progress"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; disttype = ajAcdGetListSingle("dtype"); if(ajStrMatchC(disttype, "s")) dtype = PHYLIPSYMMETRIC; else dtype = PHYLIPBSD; noroot = !ajAcdGetBoolean("noroot"); tree_p = ajAcdGetListSingle("pairing"); if(ajStrMatchC(tree_p, "c")) tree_pairing = CORR_IN_1_AND_2; else if(ajStrMatchC(tree_p, "l")) tree_pairing = ALL_IN_1_AND_2; style = ajAcdGetListSingle("style"); if(ajStrMatchC(style, "f")) output_scheme = FULL_MATRIX; else if(ajStrMatchC(style, "s")) output_scheme = SPARSE; else if(ajStrMatchC(style, "v")) output_scheme = VERBOSE; embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* embosss_getoptions */ int main(int argc, Char *argv[]) { pattern_elm ***pattern_array; long tip_count = 0; double ln_maxgrp; double ln_maxgrp1; double ln_maxgrp2; node * p; #ifdef MAC argc = 1; /* macsetup("Treedist", ""); */ argv[0] = "Treedist"; #endif init(argc, argv); emboss_getoptions("ftreedistpair",argc,argv); /* Initialize option-based variables, then ask for changes regarding their values. */ ntrees = 0.0; lasti = -1; /* read files to determine size of structures we'll be working with */ countcomma(ajStrGetuniquePtr(&phylotrees[0]->Tree),&tip_count); tip_count++; /* countcomma does a raw comma count, tips is one greater */ /* * EWFIX.BUG.756 -- this section may be killed if a good solution * to bug 756 is found * * inside cons.c there are several arrays which are allocated * to size "maxgrp", the maximum number of groups (sets of * tips more closely connected than the rest of the tree) we * can see as the code executes. * * We have two measures we use to determine how much space to * allot: * (1) based on the tip count of the trees in the infile * (2) based on total number of trees in infile, and * * (1) -- Tip Count Method * Since each group is a subset of the set of tips we must * represent at most pow(2,tips) different groups. (Technically * two fewer since we don't store the empty or complete subsets, * but let's keep this simple. 
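 *
 * For example (numbers purely illustrative): with 10 tips there are at
 * most 2^10 = 1024 distinct groups, and when this bound is the smallest
 * one the code below allots pow(2, 10+1) = 2048 hash entries, doubling
 * to leave room for quick hashing.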
* * (2) -- Total Tree Size Method * Each tree we read results in * singleton groups for each tip, plus * a group for each interior node except the root * Since the singleton tips are identical for each tree, this gives * a bound of #tips + ( #trees * (# tips - 2 ) ) * * * Ignoring small terms where expedient, either of the following should * result in an adequate allocation: * pow(2,#tips) * (#trees + 1) * #tips * * Since "maxgrp" is a limit on the number of items we'll need to put * in a hash, we double it to make space for quick hashing * * BUT -- all of this has the possibility for overflow, so -- let's * make the initial calculations with doubles and then convert * */ /* limit chosen to make hash arithmetic work */ maxgrp = LONG_MAX / 2; ln_maxgrp = log((double)maxgrp); /* 2 * (#trees + 1) * #tips */ ln_maxgrp1 = log(2.0 * (double)tip_count * ((double)trees_in_1 + (double)trees_in_2)); /* ln only for 2 * pow(2,#tips) */ ln_maxgrp2 = (double)(1 + tip_count) * log(2.0); /* now -- find the smallest of the three */ if(ln_maxgrp1 < ln_maxgrp) { maxgrp = 2 * (trees_in_1 + trees_in_2 + 1) * tip_count; ln_maxgrp = ln_maxgrp1; } if(ln_maxgrp2 < ln_maxgrp) { maxgrp = pow(2,tip_count+1); } /* Read the (first) tree file and put together grouping, order, and timesseen */ read_groups (&pattern_array, trees_in_1 + trees_in_2, tip_count, phylotrees); if ((tree_pairing == ADJACENT_PAIRS) || (tree_pairing == ALL_IN_FIRST)) { /* Here deal with the adjacent or all-in-first pairing difference computation */ compute_distances (pattern_array, trees_in_1, 0); } else if ((tree_pairing == CORR_IN_1_AND_2) || (tree_pairing == ALL_IN_1_AND_2)) { /* Here, open the other tree file, parse it, and then put together the difference array */ read_second_file (pattern_array, trees_in_1, trees_in_2, phylomoretrees); compute_distances (pattern_array, trees_in_1, trees_in_2); } else if (tree_pairing == NO_PAIRING) { /* Compute the consensus tree. */ putc('\n', outfile); /* consensus(); Reserved for future development */ } if (progress) printf("\nOutput written to file \"%s\"\n\n", outfilename); FClose(outtree); FClose(intree); FClose(outfile); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == CORR_IN_1_AND_2)) FClose(intree2); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif free_patterns (pattern_array, trees_in_1 + trees_in_2); clean_up_final(); /* clean up grbg */ p = grbg; while (p != NULL) { node * r = p; p = p->next; free(r->nodeset); free(r->view); free(r); } printf("Done.\n\n"); embExit(); return 0; } /* main */ PHYLIPNEW-3.69.650/src/contml.c0000664000175000017500000010761211305225544012525 00000000000000/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include #include "phylip.h" #include "cont.h" #define epsilon1 0.000001 /* small number */ #define epsilon2 0.02 /* not such a small number */ #define smoothings 4 /* number of passes through smoothing algorithm */ #define maxtrees 10 /* maximum number of user trees in KHT test */ #define over 60 AjPPhyloFreq phylofreq; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void getalleles(void); void inputdata(void); void transformgfs(void); void getinput(void); void sumlikely(node *, node *, double *); double evaluate(tree *); double distance(node *, node *); void makedists(node *); void makebigv(node *, boolean *); void correctv(node *); void littlev(node *); void nuview(node *); void update(node *); void smooth(node *); void insert_(node *, node *); void copynode(node *, node *); void copy_(tree *, tree *); void inittip(long, tree *); void buildnewtip(long, tree *, long); void buildsimpletree(tree *); void addtraverse(node *, node *, boolean); void re_move(node **, node **); void rearrange(node *); void coordinates(node *, double, long *, double *); void drawline(long, double); void printree(void); void treeout(node *); void describe(node *, double, double); void summarize(void); void nodeinit(node *); void initrav(node *); void treevaluate(void); void maketree(void); void globrearrange(void); /* function prototypes */ #endif const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; long nonodes2, loci, totalleles, df, outgrno, col, datasets, ith, njumble, jumb=0; long inseed, inseed0; long *alleles, *locus, *weight; phenotype3 *x; boolean all, contchars, global, jumble, lengths, outgropt, trout, usertree, printdata, progress, treeprint, mulsets, firstset; longer seed; long *enterorder; tree curtree, priortree, bestree, bestree2; long nextsp, numtrees, which, maxwhich, shimotrees; /* From maketree, propagated to global */ boolean succeeded; double maxlogl; double l0gl[MAXSHIMOTREES]; double *pbar, *sqrtp, *l0gf[MAXSHIMOTREES]; Char ch; char *progname; double trweight; /* added to make treeread happy */ boolean goteof; boolean haslengths; /* end of ones added to make treeread happy */ node *addwhere; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr datatype = NULL; global = false; jumble = false; njumble = 1; lengths = false; outgrno = 1; outgropt = false; all = true; contchars = false; trout = true; usertree = false; printdata = false; progress = true; treeprint = true; mulsets = false; datasets = 1; embInitPV (pgm, argc, argv, "PHYLIPNEW",VERSION); phylofreq = ajAcdGetFrequencies("infile"); phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; lengths = ajAcdGetBoolean("lengths"); } datatype = ajAcdGetListSingle("datatype"); if(ajStrMatchC(datatype, "c")) contchars = true; outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; if(!usertree) { global = ajAcdGetBoolean("global"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); embossoutfile = ajAcdGetOutfile("outfile"); embossouttree = ajAcdGetOutfile("outtreefile"); 
emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) emboss_openfile(embossouttree, &outtree, &outtreename); fprintf(outfile, "\nContinuous character Maximum Likelihood"); fprintf(outfile, " method version %s\n\n",VERSION); ajStrDel(&datatype); } /* emboss_getoptions */ void allocrest() { /* allocate arrays for number of alleles, the data coordinates, names etc */ alleles = (long *)Malloc(loci*sizeof(long)); if (contchars) locus = (long *)Malloc(loci*sizeof(long)); x = (phenotype3 *)Malloc(spp*sizeof(phenotype3)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersfreq(phylofreq, &spp, &loci, &nonodes2, 2); if (printdata) fprintf(outfile, "\n%4ld Populations, %4ld Loci\n", spp, loci); alloctree(&curtree.nodep, nonodes2); if (!usertree) { alloctree(&bestree.nodep, nonodes2); alloctree(&priortree.nodep, nonodes2); if (njumble > 1) { alloctree(&bestree2.nodep, nonodes2); } } allocrest(); } /* doinit */ void getalleles() { /* set up number of alleles at loci */ long i, j, m; if (!firstset) samenumspfreq(phylofreq, &loci, ith); if (contchars ) { totalleles = loci; for (i = 1; i <= loci; i++) { locus[i - 1] = i; alleles[i - 1] = 1; } df = loci; } else { totalleles = 0; if (printdata) { fprintf(outfile, "\nNumbers of alleles at the loci:\n"); fprintf(outfile, "------- -- ------- -- --- -----\n\n"); } for (i = 1; i <= loci; i++) { alleles[i-1] = phylofreq->Allele[i-1]; if (alleles[i - 1] <= 0) { printf("ERROR: Bad number of alleles: %ld at locus %ld\n", alleles[i-1], i); exxit(-1); } totalleles += alleles[i - 1]; if (printdata) fprintf(outfile, "%4ld", alleles[i - 1]); } locus = (long *)Malloc(totalleles*sizeof(long)); m = 0; for (i = 1; i <= loci; i++) { for (j = 0; j < alleles[i - 1]; j++) locus[m+j] = i; m += alleles[i - 1]; } df = totalleles - loci; } allocview(&curtree, nonodes2, totalleles); if (!usertree) { allocview(&bestree, nonodes2, totalleles); allocview(&priortree, nonodes2, totalleles); if (njumble > 1) allocview(&bestree2, nonodes2, totalleles); } for (i = 0; i < spp; i++) x[i] = (phenotype3)Malloc(totalleles*sizeof(double)); pbar = (double *)Malloc(totalleles*sizeof(double)); if (usertree) for (i = 0; i < MAXSHIMOTREES; i++) l0gf[i] = (double *)Malloc(totalleles*sizeof(double)); if (printdata) putc('\n', outfile); } /* getalleles */ void inputdata() { /* read species data */ long i, j, k, l, m, m0, n, p; double sum; ajint ipos = 0; if (printdata) { fprintf(outfile, "\nName"); if (contchars) fprintf(outfile, " Phenotypes\n"); else fprintf(outfile, " Gene Frequencies\n"); fprintf(outfile, "----"); if (contchars) fprintf(outfile, " ----------\n"); else fprintf(outfile, " ---- -----------\n"); putc('\n', outfile); if (!contchars) { for (j = 1; j <= nmlngth - 8; j++) putc(' ', outfile); fprintf(outfile, "locus:"); p = 1; for (j = 1; j <= loci; j++) { if (all) n = alleles[j - 1]; else n = alleles[j - 1] - 1; for (k = 1; k <= n; k++) { fprintf(outfile, "%10ld", j); if (p % 6 == 0 && (all || p < df)) { putc('\n', outfile); for (l = 1; l <= nmlngth - 2; l++) putc(' ', outfile); } p++; } } fprintf(outfile, "\n\n"); } } for (i = 0; i < spp; i++) { initnamefreq(phylofreq, i); if (printdata) for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); m = 1; p = 1; for (j = 1; j <= loci; j++) { m0 = m; sum = 0.0; if (contchars) n = 1; else if (all) n = alleles[j - 1]; else n = alleles[j - 1] - 1; for (k = 1; k <= n; k++) { x[i][m - 1] = phylofreq->Data[ipos++]; sum += x[i][m 
- 1]; if (!contchars && x[i][m - 1] < 0.0) { printf("\n\nERROR: locus %ld in species %ld: an allele", j, i+1); printf(" frequency is negative\n"); exxit(-1); } if (printdata) { fprintf(outfile, "%10.5f", x[i][m - 1]); if (p % 6 == 0 && (all || p < df)) { putc('\n', outfile); for (l = 1; l <= nmlngth; l++) putc(' ', outfile); } } p++; m++; } if (all && !contchars) { if (fabs(sum - 1.0) > epsilon2) { printf( "\n\nERROR: Locus %ld in species %ld: frequencies do not add up to 1\n", j, i + 1); printf("\nFrequencies are:\n"); for (l = m0; l <= m-3; l++) printf("%f+", x[i][l]); printf("%f = %f\n\n", x[i][m-2], sum); exxit(-1); } else { for (l = 0; l <= m-2; l++) x[i][l] /= sum; } } if (!all && !contchars) { x[i][m-1] = 1.0 - sum; if (x[i][m-1] < 0.0) { if (x[i][m-1] > -epsilon2) { for (l = 0; l <= m-2; l++) x[i][l] /= sum; x[i][m-1] = 0.0; } else { printf("\n\nERROR: Locus %ld in species %ld: ", j, i + 1); printf("frequencies add up to more than 1\n"); printf("\nFrequencies are:\n"); for (l = m0-1; l <= m-3; l++) printf("%f+", x[i][l]); printf("%f = %f\n\n", x[i][m-2], sum); exxit(-1); } } m++; } } if (printdata) putc('\n', outfile); } if (printdata) putc('\n', outfile); } /* inputdata */ void transformgfs() { /* do stereographic projection transformation on gene frequencies to get variables that come closer to independent Brownian motions */ long i, j, k, l, m, n, maxalleles; double f, sum; double *sumprod, *sqrtp, *pbar; phenotype3 *c; sumprod = (double *)Malloc(loci*sizeof(double)); sqrtp = (double *)Malloc(totalleles*sizeof(double)); pbar = (double *)Malloc(totalleles*sizeof(double)); for (i = 0; i < totalleles; i++) { /* get mean gene frequencies */ pbar[i] = 0.0; for (j = 0; j < spp; j++) pbar[i] += x[j][i]; pbar[i] /= spp; if (pbar[i] == 0.0) sqrtp[i] = 0.0; else sqrtp[i] = sqrt(pbar[i]); } for (i = 0; i < spp; i++) { for (j = 0; j < loci; j++) /* for each locus, sum of root(p*x) */ sumprod[j] = 0.0; for (j = 0; j < totalleles; j++) if ((pbar[j]*x[i][j]) >= 0.0) sumprod[locus[j]-1] += sqrtp[j]*sqrt(x[i][j]); for (j = 0; j < totalleles; j++) { /* the projection to tangent plane */ f = (1.0 + sumprod[locus[j]-1])/2.0; if (x[i][j] == 0.0) x[i][j] = (2.0/f - 1.0)*sqrtp[j]; else x[i][j] = (1.0/f)*sqrt(x[i][j]) + (1.0/f - 1.0)*sqrtp[j]; } } maxalleles = 0; for (i = 0; i < loci; i++) if (alleles[i] > maxalleles) maxalleles = alleles[i]; c = (phenotype3 *)Malloc(maxalleles*sizeof(phenotype3)); for (i = 0; i < maxalleles; i++) /* enough room for any locus's contrasts */ c[i] = (double *)Malloc(maxalleles*sizeof(double)); m = 0; for (j = 0; j < loci; j++) { /* do this for each locus */ for (k = 0; k < alleles[j]-1; k++) { /* one fewer than # of alleles */ c[k][0] = 1.0; for (l = 0; l < k; l++) { /* for alleles 2 to k make it ... */ sum = 0.0; for (n = 0; n <= l; n++) sum += c[k][n]*c[l][n]; if (fabs(c[l][l+1]) > 0.000000001) /* ... 
orthogonal to those ones */ c[k][l+1] = -sum / c[l][l+1]; /* set coeff to make orthogonal */ else c[k][l+1] = 1.0; } sum = 0.0; for (l = 0; l <= k; l++) /* make it orthogonal to vector of sqrtp's */ sum += c[k][l]*sqrtp[m+l]; if (sqrtp[m+k+1] > 0.0000000001) c[k][k+1] = - sum / sqrtp[m+k+1]; else { for (l = 0; l <= k; l++) c[k][l] = 0.0; c[k][k+1] = 1.0; } sum = 0.0; for (l = 0; l <= k+1; l++) sum += c[k][l]*c[k][l]; sum = sqrt(sum); for (l = 0; l <= k+1; l++) if (sum > 0.0000000001) c[k][l] /= sum; } for (i = 0; i < spp; i++) { /* the orthonormal axes in the plane */ for (l = 0; l < alleles[k]-1; l++) { /* compute the l-th one */ c[maxalleles-1][l] = 0.0; /* temporarily store it ... */ for (n = 0; n <= l+1; n++) c[maxalleles-1][l] += c[l][n]*x[i][m+n]; } for (l = 0; l < alleles[k]-1; l++) x[i][m+l] = c[maxalleles-1][l]; /* replace the gene freqs by it */ } m += alleles[j]; } for (i = 0; i < maxalleles; i++) free(c[i]); free(c); free(sumprod); free(sqrtp); free(pbar); } /* transformgfs */ void getinput() { /* reads the input data */ getalleles(); inputdata(); if (!contchars) { transformgfs(); } } /* getinput */ void sumlikely(node *p, node *q, double *sum) { /* sum contribution to likelihood over forks in tree */ long i, j, m; double term, sumsq, vee; double temp; if (!p->tip) sumlikely(p->next->back, p->next->next->back, sum); if (!q->tip) sumlikely(q->next->back, q->next->next->back, sum); if (p->back == q) vee = p->v; else vee = p->v + q->v; vee += p->deltav + q->deltav; if (vee <= 1.0e-10) { printf("ERROR: check for two identical species "); printf("and eliminate one from the data\n"); exxit(-1); } sumsq = 0.0; if (usertree && which <= MAXSHIMOTREES) { for (i = 0; i < loci; i++) l0gf[which - 1][i] += (1 - alleles[i]) * log(vee) / 2.0; } if (contchars) { m = 0; for (i = 0; i < loci; i++) { temp = p->view[i] - q->view[i]; term = temp * temp; if (usertree && which <= MAXSHIMOTREES) l0gf[which - 1][i] -= term / (2.0 * vee); sumsq += term; } } else { m = 0; for (i = 0; i < loci; i++) { for (j = 1; j < alleles[i]; j++) { temp = p->view[m+j-1] - q->view[m+j-1]; term = temp * temp; if (usertree && which <= MAXSHIMOTREES) l0gf[which - 1][i] -= term / (2.0 * vee); sumsq += term; } m += alleles[i]; } } (*sum) += df * log(vee) / -2.0 - sumsq / (2.0 * vee); } /* sumlikely */ double evaluate(tree *t) { /* evaluate likelihood of a tree */ long i; double sum; sum = 0.0; if (usertree && which <= MAXSHIMOTREES) { for (i = 0; i < loci; i++) l0gf[which - 1][i] = 0.0; } sumlikely(t->start->back, t->start, &sum); if (usertree && which <= MAXSHIMOTREES) { l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; } else if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } } t->likelihood = sum; return sum; } /* evaluate */ double distance(node *p, node *q) { /* distance between two nodes */ long i, j, m; double sum, temp; sum = 0.0; if (!contchars) { m = 0; for (i = 0; i < loci; i++) { for (j = 0; j < alleles[i]-1; j++) { temp = p->view[m+j] - q->view[m+j]; sum += temp * temp; } m += alleles[i]; } } else { for (i = 0; i < totalleles; i++) { temp = p->view[i] - q->view[i]; sum += temp * temp; } } return sum; } /* distance */ void makedists(node *p) { /* compute distances among three neighbors of a node */ long i; node *q; for (i = 1; i <= 3; i++) { q = p->next; p->dist = distance(p->back, q->back); p = q; } } /* makedists */ void makebigv(node *p, boolean *negatives) { /* make new branch length */ long i; node *temp, *q, *r; q = p->next; r = q->next; *negatives = false; for (i = 1; i <= 3; i++) 
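/* visit in turn each of the three branches meeting at this fork,
   rotating p, q and r as we go */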
{ p->bigv = p->v + p->back->deltav; if (p->iter) { p->bigv = (p->dist + r->dist - q->dist) / (df * 2); p->back->bigv = p->bigv; if (p->bigv < p->back->deltav) *negatives = true; } temp = p; p = q; q = r; r = temp; } } /* makebigv */ void correctv(node *p) { /* iterate branch lengths if some are to be zero */ node *q, *r, *temp; long i, j; double f1, f2, vtot; q = p->next; r = q->next; for (i = 1; i <= smoothings; i++) { for (j = 1; j <= 3; j++) { vtot = q->bigv + r->bigv; if (vtot > 0.0) f1 = q->bigv / vtot; else f1 = 0.5; f2 = 1.0 - f1; p->bigv = (f1 * r->dist + f2 * p->dist - f1 * f2 * q->dist) / df; p->bigv -= vtot * f1 * f2; if (p->bigv < p->back->deltav) p->bigv = p->back->deltav; p->back->bigv = p->bigv; temp = p; p = q; q = r; r = temp; } } } /* correctv */ void littlev(node *p) { /* remove part of it that belongs to other barnches */ long i; for (i = 1; i <= 3; i++) { if (p->iter) p->v = p->bigv - p->back->deltav; if (p->back->iter) p->back->v = p->v; p = p->next; } } /* littlev */ void nuview(node *p) { /* renew information about subtrees */ long i, j, k, m; node *q, *r, *a, *b, *temp; double v1, v2, vtot, f1, f2; q = p->next; r = q->next; for (i = 1; i <= 3; i++) { a = q->back; b = r->back; v1 = q->bigv; v2 = r->bigv; vtot = v1 + v2; if (vtot > 0.0) f1 = v2 / vtot; else f1 = 0.5; f2 = 1.0 - f1; m = 0; for (j = 0; j < loci; j++) { for (k = 1; k <= alleles[j]; k++) p->view[m+k-1] = f1 * a->view[m+k-1] + f2 * b->view[m+k-1]; m += alleles[j]; } p->deltav = v1 * f1; temp = p; p = q; q = r; r = temp; } } /* nuview */ void update(node *p) { /* update branch lengths around a node */ boolean negatives; if (p->tip) return; makedists(p); makebigv(p,&negatives); if (negatives) correctv(p); littlev(p); nuview(p); } /* update */ void smooth(node *p) { /* go through tree getting new branch lengths and views */ if (p->tip) return; update(p); smooth(p->next->back); smooth(p->next->next->back); } /* smooth */ void insert_(node *p, node *q) { /* put p and q together and iterate info. 
on resulting tree */ long i; hookup(p->next->next, q->back); hookup(p->next, q); for (i = 1; i <= smoothings; i++) { smooth(p); smooth(p->back); } } /* insert_ */ void copynode(node *c, node *d) { /* make a copy of a node */ memcpy(d->view, c->view, totalleles*sizeof(double)); d->v = c->v; d->iter = c->iter; d->deltav = c->deltav; d->bigv = c->bigv; d->dist = c->dist; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; } /* copynode */ void copy_(tree *a, tree *b) { /* make a copy of tree a to tree b */ long i, j; node *p, *q; for (i = 0; i < spp; i++) { copynode(a->nodep[i], b->nodep[i]); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes2; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { copynode(p, q); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; b->start = a->start; } /* copy_ */ void inittip(long m, tree *t) { /* initialize branch lengths and views in a tip */ node *tmp; tmp = t->nodep[m - 1]; memcpy(tmp->view, x[m - 1], totalleles*sizeof(double)); tmp->deltav = 0.0; tmp->v = 0.0; } /* inittip */ void buildnewtip(long m, tree *t, long nextsp) { /* initialize and hook up a new tip */ node *p; inittip(m, t); p = t->nodep[nextsp + spp - 3]; hookup(t->nodep[m - 1], p); } /* buildnewtip */ void buildsimpletree(tree *t) { /* make and initialize a three-species tree */ inittip(enterorder[0], t); inittip(enterorder[1], t); hookup(t->nodep[enterorder[0] - 1], t->nodep[enterorder[1] - 1]); buildnewtip(enterorder[2], t, nextsp); insert_(t->nodep[enterorder[2] - 1]->back, t->nodep[enterorder[0] - 1]); } /* buildsimpletree */ void addtraverse(node *p, node *q, boolean contin) { /* traverse through a tree, finding best place to add p */ insert_(p, q); numtrees++; if (evaluate(&curtree) > bestree.likelihood) { copy_(&curtree, &bestree); addwhere = q; } copy_(&priortree, &curtree); if (!q->tip && contin) { addtraverse(p, q->next->back, contin); addtraverse(p, q->next->next->back, contin); } } /* addtraverse */ void re_move(node **p, node **q) { /* remove p and record in q where it was */ *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); (*p)->next->back = NULL; (*p)->next->next->back = NULL; update(*q); update((*q)->back); } /* re_move */ void globrearrange() { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; alloctree(&globtree.nodep,nonodes2); alloctree(&oldtree.nodep,nonodes2); setuptree(&globtree,nonodes2); setuptree(&oldtree,nonodes2); allocview(&oldtree, nonodes2, totalleles); allocview(&globtree, nonodes2, totalleles); copy_(&curtree,&globtree); copy_(&curtree,&oldtree); for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) 
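/* emit a progress dot at the same spacing as the "!----!" gauge printed
   in maketree, so the dots stay within one screen line */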
putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { re_move(&sib_ptr,&where); copy_(&curtree,&priortree); if (where->tip) { copy_(&oldtree,&curtree); copy_(&oldtree,&bestree); sib_ptr = sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true); if ( addwhere && where != addwhere && where->back != addwhere && bestree.likelihood > globtree.likelihood) { copy_(&bestree,&globtree); success = true; } sib_ptr2 = sib_ptr2->next; } copy_(&oldtree,&curtree); copy_(&oldtree,&bestree); sib_ptr = sib_ptr->next; } } copy_(&globtree,&curtree); copy_(&globtree,&bestree); if (success && globtree.likelihood > oldbestyet) { succeeded = true; } else { succeeded = false; } freeview(&oldtree, nonodes2); freeview(&globtree, nonodes2); freetree(&globtree.nodep,nonodes2); freetree(&oldtree.nodep,nonodes2); } void rearrange(node *p) { /* rearranges the tree locally */ node *q, *r; if (!p->tip && !p->back->tip) { r = p->next->next; re_move(&r, &q ); copy_(&curtree, &priortree); addtraverse(r, q->next->back, false); addtraverse(r, q->next->next->back, false); copy_(&bestree, &curtree); } if (!p->tip) { rearrange(p->next->back); rearrange(p->next->next->back); } } /* rearrange */ void coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; if (p->tip) { p->xcoord = lengthsum; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { coordinates(q->back, lengthsum + q->v, tipy,tipmax); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = lengthsum; if (p == curtree.start) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* coordinates */ void drawline(long i, double scale) { /* draws one row of the tree diagram by moving up tree */ node *p, *q; long n, j; boolean extra; node *r, *first = NULL, *last = NULL; boolean done; p = curtree.start; q = curtree.start; extra = false; if (i == (long)p->ycoord && p == curtree.start) { if (p->index - spp >= 10) fprintf(outfile, " %2ld", p->index - spp); else fprintf(outfile, " %ld", p->index - spp); extra = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= (long)r->back->ymin && i <= (long)r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || (p != curtree.start && r == p) || (p == curtree.start && r == p->next))); first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; if (p == curtree.start) last = p->back; } done = (p->tip || p == q); n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)p->ycoord != (long)q->ycoord) putc('+', outfile); else putc('-', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', 
outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } } else { for (j = 1; j <= n; j++) putc(' ', outfile); } if (q != p) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree() { /* prints out diagram of the tree */ long i; long tipy; double tipmax,scale; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; coordinates(curtree.start, 0.0, &tipy,&tipmax); scale = over / (tipmax + 0.0001); for (i = 1; i <= (tipy - down); i++) drawline(i,scale); putc('\n', outfile); } /* printree */ void treeout(node *p) { /* write out file with representation of final tree */ long i, n, w; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { putc('(', outtree); col++; treeout(p->next->back); putc(',', outtree); col++; if (col > 55) { putc('\n', outtree); col = 0; } treeout(p->next->next->back); if (p == curtree.start) { putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } treeout(p->back); } putc(')', outtree); col++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree.start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.8f", (int)w + 7, x); col += w + 8; } } /* treeout */ void describe(node *p, double chilow, double chihigh) { /* print out information for one branch */ long i; node *q; double bigv, delta; q = p->back; fprintf(outfile, "%3ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, "%15.8f", q->v); delta = p->deltav + p->back->deltav; bigv = p->v + delta; if (p->iter) fprintf(outfile, " (%12.8f,%12.8f)", chilow * bigv - delta, chihigh * bigv - delta); fprintf(outfile, "\n"); if (!p->tip) { describe(p->next->back, chilow,chihigh); describe(p->next->next->back, chilow,chihigh); } } /* describe */ void summarize(void) { /* print out branch lengths etc. */ double chilow,chihigh; fprintf(outfile, "\nremember: "); if (outgropt) fprintf(outfile, "(although rooted by outgroup) "); fprintf(outfile, "this is an unrooted tree!\n\n"); fprintf(outfile, "Ln Likelihood = %11.5f\n", curtree.likelihood); if (df == 1) { chilow = 0.000982; chihigh = 5.02389; } else if (df == 2) { chilow = 0.05064; chihigh = 7.3777; } else { chilow = 1.0 - 2.0 / (df * 9); chihigh = chilow; chilow -= 1.95996 * sqrt(2.0 / (df * 9)); chihigh += 1.95996 * sqrt(2.0 / (df * 9)); chilow *= chilow * chilow; chihigh *= chihigh * chihigh; } fprintf(outfile, "\nBetween And Length"); fprintf(outfile, " Approx. 
Confidence Limits\n"); fprintf(outfile, "------- --- ------"); fprintf(outfile, " ------- ---------- ------\n"); describe(curtree.start->next->back, chilow,chihigh); describe(curtree.start->next->next->back, chilow,chihigh); describe(curtree.start->back, chilow, chihigh); fprintf(outfile, "\n\n"); if (trout) { col = 0; treeout(curtree.start); } } /* summarize */ void nodeinit(node *p) { /* initialize a node */ node *q, *r; long i, j, m; if (p->tip) return; q = p->next->back; r = p->next->next->back; nodeinit(q); nodeinit(r); m = 0; for (i = 0; i < loci; i++) { for (j = 1; j < alleles[i]; j++) p->view[m+j-1] = 0.5 * q->view[m+j-1] + 0.5 * r->view[m+j-1]; m += alleles[i]; } if ((!lengths) || p->iter) p->v = 0.1; if ((!lengths) || p->back->iter) p->back->v = 0.1; } /* nodeinit */ void initrav(node *p) { /* traverse to initialize */ node* q; if (p->tip) nodeinit(p->back); else { q = p->next; while ( q != p) { initrav(q->back); q = q->next; } } } /* initrav */ void treevaluate() { /* evaluate user-defined tree, iterating branch lengths */ long i; initrav(curtree.start); initrav(curtree.start->back); for (i = 1; i <= smoothings * 4; i++) smooth(curtree.start); evaluate(&curtree); } /* treevaluate */ void maketree() { /* construct the tree */ long i; char* treestr; if (usertree) { if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); putc('\n', outfile); } setuptree(&curtree, nonodes2); for (which = 1; which <= spp; which++) inittip(which, &curtree); which = 1; while (which <= numtrees) { treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread2 (&treestr, &curtree.start, curtree.nodep, lengths, &trweight, &goteof, &haslengths, &spp,false,nonodes2); curtree.start = curtree.nodep[outgrno - 1]->back; treevaluate(); printree(); summarize(); which++; } FClose(intree); if (numtrees > 1 && loci > 1 ) { weight = (long *)Malloc(loci*sizeof(long)); for (i = 0; i < loci; i++) weight[i] = 1; standev2(numtrees, maxwhich, 0, loci-1, maxlogl, l0gl, l0gf, seed); free(weight); fprintf(outfile, "\n\n"); } } else { /* if ( !usertree ) */ if (jumb == 1) { setuptree(&curtree, nonodes2); setuptree(&priortree, nonodes2); setuptree(&bestree, nonodes2); if (njumble > 1) setuptree(&bestree2, nonodes2); } for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); nextsp = 3; buildsimpletree(&curtree); curtree.start = curtree.nodep[enterorder[0] - 1]->back; if (jumb == 1) numtrees = 1; nextsp = 4; if (progress) { printf("Adding species:\n"); writename(0, 3, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } while (nextsp <= spp) { buildnewtip(enterorder[nextsp - 1], &curtree, nextsp); copy_(&curtree, &priortree); bestree.likelihood = -DBL_MAX; addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.start, true ); copy_(&bestree, &curtree); if (progress) { writename(nextsp - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (global && nextsp == spp) { if (progress) { printf("\nDoing global rearrangements\n"); printf(" !"); for (i = 1; i <= spp - 2; i++) if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); printf(" "); } } succeeded = true; while (succeeded) { succeeded = false; if ( global && nextsp == spp ) globrearrange(); else rearrange(curtree.start); if (global && nextsp == spp) putc('\n', outfile); } if (global && nextsp == spp && 
progress) putchar('\n'); if (njumble > 1) { if (jumb == 1 && nextsp == spp) copy_(&bestree, &bestree2); else if (nextsp == spp) { if (bestree2.likelihood < bestree.likelihood) copy_(&bestree, &bestree2); } } if (nextsp == spp && jumb == njumble) { if (njumble > 1) copy_(&bestree2, &curtree); curtree.start = curtree.nodep[outgrno - 1]->back; printree(); summarize(); } nextsp++; } } if ( jumb < njumble) return; if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n\n", outtreename); } freeview(&curtree, nonodes2); if (!usertree) { freeview(&bestree, nonodes2); freeview(&priortree, nonodes2); } for (i = 0; i < spp; i++) free(x[i]); if (!contchars) { free(locus); free(pbar); } } /* maketree */ int main(int argc, Char *argv[]) { /* main program */ long i; #ifdef MAC argc = 1; /* macsetup("Contml",""); */ argv[0] = "Contml"; #endif init(argc, argv); emboss_getoptions("fcontml", argc, argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= datasets; ith++) { getinput(); if (ith == 1) firstset = false; if (datasets > 1) { fprintf(outfile, "Data set # %ld:\n\n", ith); if (progress) printf("\nData set # %ld:\n", ith); } for (jumb = 1; jumb <= njumble; jumb++) maketree(); if (usertree) for (i = 0; i < MAXSHIMOTREES; i++) free(l0gf[i]); } FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif ajPhyloFreqDel(&phylofreq); ajPhyloTreeDelarray(&phylotrees); ajFileClose(&embossoutfile); ajFileClose(&embossouttree); embExit(); return 0; } PHYLIPNEW-3.69.650/src/consense.c0000664000175000017500000001624011305225544013042 00000000000000#include "phylip.h" #include "cons.h" /* version 3.6. (c) Copyright 1993-2008 by the University of Washington. Written by Joseph Felsenstein, Hisashi Horino, Akiko Fuseki, Dan Fineman, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* The following extern's refer to things declared in cons.c */ AjPPhyloTree* phylotrees = NULL; extern int tree_pairing; extern Char intreename[FNMLNGTH], intree2name[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; long trees_in; extern node *root; extern long numopts, outgrno, col, setsz; extern long maxgrp; /* max. no. 
of groups in all trees found */ extern boolean trout, firsttree, noroot, outgropt, didreroot, prntsets, progress, treeprint, goteof, strict, mr, mre, ml; extern pointarray nodep; /* pointers to all nodes in tree */ extern group_type **grouping, **grping2, **group2;/* to store groups found */ extern long **order, **order2, lasti; extern group_type *fullset; extern long tipy; extern double trweight, ntrees, mlfrac; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void count_siblings(node **p); void treeout(node *); /* function prototypes */ #endif void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr method; /* Initial settings */ ibmpc = IBMCRT; ansi = ANSICRT; didreroot = false; firsttree = true; spp = 0 ; col = 0 ; /* This is needed so functions in cons.c work */ tree_pairing = NO_PAIRING ; strict = false; mr = false; mre = false; ml = false; mlfrac = 0.5; noroot = true; numopts = 0; outgrno = 1; outgropt = false; trout = true; prntsets = true; progress = true; treeprint = true; embInitPV(pgm, argc, argv,"PHYLIPNEW",VERSION); phylotrees = ajAcdGetTree("intreefile"); trees_in = 0; while (phylotrees[trees_in]) trees_in++; method = ajAcdGetListSingle("method"); if (ajStrMatchC(method, "strict")) strict = true; else if(ajStrMatchC(method, "mr")) mr = true; else if(ajStrMatchC(method, "mre")) mre = true; else if(ajStrMatchC(method, "ml")) { ml = true; mlfrac = ajAcdGetFloat("mlfrac"); } prntsets = ajAcdGetBoolean("prntsets"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; noroot = !ajAcdGetToggle("root"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nConsensus tree"); fprintf(outfile, " program, version %s\n\n", VERSION); ajStrDel(&method); return; } /* emboss_getoptions */ void count_siblings(node **p) { node *tmp_node; int i; if (!(*p)) { /* This is a leaf, */ return; } else { tmp_node = (*p)->next; } for (i = 0 ; i < 1000; i++) { if (tmp_node == (*p)) { /* When we've gone through all the siblings, */ break; } else if (tmp_node) { tmp_node = tmp_node->next; } else { /* Should this be executed? */ return ; } } } /* count_siblings */ void treeout(node *p) { /* write out file with representation of final tree */ long i, n = 0; Char c; node *q; double x; count_siblings (&p); if (p->tip) { /* If we're at a node which is a leaf, figure out how long the name is and print it out. */ for (i = 1; i <= MAXNCH; i++) { if (p->nayme[i - 1] != '\0') n = i; } for (i = 0; i < n; i++) { c = p->nayme[i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { /* If we're at a furcation, print out the proper formatting, loop through all the children, calling the procedure recursively. 
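Children are separated by commas, and the output line is broken whenever the
     column count passes 60 so the tree file stays readable.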
*/ putc('(', outtree); col++; q = p->next; while (q != p) { /* This should terminate when we've gone through all the siblings, */ treeout(q->back); q = q->next; if (q == p) break; putc(',', outtree); col++; if (col > 60) { putc('\n', outtree); col = 0; } } putc(')', outtree); col++; } if (p->tip) x = ntrees; else x = (double)p->deltav; if (p == root) { /* When we're all done with this tree, */ fprintf(outtree, ";\n"); return; } /* Figure out how many characters the branch length requires: */ else { if (!strict) { if (x >= 100.0) { fprintf(outtree, ":%5.1f", x); col += 4; } else if (x >= 10.0) { fprintf(outtree, ":%4.1f", x); col += 3; } else if (x >= 1.00) { fprintf(outtree, ":%4.2f", x); col += 3; } } } } /* treeout */ int main(int argc, Char *argv[]) { /* Local variables added by Dan F. */ pattern_elm ***pattern_array; long i, j; long tip_count = 0; node *p, *q; #ifdef MAC argc = 1; /* macsetup("Consense", ""); */ argv[0] = "Consense"; #endif init(argc, argv); emboss_getoptions("fconsense", argc, argv); ntrees = 0.0; maxgrp = 32767; /* initial size of set hash table */ lasti = -1; if (prntsets) fprintf(outfile, "Species in order: \n\n"); countcomma(ajStrGetuniquePtr(&phylotrees[0]->Tree),&tip_count); tip_count++; /* countcomma does a raw comma count, tips is one greater */ /* Read the tree file and put together grouping, order, and timesseen */ read_groups (&pattern_array, trees_in, tip_count, phylotrees); /* Compute the consensus tree. */ putc('\n', outfile); nodep = (pointarray)Malloc(2*(1+spp)*sizeof(node *)); for (i = 0; i < spp; i++) { nodep[i] = (node *)Malloc(sizeof(node)); for (j = 0; j < MAXNCH; j++) nodep[i]->nayme[j] = '\0'; strncpy(nodep[i]->nayme, nayme[i], MAXNCH); } for (i = spp; i < 2*(1+spp); i++) nodep[i] = NULL; consensus(pattern_array, trees_in); printf("\n"); if (trout) { treeout(root); if (progress) printf("Consensus tree written to file \"%s\"\n\n", outtreename); } if (progress) printf("Output written to file \"%s\"\n\n", outfilename); for (i = 0; i < spp; i++) free(nodep[i]); for (i = spp; i < 2*(1 + spp); i++) { if (nodep[i] != NULL) { p = nodep[i]->next; do { q = p->next; free(p); p = q; } while (p != nodep[i]); free(p); } } free(nodep); FClose(outtree); FClose(intree); FClose(outfile); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif ajPhyloTreeDelarray(&phylotrees); ajFileClose(&embossoutfile); ajFileClose(&embossouttree); clean_up_final_consense(); embExit(); return 0; } /* main */ PHYLIPNEW-3.69.650/src/kitsch.c0000664000175000017500000005757111325562224012527 00000000000000/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include #include "phylip.h" #include "dist.h" #define epsilonk 0.000001 /* a very small but not too small number */ AjPPhyloDist* phylodists = NULL; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void doinit(void); void inputoptions(void); void getinput(void); void input_data(void); void add(node *, node *, node *); void re_move(node **, node **); void scrunchtraverse(node *, node **, double *); void combine(node *, node *); void scrunch(node *); void secondtraverse(node *, node *, node *, node *, long, long, long , double *); void firstraverse(node *, node *, double *); void sumtraverse(node *, double *); void evaluate(node *); void tryadd(node *, node **, node **); void addpreorder(node *, node *, node *); void tryrearr(node *, node **, boolean *); void repreorder(node *, node **, boolean *); void rearrange(node **); void dtraverse(node *); void describe(void); void copynode(node *, node *); void copy_(tree *, tree *); void maketree(void); /* function prototypes */ #endif const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; Char infilename[FNMLNGTH], intreename[FNMLNGTH]; long nonodes, numtrees, col, datasets, ith, njumble, jumb; /* numtrees is used by usertree option part of maketree */ long inseed; tree curtree, bestree; /* pointers to all nodes in tree */ boolean minev, jumble, usertree, lower, upper, negallowed, replicates, trout, printdata, progress, treeprint, mulsets, firstset; longer seed; double power; long *enterorder; /* Local variables for maketree, propagated globally for C version: */ long examined; double like, bestyet; node *there; boolean *names; Char ch; char *progname; double trweight; /* to make treeread happy */ boolean goteof, haslengths, lengths; /* ditto ... */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr matrixtype = NULL; long inseed0; minev = false; jumble = false; njumble = 1; lower = false; negallowed = false; power = 2.0; replicates = false; upper = false; usertree = false; trout = true; printdata = false; progress = true; treeprint = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); matrixtype = ajAcdGetListSingle("matrixtype"); if(ajStrMatchC(matrixtype, "l")) lower = true; else if(ajStrMatchC(matrixtype, "u")) upper = true; phylodists = ajAcdGetDistances("datafile"); while (phylodists[datasets]) datasets++; minev = ajAcdGetBoolean("minev"); phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } power = ajAcdGetFloat("power"); if(minev) negallowed = true; else negallowed = ajAcdGetBoolean("negallowed"); replicates = ajAcdGetBoolean("replicates"); if(!usertree) { njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } //if mulsets and not jumble do jumble printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); embossoutfile = ajAcdGetOutfile("outfile"); embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) emboss_openfile(embossouttree, &outtree, &outtreename); /* printf("\n inseed: %ld",(inseed)); printf("\n jumble: %s",(jumble ? 
"true" : "false")); printf("\n njumble: %ld",(njumble)); printf("\n lengths: %s",(lengths ? "true" : "false")); printf("\n lower: %s",(lower ? "true" : "false")); printf("\n negallowed: %s",(negallowed ? "true" : "false")); printf("\n power: %f",(power)); printf("\n replicates: %s",(replicates ? "true" : "false")); printf("\n trout: %s",(trout ? "true" : "false")); printf("\n upper: %s",(upper ? "true" : "false")); printf("\n usertree: %s",(usertree ? "true" : "false")); printf("\n printdata: %s",(printdata ? "true" : "false")); printf("\n progress: %s",(progress ? "true" : "false")); printf("\n treeprint: %s",(treeprint ? "true" : "false")); printf("\n mulsets: %s",(mulsets ? "true" : "false")); printf("\n datasets: %s",(datasets ? "true" : "false")); */ } /* emboss_getoptions */ void doinit() { /* initializes variables */ inputnumbers2seq(phylodists[0], &spp, &nonodes, 1); alloctree(&curtree.nodep, nonodes); allocd(nonodes, curtree.nodep); allocw(nonodes, curtree.nodep); if (!usertree && njumble > 1) { alloctree(&bestree.nodep, nonodes); allocd(nonodes, bestree.nodep); allocw(nonodes, bestree.nodep); } nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); } /* doinit */ void inputoptions() { /* print options information */ if (!firstset) samenumspseq2(phylodists[ith-1], ith); fprintf(outfile, "\nFitch-Margoliash method "); fprintf(outfile, "with contemporary tips, version %s\n\n",VERSION); if (minev) fprintf(outfile, "Minimum evolution method option\n\n"); fprintf(outfile, " __ __ 2\n"); fprintf(outfile, " \\ \\ (Obs - Exp)\n"); fprintf(outfile, "Sum of squares = /_ /_ ------------\n"); fprintf(outfile, " "); if (power == (long)power) fprintf(outfile, "%2ld\n", (long)power); else fprintf(outfile, "%4.1f\n", power); fprintf(outfile, " i j Obs\n\n"); fprintf(outfile, "negative branch lengths"); if (!negallowed) fprintf(outfile, " not"); fprintf(outfile, " allowed\n\n"); } /* inputoptions */ void getinput() { /* reads the input data */ inputoptions(); } /* getinput */ void input_data() { /* read in distance matrix */ long i, j, k, columns; ajint ipos=0; columns = replicates ? 
4 : 6; if (printdata) { fprintf(outfile, "\nName Distances"); if (replicates) fprintf(outfile, " (replicates)"); fprintf(outfile, "\n---- ---------"); if (replicates) fprintf(outfile, "-------------"); fprintf(outfile, "\n\n"); } setuptree(&curtree, nonodes); if (!usertree && njumble > 1) setuptree(&bestree, nonodes); for (i = 0; i < (spp); i++) { curtree.nodep[i]->d[i] = 0.0; curtree.nodep[i]->w[i] = 0.0; curtree.nodep[i]->weight = 0.0; initnamedist(phylodists[ith-1], i); for (j = 1; j <= (spp); j++) { curtree.nodep[i]->d[j - 1] = phylodists[ith-1]->Data[ipos]; curtree.nodep[i]->w[j - 1] = phylodists[ith-1]->Replicates[ipos++]; } } if (printdata) { for (i = 0; i < (spp); i++) { for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); putc(' ', outfile); for (j = 1; j <= (spp); j++) { fprintf(outfile, "%10.5f", curtree.nodep[i]->d[j - 1]); if (replicates) fprintf(outfile, " (%3ld)", (long)curtree.nodep[i]->w[j - 1]); if (j % columns == 0 && j < spp) { putc('\n', outfile); for (k = 1; k <= nmlngth + 1; k++) putc(' ', outfile); } } putc('\n', outfile); } putc('\n', outfile); } for (i = 0; i < (spp); i++) { for (j = 0; j < (spp); j++) { if (i + 1 != j + 1) { if (curtree.nodep[i]->d[j] < epsilonk) curtree.nodep[i]->d[j] = epsilonk; curtree.nodep[i]->w[j] /= exp(power * log(curtree.nodep[i]->d[j])); } } } } /* inputdata */ void add(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ if (below != curtree.nodep[below->index - 1]) below = curtree.nodep[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (curtree.root == below) curtree.root = newfork; curtree.root->back = NULL; } /* add */ void re_move(node **item, node **fork) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). 
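If item is not attached to anything (its back pointer is NULL), nothing is removed and fork is set to NULL; if fork happened to be the root, the root is first moved to fork's other descendant.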
Also returns pointers to the deleted nodes, item and fork */ node *p, *q; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = curtree.nodep[(*item)->back->index - 1]; if (curtree.root == *fork) { if (*item == (*fork)->next->back) curtree.root = (*fork)->next->next->back; else curtree.root = (*fork)->next->back; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; } /* remove */ void scrunchtraverse(node *u, node **closest, double *tmax) { /* traverse to find closest node to the current one */ if (!u->sametime) { if (u->t > *tmax) { *closest = u; *tmax = u->t; } return; } u->t = curtree.nodep[u->back->index - 1]->t; if (!u->tip) { scrunchtraverse(u->next->back, closest,tmax); scrunchtraverse(u->next->next->back, closest,tmax); } } /* scrunchtraverse */ void combine(node *a, node *b) { /* put node b into the set having the same time as a */ if (a->weight + b->weight <= 0.0) a->t = 0.0; else a->t = (a->t * a->weight + b->t * b->weight) / (a->weight + b->weight); a->weight += b->weight; b->sametime = true; } /* combine */ void scrunch(node *s) { /* see if nodes can be combined to prevent negative lengths */ double tmax; node *closest; boolean found; closest = NULL; tmax = -1.0; do { if (!s->tip) { scrunchtraverse(s->next->back, &closest,&tmax); scrunchtraverse(s->next->next->back, &closest,&tmax); } found = (tmax > s->t); if (found) combine(s, closest); tmax = -1.0; } while (found); } /* scrunch */ void secondtraverse(node *a, node *q, node *u, node *v, long i, long j, long k, double *sum) { /* recalculate distances, add to sum */ long l; double wil, wjl, wkl, wli, wlj, wlk, TEMP; if (!(a->processed || a->tip)) { secondtraverse(a->next->back, q,u,v,i,j,k,sum); secondtraverse(a->next->next->back, q,u,v,i,j,k,sum); return; } if (!(a != q && a->processed)) return; l = a->index; wil = u->w[l - 1]; wjl = v->w[l - 1]; wkl = wil + wjl; wli = a->w[i - 1]; wlj = a->w[j - 1]; wlk = wli + wlj; q->w[l - 1] = wkl; a->w[k - 1] = wlk; if (wkl <= 0.0) q->d[l - 1] = 0.0; else q->d[l - 1] = (wil * u->d[l - 1] + wjl * v->d[l - 1]) / wkl; if (wlk <= 0.0) a->d[k - 1] = 0.0; else a->d[k - 1] = (wli * a->d[i - 1] + wlj * a->d[j - 1]) / wlk; if (minev) return; if (wkl > 0.0) { TEMP = u->d[l - 1] - v->d[l - 1]; (*sum) += wil * wjl / wkl * (TEMP * TEMP); } if (wlk > 0.0) { TEMP = a->d[i - 1] - a->d[j - 1]; (*sum) += wli * wlj / wlk * (TEMP * TEMP); } } /* secondtraverse */ void firstraverse(node *q_, node *r, double *sum) { /* firsttraverse */ /* go through tree calculating branch lengths */ node *q; long i, j, k; node *u, *v; q = q_; if (q == NULL) return; q->sametime = false; if (!q->tip) { firstraverse(q->next->back, r,sum); firstraverse(q->next->next->back, r,sum); } q->processed = true; if (q->tip) return; u = q->next->back; v = q->next->next->back; i = u->index; j = v->index; k = q->index; if (u->w[j - 1] + v->w[i - 1] <= 0.0) q->t = 0.0; else q->t = (u->w[j - 1] * u->d[j - 1] + v->w[i - 1] * v->d[i - 1]) / (2.0 * (u->w[j - 1] + v->w[i - 1])); q->weight = u->weight + v->weight + u->w[j - 1] + v->w[i - 1]; if (!negallowed) scrunch(q); u->v = q->t - u->t; v->v = q->t - v->t; u->back->v = u->v; v->back->v = v->v; secondtraverse(r,q,u,v,i,j,k,sum); } /* firstraverse */ void sumtraverse(node *q, double *sum) { /* traverse to finish computation of sum of squares */ long i, j; node *u, *v; double TEMP, TEMP1; if (minev && (q 
!= curtree.root)) *sum += q->v; if (q->tip) return; sumtraverse(q->next->back, sum); sumtraverse(q->next->next->back, sum); if (!minev) { u = q->next->back; v = q->next->next->back; i = u->index; j = v->index; TEMP = u->d[j - 1] - 2.0 * q->t; TEMP1 = v->d[i - 1] - 2.0 * q->t; (*sum) += u->w[j - 1] * (TEMP * TEMP) + v->w[i - 1] * (TEMP1 * TEMP1); } } /* sumtraverse */ void evaluate(node *r) { /* fill in times and evaluate sum of squares for tree */ double sum; long i; sum = 0.0; for (i = 0; i < (nonodes); i++) curtree.nodep[i]->processed = curtree.nodep[i]->tip; firstraverse(r, r,&sum); sumtraverse(r, &sum); examined++; if (replicates && (lower || upper)) sum /= 2; like = -sum; } /* evaluate */ void tryadd(node *p, node **item, node **nufork) { /* temporarily adds one fork and one tip to the tree. if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ add(p, *item, *nufork); evaluate(curtree.root); if (like > bestyet) { bestyet = like; there = p; } re_move(item, nufork); } /* tryadd */ void addpreorder(node *p, node *item, node *nufork) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p, &item,&nufork); if (!p->tip) { addpreorder(p->next->back, item, nufork); addpreorder(p->next->next->back, item, nufork); } } /* addpreorder */ void tryrearr(node *p, node **r, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success := TRUE and keeps the new tree. otherwise, restores the old tree */ node *frombelow, *whereto, *forknode; double oldlike; if (p->back == NULL) return; forknode = curtree.nodep[p->back->index - 1]; if (forknode->back == NULL) return; oldlike = like; if (p->back->next->next == forknode) frombelow = forknode->next->next->back; else frombelow = forknode->next->back; whereto = forknode->back; re_move(&p, &forknode); add(whereto, p, forknode); if ((*r)->back != NULL) *r = curtree.nodep[(*r)->back->index - 1]; evaluate(*r); if (like - oldlike > LIKE_EPSILON) { bestyet = like; *success = true; return; } re_move(&p, &forknode); add(frombelow, p, forknode); if ((*r)->back != NULL) *r = curtree.nodep[(*r)->back->index - 1]; like = oldlike; } /* tryrearr */ void repreorder(node *p, node **r, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p,r,success); if (!p->tip) { repreorder(p->next->back,r,success); repreorder(p->next->next->back,r,success); } } /* repreorder */ void rearrange(node **r_) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ node **r; boolean success; r = r_; success = true; while (success) { success = false; repreorder(*r,r,&success); } } /* rearrange */ void dtraverse(node *q) { /* print table of lengths etc. 
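Each row gives the ancestor fork number, the node (species name or fork number), the branch length (the ancestor's time minus this node's time) and the height (the root's time minus this node's time).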
*/ long i; if (!q->tip) dtraverse(q->next->back); if (q->back != NULL) { fprintf(outfile, "%4ld ", q->back->index - spp); if (q->index <= spp) { for (i = 0; i < nmlngth; i++) putc(nayme[q->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", q->index - spp); fprintf(outfile, "%13.5f", curtree.nodep[q->back->index - 1]->t - q->t); q->v = curtree.nodep[q->back->index - 1]->t - q->t; q->back->v = q->v; fprintf(outfile, "%16.5f\n", curtree.root->t - q->t); } if (!q->tip) dtraverse(q->next->next->back); } /* dtraverse */ void describe() { /* prints table of lengths, times, sum of squares, etc. */ long i, j; double totalnum; double TEMP; if (!minev) fprintf(outfile, "\nSum of squares = %10.3f\n\n", -like); else fprintf(outfile, "Sum of branch lengths = %10.3f\n\n", -like); if ((fabs(power - 2) < 0.01) && !minev) { totalnum = 0.0; for (i = 0; i < (spp); i++) { for (j = 0; j < (spp); j++) { if (i + 1 != j + 1 && curtree.nodep[i]->d[j] > 0.0) { TEMP = curtree.nodep[i]->d[j]; totalnum += curtree.nodep[i]->w[j] * (TEMP * TEMP); } } } totalnum -= 2; if (replicates && (lower || upper)) totalnum /= 2; fprintf(outfile, "Average percent standard deviation ="); fprintf(outfile, "%10.5f\n\n", 100 * sqrt(-(like / totalnum))); } fprintf(outfile, "From To Length Height\n"); fprintf(outfile, "---- -- ------ ------\n\n"); dtraverse(curtree.root); putc('\n', outfile); if (trout) { col = 0; treeoutr(curtree.root,&col,&curtree); } } /* describe */ void copynode(node *c, node *d) { /* make a copy of a node */ memcpy(d->d, c->d, nonodes*sizeof(double)); memcpy(d->w, c->w, nonodes*sizeof(double)); d->t = c->t; d->sametime = c->sametime; d->weight = c->weight; d->processed = c->processed; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; } /* copynode */ void copy_(tree *a, tree *b) { /* make a copy of a tree */ long i, j=0; node *p, *q; for (i = 0; i < spp; i++) { copynode(a->nodep[i], b->nodep[i]); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { copynode(p, q); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->root = a->root; } /* copy */ void maketree() { /* constructs a binary tree from the pointers in curtree.nodep. 
adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j, which; double bestlike, bstlike2=0, gotlike; boolean lastrearr; node *item, *nufork; char *treestr; if (!usertree) { if (jumb == 1) { input_data(); examined = 0; } for (i = 1; i <= (spp); i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); curtree.root = curtree.nodep[enterorder[0] - 1]; add(curtree.nodep[enterorder[0] - 1], curtree.nodep[enterorder[1] - 1], curtree.nodep[spp]); if (progress) { printf("Adding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } for (i = 3; i <= (spp); i++) { bestyet = -DBL_MAX; item = curtree.nodep[enterorder[i - 1] - 1]; nufork = curtree.nodep[spp + i - 2]; addpreorder(curtree.root, item, nufork); add(there, item, nufork); like = bestyet; rearrange(&curtree.root); evaluate(curtree.root); examined--; if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = (i == spp); if (lastrearr) { if (progress) { printf("\nDoing global rearrangements\n"); printf(" !"); for (j = 1; j <= (nonodes); j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } bestlike = bestyet; do { gotlike = bestlike; if (progress) printf(" "); for (j = 0; j < (nonodes); j++) { there = curtree.root; bestyet = -DBL_MAX; item = curtree.nodep[j]; if (item != curtree.root) { re_move(&item, &nufork); there = curtree.root; addpreorder(curtree.root, item, nufork); add(there, item, nufork); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } while (bestlike > gotlike); if (njumble > 1) { if (jumb == 1 || (jumb > 1 && bestlike > bstlike2)) { copy_(&curtree, &bestree); bstlike2 = bestlike; } } } } if (njumble == jumb) { if (njumble > 1) copy_(&bestree, &curtree); evaluate(curtree.root); printree(curtree.root, treeprint, false, true); describe(); } } else { input_data(); if (treeprint) fprintf(outfile, "\n\nUser-defined trees:\n\n"); names = (boolean *)Malloc(spp*sizeof(boolean)); which = 1; while (which <= numtrees ) { treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread2 (&treestr, &curtree.root, curtree.nodep, lengths, &trweight, &goteof, &haslengths, &spp,false,nonodes); evaluate(curtree.root); printree(curtree.root, treeprint, false, true); describe(); which++; } FClose(intree); free(names); } if (jumb == njumble && progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } } /* maketree */ int main(int argc, Char *argv[]) { /* Fitch-Margoliash criterion with contemporary tips */ #ifdef MAC argc = 1; /* macsetup("Kitsch",""); */ argv[0] = "Kitsch"; #endif init(argc,argv); emboss_getoptions("fkitsch",argc,argv); /* reads in spp, options, and the data, then calls maketree to construct the tree */ progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= datasets; ith++) { if (datasets > 1) { fprintf(outfile, "\nData set # %ld:\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } getinput(); for (jumb = 1; jumb <= njumble; jumb++) maketree(); firstset = false; } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif printf("\nDone.\n\n"); 
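/* let the EMBOSS/AJAX library release its resources and terminate */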
embExit(); return 0; } /* Fitch-Margoliash criterion with contemporary tips */ PHYLIPNEW-3.69.650/src/fitch.c0000664000175000017500000006610511325562224012330 00000000000000 #include "phylip.h" #include "dist.h" #include "float.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define zsmoothings 10 /* number of zero-branch correction iterations */ #define epsilonf 0.000001 /* a very small but not too small number */ #define delta 0.01 /* a not quite so small number */ #define MAXNUMTREES 100000000 /* a number bigger than conceivable numtrees */ AjPPhyloDist* phylodists = NULL; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void fitch_getinput(void); void secondtraverse(node *, double , long *, double *); void firsttraverse(node *, long *, double *); double evaluate(tree *); void nudists(node *, node *); void makedists(node *); void makebigv(node *); void correctv(node *); void alter(node *, node *); void nuview(node *); void update(node *); void smooth(node *); void filltraverse(node *, node *, boolean); void fillin(node *, node *, boolean); void insert_(node *, node *, boolean); void copynode(node *, node *); void copy_(tree *, tree *); void setuptipf(long, tree *); void buildnewtip(long , tree *, long); void buildsimpletree(tree *, long); void addtraverse(node *, node *, boolean, long *, boolean *); void re_move(node **, node **); void rearrange(node *, long *, long *, boolean *); void describe(node *); void summarize(long); void nodeinit(node *); void initrav(node *); void treevaluate(void); void maketree(void); void globrearrange(long* numtrees,boolean* succeeded); /* function prototypes */ #endif const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; Char infilename[FNMLNGTH], intreename[FNMLNGTH]; long nonodes2, outgrno, nums, col, datasets, ith, njumble, jumb=0, numtrees; long inseed; vector *x; intvector *reps; boolean minev, global, jumble, lengths, usertree, lower, upper, negallowed, outgropt, replicates, trout, printdata, progress, treeprint, mulsets, firstset; double power; double trweight; /* to make treeread happy */ boolean goteof, haslengths; /* ditto ... */ boolean first; /* ditto ... 
*/ node *addwhere; longer seed; long *enterorder; tree curtree, priortree, bestree, bestree2; Char ch; char *progname; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr matrixtype = NULL; long inseed0=0; minev = false; global = false; jumble = false; njumble = 1; lengths = false; lower = false; negallowed = false; outgrno = 1; outgropt = false; power = 2.0; replicates = false; trout = true; upper = false; usertree = false; printdata = false; progress = true; treeprint = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylodists = ajAcdGetDistances("datafile"); while (phylodists[datasets]) datasets++; minev = ajAcdGetBoolean("minev"); phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } power = ajAcdGetFloat("power"); if(minev) negallowed = true; else negallowed = ajAcdGetBoolean("negallowed"); matrixtype = ajAcdGetListSingle("matrixtype"); if(ajStrMatchC(matrixtype, "l")) lower = true; else if(ajStrMatchC(matrixtype, "u")) upper = true; replicates = ajAcdGetBoolean("replicates"); if(!usertree) { global = ajAcdGetBoolean("global"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); embossoutfile = ajAcdGetOutfile("outfile"); embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) emboss_openfile(embossouttree, &outtree, &outtreename); /* printf("\n inseed: %ld",(inseed)); printf("\n global: %s",(global ? "true" : "false")); printf("\n jumble: %s",(jumble ? "true" : "false")); printf("\n njumble: %ld",(njumble)); printf("\n lengths: %s",(lengths ? "true" : "false")); printf("\n lower: %s",(lower ? "true" : "false")); printf("\n negallowed: %s",(negallowed ? "true" : "false")); printf("\n outgrno: %ld",(outgrno)); printf("\n outgropt: %s",(outgropt ? "true" : "false")); printf("\n power: %f",(power)); printf("\n replicates: %s",(replicates ? "true" : "false")); printf("\n trout: %s",(trout ? "true" : "false")); printf("\n upper: %s",(upper ? "true" : "false")); printf("\n usertree: %s",(usertree ? "true" : "false")); printf("\n printdata: %s",(printdata ? "true" : "false")); printf("\n progress: %s",(progress ? "true" : "false")); printf("\n treeprint: %s",(treeprint ? "true" : "false")); printf("\n mulsets: %s",(mulsets ? 
"true" : "false")); printf("\n datasets: %ld",(datasets)); */ } /* emboss_getoptions */ void allocrest() { long i; x = (vector *)Malloc(spp*sizeof(vector)); reps = (intvector *)Malloc(spp*sizeof(intvector)); for (i=0;i 1) { alloctree(&bestree2.nodep, nonodes2); allocd(nonodes2, bestree2.nodep); allocw(nonodes2, bestree2.nodep); } } allocrest(); } /* doinit */ void inputoptions() { /* print options information */ if (!firstset) samenumspseq2(phylodists[ith-1], ith); fprintf(outfile, "\nFitch-Margoliash method version %s\n\n",VERSION); if (minev) fprintf(outfile, "Minimum evolution method option\n\n"); fprintf(outfile, " __ __ 2\n"); fprintf(outfile, " \\ \\ (Obs - Exp)\n"); fprintf(outfile, "Sum of squares = /_ /_ ------------\n"); fprintf(outfile, " "); if (power == (long)power) fprintf(outfile, "%2ld\n", (long)power); else fprintf(outfile, "%4.1f\n", power); fprintf(outfile, " i j Obs\n\n"); fprintf(outfile, "Negative branch lengths "); if (!negallowed) fprintf(outfile, "not "); fprintf(outfile, "allowed\n\n"); if (global) fprintf(outfile, "global optimization\n\n"); } /* inputoptions */ void fitch_getinput() { /* reads the input data */ inputoptions(); } /* fitch_getinput */ void secondtraverse(node *q, double y, long *nx, double *sum) { /* from each of those places go back to all others */ /* nx comes from firsttraverse */ /* sum comes from evaluate via firsttraverse */ double z=0.0, TEMP=0.0; z = y + q->v; if (q->tip) { TEMP = q->d[(*nx) - 1] - z; *sum += q->w[(*nx) - 1] * (TEMP * TEMP); } else { secondtraverse(q->next->back, z, nx, sum); secondtraverse(q->next->next->back, z, nx,sum); } } /* secondtraverse */ void firsttraverse(node *p, long *nx, double *sum) { /* go through tree calculating branch lengths */ if (minev && (p != curtree.start)) *sum += p->v; if (p->tip) { if (!minev) { *nx = p->index; secondtraverse(p->back, 0.0, nx, sum); } } else { firsttraverse(p->next->back, nx,sum); firsttraverse(p->next->next->back, nx,sum); } } /* firsttraverse */ double evaluate(tree *t) { double sum=0.0; long nx=0; /* evaluate likelihood of a tree */ firsttraverse(t->start->back ,&nx, &sum); firsttraverse(t->start, &nx, &sum); if ((!minev) && replicates && (lower || upper)) sum /= 2; t->likelihood = -sum; return (-sum); } /* evaluate */ void nudists(node *x, node *y) { /* compute distance between an interior node and tips */ long nq=0, nr=0, nx=0, ny=0; double dil=0, djl=0, wil=0, wjl=0, vi=0, vj=0; node *qprime, *rprime; qprime = x->next; rprime = qprime->next->back; qprime = qprime->back; ny = y->index; dil = qprime->d[ny - 1]; djl = rprime->d[ny - 1]; wil = qprime->w[ny - 1]; wjl = rprime->w[ny - 1]; vi = qprime->v; vj = rprime->v; x->w[ny - 1] = wil + wjl; if (wil + wjl <= 0.0) x->d[ny - 1] = 0.0; else x->d[ny - 1] = ((dil - vi) * wil + (djl - vj) * wjl) / (wil + wjl); nx = x->index; nq = qprime->index; nr = rprime->index; dil = y->d[nq - 1]; djl = y->d[nr - 1]; wil = y->w[nq - 1]; wjl = y->w[nr - 1]; y->w[nx - 1] = wil + wjl; if (wil + wjl <= 0.0) y->d[nx - 1] = 0.0; else y->d[nx - 1] = ((dil - vi) * wil + (djl - vj) * wjl) / (wil + wjl); } /* nudists */ void makedists(node *p) { /* compute distances among three neighbors of a node */ long i=0, nr=0, ns=0; node *q, *r, *s; r = p->back; nr = r->index; for (i = 1; i <= 3; i++) { q = p->next; s = q->back; ns = s->index; if (s->w[nr - 1] + r->w[ns - 1] <= 0.0) p->dist = 0.0; else p->dist = (s->w[nr - 1] * s->d[nr - 1] + r->w[ns - 1] * r->d[ns - 1]) / (s->w[nr - 1] + r->w[ns - 1]); p = q; r = s; nr = ns; } } /* makedists */ void makebigv(node 
*p) { /* make new branch length */ long i=0; node *temp, *q, *r; q = p->next; r = q->next; for (i = 1; i <= 3; i++) { if (p->iter) { p->v = (p->dist + r->dist - q->dist) / 2.0; p->back->v = p->v; } temp = p; p = q; q = r; r = temp; } } /* makebigv */ void correctv(node *p) { /* iterate branch lengths if some are to be zero */ node *q, *r, *temp; long i=0, j=0, n=0, nq=0, nr=0, ntemp=0; double wq=0.0, wr=0.0; q = p->next; r = q->next; n = p->back->index; nq = q->back->index; nr = r->back->index; for (i = 1; i <= zsmoothings; i++) { for (j = 1; j <= 3; j++) { if (p->iter) { wr = r->back->w[n - 1] + p->back->w[nr - 1]; wq = q->back->w[n - 1] + p->back->w[nq - 1]; if (wr + wq <= 0.0 && !negallowed) p->v = 0.0; else p->v = ((p->dist - q->v) * wq + (r->dist - r->v) * wr) / (wr + wq); if (p->v < 0 && !negallowed) p->v = 0.0; p->back->v = p->v; } temp = p; p = q; q = r; r = temp; ntemp = n; n = nq; nq = nr; nr = ntemp; } } } /* correctv */ void alter(node *x, node *y) { /* traverse updating these views */ nudists(x, y); if (!y->tip) { alter(x, y->next->back); alter(x, y->next->next->back); } } /* alter */ void nuview(node *p) { /* renew information about subtrees */ long i=0; node *q, *r, *pprime, *temp; q = p->next; r = q->next; for (i = 1; i <= 3; i++) { temp = p; pprime = p->back; alter(p, pprime); p = q; q = r; r = temp; } } /* nuview */ void update(node *p) { /* update branch lengths around a node */ if (p->tip) return; makedists(p); if (p->iter || p->next->iter || p->next->next->iter) { makebigv(p); correctv(p); } nuview(p); } /* update */ void smooth(node *p) { /* go through tree getting new branch lengths and views */ if (p->tip) return; update(p); smooth(p->next->back); smooth(p->next->next->back); } /* smooth */ void filltraverse(node *pb, node *qb, boolean contin) { if (qb->tip) return; if (contin) { filltraverse(pb, qb->next->back,contin); filltraverse(pb, qb->next->next->back,contin); nudists(qb, pb); return; } if (!qb->next->back->tip) nudists(qb->next->back, pb); if (!qb->next->next->back->tip) nudists(qb->next->next->back, pb); } /* filltraverse */ void fillin(node *pa, node *qa, boolean contin) { if (!pa->tip) { fillin(pa->next->back, qa, contin); fillin(pa->next->next->back, qa, contin); } filltraverse(pa, qa, contin); } /* fillin */ void insert_(node *p, node *q, boolean contin_) { /* put p and q together and iterate info. 
on resulting tree */ double x=0.0, oldlike; hookup(p->next->next, q->back); hookup(p->next, q); x = q->v / 2.0; p->v = 0.0; p->back->v = 0.0; p->next->v = x; p->next->back->v = x; p->next->next->back->v = x; p->next->next->v = x; fillin(p->back, p, contin_); evaluate(&curtree); do { oldlike = curtree.likelihood; smooth(p); smooth(p->back); evaluate(&curtree); } while (fabs(curtree.likelihood - oldlike) > delta); } /* insert_ */ void copynode(node *c, node *d) { /* make a copy of a node */ memcpy(d->d, c->d, nonodes2*sizeof(double)); memcpy(d->w, c->w, nonodes2*sizeof(double)); d->v = c->v; d->iter = c->iter; d->dist = c->dist; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; } /* copynode */ void copy_(tree *a, tree *b) { /* make copy of a tree a to tree b */ long i, j=0; node *p, *q; for (i = 0; i < spp; i++) { copynode(a->nodep[i], b->nodep[i]); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes2; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { copynode(p, q); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; b->start = a->start; } /* copy_ */ void setuptipf(long m, tree *t) { /* initialize branch lengths and views in a tip */ long i=0; intvector n=(long *)Malloc(spp * sizeof(long)); node *WITH; WITH = t->nodep[m - 1]; memcpy(WITH->d, x[m - 1], (nonodes2 * sizeof(double))); memcpy(n, reps[m - 1], (spp * sizeof(long))); for (i = 0; i < spp; i++) { if (i + 1 != m && n[i] > 0) { if (WITH->d[i] < epsilonf) WITH->d[i] = epsilonf; WITH->w[i] = n[i] / exp(power * log(WITH->d[i])); } else { WITH->w[i] = 0.0; WITH->d[i] = 0.0; } } for (i = spp; i < nonodes2; i++) { WITH->w[i] = 1.0; WITH->d[i] = 0.0; } WITH->index = m; if (WITH->iter) WITH->v = 0.0; free(n); } /* setuptipf */ void buildnewtip(long m, tree *t, long nextsp) { /* initialize and hook up a new tip */ node *p; setuptipf(m, t); p = t->nodep[nextsp + spp - 3]; hookup(t->nodep[m - 1], p); } /* buildnewtip */ void buildsimpletree(tree *t, long nextsp) { /* make and initialize a three-species tree */ curtree.start=curtree.nodep[enterorder[0] - 1]; setuptipf(enterorder[0], t); setuptipf(enterorder[1], t); hookup(t->nodep[enterorder[0] - 1], t->nodep[enterorder[1] - 1]); buildnewtip(enterorder[2], t, nextsp); insert_(t->nodep[enterorder[2] - 1]->back, t->nodep[enterorder[0] - 1], false); } /* buildsimpletree */ void addtraverse(node *p, node *q, boolean contin, long *numtrees, boolean *succeeded) { /* traverse through a tree, finding best place to add p */ insert_(p, q, true); (*numtrees)++; if (evaluate(&curtree) > (bestree.likelihood + epsilonf * fabs(bestree.likelihood))){ copy_(&curtree, &bestree); addwhere = q; (*succeeded)=true; } copy_(&priortree, &curtree); if (!q->tip && contin) { addtraverse(p, q->next->back, contin,numtrees,succeeded); addtraverse(p, q->next->next->back, contin,numtrees,succeeded); } } /* addtraverse 
*/ void re_move(node **p, node **q) { /* re_move p and record in q where it was */ *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); (*p)->next->back = NULL; (*p)->next->next->back = NULL; update(*q); update((*q)->back); } /* re_move */ void globrearrange(long* numtrees,boolean* succeeded) { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; alloctree(&globtree.nodep,nonodes2); alloctree(&oldtree.nodep,nonodes2); setuptree(&globtree,nonodes2); setuptree(&oldtree,nonodes2); allocd(nonodes2, globtree.nodep); allocd(nonodes2, oldtree.nodep); allocw(nonodes2, globtree.nodep); allocw(nonodes2, oldtree.nodep); copy_(&curtree,&globtree); copy_(&curtree,&oldtree); for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { re_move(&sib_ptr,&where); copy_(&curtree,&priortree); if (where->tip) { copy_(&oldtree,&curtree); copy_(&oldtree,&bestree); sib_ptr=sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true,numtrees,succeeded); if ( addwhere && where != addwhere && where->back != addwhere && bestree.likelihood > globtree.likelihood) { copy_(&bestree,&globtree); success = true; } sib_ptr2 = sib_ptr2->next; } copy_(&oldtree,&curtree); copy_(&oldtree,&bestree); sib_ptr = sib_ptr->next; } } copy_(&globtree,&curtree); copy_(&globtree,&bestree); if (success && globtree.likelihood > oldbestyet) { *succeeded = true; } else { *succeeded = false; } freed(nonodes2, globtree.nodep); freed(nonodes2, oldtree.nodep); freew(nonodes2, globtree.nodep); freew(nonodes2, oldtree.nodep); freetree(&globtree.nodep,nonodes2); freetree(&oldtree.nodep,nonodes2); } void rearrange(node *p, long *numtrees, long *nextsp, boolean *succeeded) { node *q, *r; if (!p->tip && !p->back->tip) { r = p->next->next; re_move(&r, &q); copy_(&curtree, &priortree); addtraverse(r, q->next->back, false, numtrees,succeeded); addtraverse(r, q->next->next->back, false, numtrees,succeeded); copy_(&bestree, &curtree); if (global && ((*nextsp) == spp)) { putchar('.'); fflush(stdout); } } if (!p->tip) { rearrange(p->next->back, numtrees,nextsp,succeeded); rearrange(p->next->next->back, numtrees,nextsp,succeeded); } } /* rearrange */ void describe(node *p) { /* print out information for one branch */ long i=0; node *q; q = p->back; fprintf(outfile, "%4ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, "%15.5f\n", q->v); if (!p->tip) { describe(p->next->back); describe(p->next->next->back); } } /* describe */ void summarize(long numtrees) { /* print out branch lengths etc. 
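Prints a reminder that the tree is unrooted, the sum of squares (or the sum of branch lengths under the minimum evolution criterion), the average percent standard deviation when the power is 2, and a table of branch lengths; the tree itself is then written to the tree file if that output was requested.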
*/ long i, j, totalnum; fprintf(outfile, "\nremember:"); if (outgropt) fprintf(outfile, " (although rooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n\n"); if (!minev) fprintf(outfile, "Sum of squares = %11.5f\n\n", -curtree.likelihood); else fprintf(outfile, "Sum of branch lengths = %11.5f\n\n", -curtree.likelihood); if ((power == 2.0) && !minev) { totalnum = 0; for (i = 1; i <= nums; i++) { for (j = 1; j <= nums; j++) { if (i != j) totalnum += reps[i - 1][j - 1]; } } fprintf(outfile, "Average percent standard deviation = "); fprintf(outfile, "%11.5f\n\n", 100 * sqrt(-curtree.likelihood / (totalnum - 2))); } fprintf(outfile, "Between And Length\n"); fprintf(outfile, "------- --- ------\n"); describe(curtree.start->next->back); describe(curtree.start->next->next->back); describe(curtree.start->back); fprintf(outfile, "\n\n"); if (trout) { col = 0; treeout(curtree.start, &col, 0.43429445222, true, curtree.start); } } /* summarize */ void nodeinit(node *p) { /* initialize a node */ long i, j; for (i = 1; i <= 3; i++) { for (j = 0; j < nonodes2; j++) { p->w[j] = 1.0; p->d[j] = 0.0; } p = p->next; } if ((!lengths) || p->iter) p->v = 1.0; if ((!lengths) || p->back->iter) p->back->v = 1.0; } /* nodeinit */ void initrav(node *p) { /* traverse to initialize */ if (p->tip) return; nodeinit(p); initrav(p->next->back); initrav(p->next->next->back); } /* initrav */ void treevaluate() { /* evaluate user-defined tree, iterating branch lengths */ long i; double oldlike; for (i = 1; i <= spp; i++) setuptipf(i, &curtree); unroot(&curtree,nonodes2); initrav(curtree.start); if (curtree.start->back != NULL) { initrav(curtree.start->back); evaluate(&curtree); do { oldlike = curtree.likelihood; smooth(curtree.start); evaluate(&curtree); } while (fabs(curtree.likelihood - oldlike) > delta); } evaluate(&curtree); } /* treevaluate */ void maketree() { /* contruct the tree */ long nextsp; boolean succeeded=false; long i, j, which; char* treestr; if (usertree) { dist_inputdata(phylodists[ith-1], replicates, printdata, lower, upper, x, reps); setuptree(&curtree, nonodes2); for (which = 1; which <= spp; which++) setuptipf(which, &curtree); if (numtrees > MAXNUMTREES) { printf("\nERROR: number of input trees is read incorrectly from %s\n", intreename); embExitBad(); } if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } first = true; which = 1; while (which <= numtrees) { treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread2 (&treestr, &curtree.start, curtree.nodep, lengths, &trweight, &goteof, &haslengths, &spp,false,nonodes2); nums = spp; curtree.start = curtree.nodep[outgrno - 1]->back; treevaluate(); printree(curtree.start, treeprint, false, false); summarize(numtrees); clear_connections(&curtree,nonodes2); which++; } FClose(intree); } else { if (jumb == 1) { dist_inputdata(phylodists[ith-1], replicates, printdata, lower, upper, x, reps); setuptree(&curtree, nonodes2); setuptree(&priortree, nonodes2); setuptree(&bestree, nonodes2); if (njumble > 1) setuptree(&bestree2, nonodes2); } for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); nextsp = 3; buildsimpletree(&curtree, nextsp); curtree.start = curtree.nodep[enterorder[0] - 1]->back; if (jumb == 1) numtrees = 1; nextsp = 4; if (progress) { printf("Adding species:\n"); writename(0, 3, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } while (nextsp <= spp) { nums = nextsp; buildnewtip(enterorder[nextsp - 1], &curtree, 
nextsp); copy_(&curtree, &priortree); bestree.likelihood = -DBL_MAX; curtree.start = curtree.nodep[enterorder[0] - 1]->back; addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.start, true, &numtrees,&succeeded); copy_(&bestree, &curtree); if (progress) { writename(nextsp - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (global && nextsp == spp) { if (progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = spp; j < nonodes2; j++) if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); printf(" "); } } succeeded = true; while (succeeded) { succeeded = false; curtree.start = curtree.nodep[enterorder[0] - 1]->back; if (nextsp == spp && global) globrearrange (&numtrees,&succeeded); else{ rearrange(curtree.start,&numtrees,&nextsp,&succeeded); } if (global && ((nextsp) == spp) && progress) printf("\n "); } if (global && nextsp == spp) { putc('\n', outfile); if (progress) putchar('\n'); } if (njumble > 1) { if (jumb == 1 && nextsp == spp) copy_(&bestree, &bestree2); else if (nextsp == spp) { if (bestree2.likelihood < bestree.likelihood) copy_(&bestree, &bestree2); } } if (nextsp == spp && jumb == njumble) { if (njumble > 1) copy_(&bestree2, &curtree); curtree.start = curtree.nodep[outgrno - 1]->back; printree(curtree.start, treeprint, true, false); summarize(numtrees); } nextsp++; } } if (jumb == njumble && progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) { printf("\nTree also written onto file \"%s\"\n", outtreename); } } } /* maketree */ int main(int argc, Char *argv[]) { int i; #ifdef MAC argc = 1; /* macsetup("Fitch",""); */ argv[0]="Fitch"; #endif init(argc,argv); emboss_getoptions("ffitch",argc,argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (i=0;i 1) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n\n",ith); } fitch_getinput(); for (jumb = 1; jumb <= njumble; jumb++) maketree(); firstset = false; } if (trout) FClose(outtree); FClose(outfile); FClose(infile); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/neighbor.c0000664000175000017500000003251411305225544013024 00000000000000 /* version 3.6. (c) Copyright 1993-2005 by the University of Washington. Written by Mary Kuhner, Jon Yamato, Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include #include "phylip.h" #include "dist.h" AjPPhyloDist* phylodists = NULL; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void getinput(void); void describe(node *, double); void summarize(void); void nodelabel(boolean); void jointree(void); void maketree(void); void freerest(void); /* function prototypes */ #endif const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; Char infilename[FNMLNGTH]; long nonodes2, outgrno, col, datasets, ith; long inseed; vector *x; intvector *reps; boolean jumble, lower, upper, outgropt, replicates, trout, printdata, progress, treeprint, mulsets, njoin; tree curtree; longer seed; long *enterorder; Char progname[20]; /* variables for maketree, propagated globally for C version: */ node **cluster; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr matrixtype = NULL; AjPStr treetype=NULL; long inseed0 = 0; putchar('\n'); jumble = false; lower = false; outgrno = 1; outgropt = false; replicates = false; trout = true; upper = false; printdata = false; progress = true; treeprint = true; njoin = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); matrixtype = ajAcdGetListSingle("matrixtype"); if(ajStrMatchC(matrixtype, "l")) lower = true; else if(ajStrMatchC(matrixtype, "u")) upper = true; phylodists = ajAcdGetDistances("datafile"); treetype = ajAcdGetListSingle("treetype"); if(ajStrMatchC(treetype, "n")) njoin = true; else if(ajStrMatchC(treetype, "u")) njoin = false; if(njoin) { outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; } replicates = ajAcdGetBoolean("replicates"); jumble = ajAcdGetToggle("jumble"); if(jumble) { inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); embossoutfile = ajAcdGetOutfile("outfile"); embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) emboss_openfile(embossouttree, &outtree, &outtreename); fprintf(outfile, "\nNeighbor-Joining/UPGMA method version %s\n\n",VERSION); } /* emboss_getoptions */ void allocrest() { long i; x = (vector *)Malloc(spp*sizeof(vector)); for (i = 0; i < spp; i++) x[i] = (vector)Malloc(spp*sizeof(double)); reps = (intvector *)Malloc(spp*sizeof(intvector)); for (i = 0; i < spp; i++) reps[i] = (intvector)Malloc(spp*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); cluster = (node **)Malloc(spp*sizeof(node *)); } /* allocrest */ void freerest() { long i; for (i = 0; i < spp; i++) free(x[i]); free(x); for (i = 0; i < spp; i++) free(reps[i]); free(reps); free(nayme); free(enterorder); free(cluster); } /* freerest */ void doinit() { /* initializes variables */ node *p; inputnumbers2seq(phylodists[0], &spp, &nonodes2, 2); nonodes2 += (njoin ? 
0 : 1); alloctree(&curtree.nodep, nonodes2+1); p = curtree.nodep[nonodes2]->next; curtree.nodep[nonodes2]->next = curtree.nodep[nonodes2]; free(p->next); free(p); allocrest(); } /* doinit */ void inputoptions() { /* read options information */ if (ith != 1) samenumspseq2(phylodists[ith-1], ith); putc('\n', outfile); if (njoin) fprintf(outfile, " Neighbor-joining method\n"); else fprintf(outfile, " UPGMA method\n"); fprintf(outfile, "\n Negative branch lengths allowed\n\n"); } /* inputoptions */ void describe(node *p, double height) { /* print out information for one branch */ long i; node *q; q = p->back; if (njoin) fprintf(outfile, "%4ld ", q->index - spp); else fprintf(outfile, "%4ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index - 1][i], outfile); putc(' ', outfile); } else { if (njoin) fprintf(outfile, "%4ld ", p->index - spp); else { fprintf(outfile, "%4ld ", p->index - spp); } } if (njoin) fprintf(outfile, "%12.5f\n", q->v); else fprintf(outfile, "%10.5f %10.5f\n", q->v, q->v+height); if (!p->tip) { describe(p->next->back, height+q->v); describe(p->next->next->back, height+q->v); } } /* describe */ void summarize() { /* print out branch lengths etc. */ putc('\n', outfile); if (njoin) { fprintf(outfile, "remember:"); if (outgropt) fprintf(outfile, " (although rooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n"); } if (njoin) { fprintf(outfile, "\nBetween And Length\n"); fprintf(outfile, "------- --- ------\n"); } else { fprintf(outfile, "From To Length Height\n"); fprintf(outfile, "---- -- ------ ------\n"); } describe(curtree.start->next->back, 0.0); describe(curtree.start->next->next->back, 0.0); if (njoin) describe(curtree.start->back, 0.0); fprintf(outfile, "\n\n"); } /* summarize */ void nodelabel(boolean isnode) { if (isnode) printf("node"); else printf("species"); } /* nodelabel */ void jointree() { /* calculate the tree */ long nc, nextnode, mini=0, minj=0, i, j, ia, ja, ii, jj, nude, iter; double fotu2, total, tmin, dio, djo, bi, bj, bk, dmin=0, da; long el[3]; vector av; intvector oc; double *R; /* added in revisions by Y. Ina */ R = (double *)Malloc(spp * sizeof(double)); for (i = 0; i <= spp - 2; i++) { for (j = i + 1; j < spp; j++) { da = (x[i][j] + x[j][i]) / 2.0; x[i][j] = da; x[j][i] = da; } } /* First initialization */ fotu2 = spp - 2.0; nextnode = spp + 1; av = (vector)Malloc(spp*sizeof(double)); oc = (intvector)Malloc(spp*sizeof(long)); for (i = 0; i < spp; i++) { av[i] = 0.0; oc[i] = 1; } /* Enter the main cycle */ if (njoin) iter = spp - 3; else iter = spp - 1; for (nc = 1; nc <= iter; nc++) { for (j = 2; j <= spp; j++) { for (i = 0; i <= j - 2; i++) x[j - 1][i] = x[i][j - 1]; } tmin = DBL_MAX; /* Compute sij and minimize */ if (njoin) { /* many revisions by Y. Ina from here ... */ for (i = 0; i < spp; i++) R[i] = 0.0; for (ja = 2; ja <= spp; ja++) { jj = enterorder[ja - 1]; if (cluster[jj - 1] != NULL) { for (ia = 0; ia <= ja - 2; ia++) { ii = enterorder[ia]; if (cluster[ii - 1] != NULL) { R[ii - 1] += x[ii - 1][jj - 1]; R[jj - 1] += x[ii - 1][jj - 1]; } } } } } /* ... to here */ for (ja = 2; ja <= spp; ja++) { jj = enterorder[ja - 1]; if (cluster[jj - 1] != NULL) { for (ia = 0; ia <= ja - 2; ia++) { ii = enterorder[ia]; if (cluster[ii - 1] != NULL) { if (njoin) { total = fotu2 * x[ii - 1][jj - 1] - R[ii - 1] - R[jj - 1]; /* this statement part of revisions by Y. 
Ina */ } else total = x[ii - 1][jj - 1]; if (total < tmin) { tmin = total; mini = ii; minj = jj; } } } } } /* compute lengths and print */ if (njoin) { dio = 0.0; djo = 0.0; for (i = 0; i < spp; i++) { dio += x[i][mini - 1]; djo += x[i][minj - 1]; } dmin = x[mini - 1][minj - 1]; dio = (dio - dmin) / fotu2; djo = (djo - dmin) / fotu2; bi = (dmin + dio - djo) * 0.5; bj = dmin - bi; bi -= av[mini - 1]; bj -= av[minj - 1]; } else { bi = x[mini - 1][minj - 1] / 2.0 - av[mini - 1]; bj = x[mini - 1][minj - 1] / 2.0 - av[minj - 1]; av[mini - 1] += bi; } if (progress) { printf("Cycle %3ld: ", iter - nc + 1); if (njoin) nodelabel((boolean)(av[mini - 1] > 0.0)); else nodelabel((boolean)(oc[mini - 1] > 1.0)); printf(" %ld (%10.5f) joins ", mini, bi); if (njoin) nodelabel((boolean)(av[minj - 1] > 0.0)); else nodelabel((boolean)(oc[minj - 1] > 1.0)); printf(" %ld (%10.5f)\n", minj, bj); #ifdef WIN32 phyFillScreenColor(); #endif } hookup(curtree.nodep[nextnode - 1]->next, cluster[mini - 1]); hookup(curtree.nodep[nextnode - 1]->next->next, cluster[minj - 1]); cluster[mini - 1]->v = bi; cluster[minj - 1]->v = bj; cluster[mini - 1]->back->v = bi; cluster[minj - 1]->back->v = bj; cluster[mini - 1] = curtree.nodep[nextnode - 1]; cluster[minj - 1] = NULL; nextnode++; if (njoin) av[mini - 1] = dmin * 0.5; /* re-initialization */ fotu2 -= 1.0; for (j = 0; j < spp; j++) { if (cluster[j] != NULL) { if (njoin) { da = (x[mini - 1][j] + x[minj - 1][j]) * 0.5; if (mini - j - 1 < 0) x[mini - 1][j] = da; if (mini - j - 1 > 0) x[j][mini - 1] = da; } else { da = x[mini - 1][j] * oc[mini - 1] + x[minj - 1][j] * oc[minj - 1]; da /= oc[mini - 1] + oc[minj - 1]; x[mini - 1][j] = da; x[j][mini - 1] = da; } } } for (j = 0; j < spp; j++) { x[minj - 1][j] = 0.0; x[j][minj - 1] = 0.0; } oc[mini - 1] += oc[minj - 1]; } /* the last cycle */ nude = 1; for (i = 1; i <= spp; i++) { if (cluster[i - 1] != NULL) { el[nude - 1] = i; nude++; } } if (!njoin) { curtree.start = cluster[el[0] - 1]; curtree.start->back = NULL; free(av); free(oc); return; } bi = (x[el[0] - 1][el[1] - 1] + x[el[0] - 1][el[2] - 1] - x[el[1] - 1] [el[2] - 1]) * 0.5; bj = x[el[0] - 1][el[1] - 1] - bi; bk = x[el[0] - 1][el[2] - 1] - bi; bi -= av[el[0] - 1]; bj -= av[el[1] - 1]; bk -= av[el[2] - 1]; if (progress) { printf("last cycle:\n"); putchar(' '); nodelabel((boolean)(av[el[0] - 1] > 0.0)); printf(" %ld (%10.5f) joins ", el[0], bi); nodelabel((boolean)(av[el[1] - 1] > 0.0)); printf(" %ld (%10.5f) joins ", el[1], bj); nodelabel((boolean)(av[el[2] - 1] > 0.0)); printf(" %ld (%10.5f)\n", el[2], bk); #ifdef WIN32 phyFillScreenColor(); #endif } hookup(curtree.nodep[nextnode - 1], cluster[el[0] - 1]); hookup(curtree.nodep[nextnode - 1]->next, cluster[el[1] - 1]); hookup(curtree.nodep[nextnode - 1]->next->next, cluster[el[2] - 1]); cluster[el[0] - 1]->v = bi; cluster[el[1] - 1]->v = bj; cluster[el[2] - 1]->v = bk; cluster[el[0] - 1]->back->v = bi; cluster[el[1] - 1]->back->v = bj; cluster[el[2] - 1]->back->v = bk; curtree.start = cluster[el[0] - 1]->back; free(av); free(oc); } /* jointree */ void maketree() { /* construct the tree */ long i ; dist_inputdata(phylodists[ith-1], replicates, printdata, lower, upper, x, reps); if (njoin && (spp < 3)) { printf("\nERROR: Neighbor-Joining runs must have at least 3 species\n\n"); embExitBad(); } if (progress) putchar('\n'); if (ith == 1) setuptree(&curtree, nonodes2 + 1); for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); for (i = 0; i < spp; i++) cluster[i] = curtree.nodep[i]; jointree(); 
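/* jointree has now clustered all tips.  For neighbor-joining it
   repeatedly joined the pair (i,j) minimizing (n-2)*d(i,j) - R(i) - R(j),
   where n is the number of remaining clusters and R(k) is the sum of
   distances from k to all other remaining clusters; the joined nodes
   were given branch lengths
       b(i) = (d(i,j) + (R(i)-R(j))/(n-2)) / 2,   b(j) = d(i,j) - b(i)
   (each further reduced by the depth already accumulated for that
   cluster), and i and j were replaced by a node at distance
   (d(i,k)+d(j,k))/2 from every other cluster k.  For UPGMA the pair with
   the smallest plain distance is joined and the replacement distances
   are averages weighted by cluster size. */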
if (njoin) curtree.start = curtree.nodep[outgrno - 1]->back; printree(curtree.start, treeprint, njoin, (boolean)(!njoin)); if (treeprint) summarize(); if (trout) { col = 0; if (njoin) treeout(curtree.start, &col, 0.43429448222, njoin, curtree.start); else curtree.root = curtree.start, treeoutr(curtree.start,&col,&curtree); } if (progress) { printf("\nOutput written on file \"%s\"\n\n", outfilename); if (trout) printf("Tree written on file \"%s\"\n\n", outtreename); } } /* maketree */ int main(int argc, Char *argv[]) { /* main program */ #ifdef MAC argc = 1; /* macsetup("Neighbor",""); */ argv[0] = "Neighbor"; #endif init(argc, argv); emboss_getoptions("fneighbor",argc,argv); ibmpc = IBMCRT; ansi = ANSICRT; doinit(); ith = 1; while (phylodists[ith-1]) { if (ith > 1) { fprintf(outfile, "Data set # %ld:\n",ith); if (progress) printf("Data set # %ld:\n",ith); } inputoptions(); maketree(); ith++; } FClose(infile); FClose(outfile); FClose(outtree); freerest(); freetree(&curtree.nodep, nonodes2+1); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/mix.c0000664000175000017500000006463311305225544012033 00000000000000 #include "phylip.h" #include "disc.h" #include "wagner.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define maxtrees 100 /* maximum number of tied trees stored */ typedef long *placeptr; AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylomix = NULL; AjPPhyloTree* phylotrees = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void doinput(void); void evaluate(node2 *); void reroot(node2 *); void savetraverse(node2 *); void savetree(void); void mix_addtree(long *pos); void mix_findtree(boolean *, long *, long, long *, long **); void tryadd(node2 *, node2 **, node2 **); void addpreorder(node2 *, node2 *, node2 *); void tryrearr(node2 *, node2 **, boolean *); void repreorder(node2 *, node2 **, boolean *); void rearrange(node2 **r); void mix_addelement(node2 **, long *, long *, boolean *, char**); void mix_treeread(void); void describe(void); void maketree(void); void reallocchars(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH], weightfilename[FNMLNGTH], ancfilename[FNMLNGTH], mixfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node2 *root; long outgrno, msets, ith, njumble, jumb, numtrees; /* outgrno indicates outgroup */ long inseed, inseed0; boolean jumble, usertree, weights, ancvar, questions, allsokal, allwagner, mixture, trout, noroot, outgropt, didreroot, progress, treeprint, stepbox, ancseq, mulsets, firstset, justwts; boolean *ancone, *anczero, *ancone0, *anczero0; pointptr2 treenode; /* pointers to all nodes in tree */ double threshold; double *threshwt; bitptr wagner, wagner0; longer seed; long *enterorder; double **fsteps; char *guess; long **bestrees; steptr numsteps, numsone, numszero; gbit *garbage; char ch; char *progname; /* Local variables for maketree: */ long minwhich; double 
like, bestyet, bestlike, bstlike2, minsteps; boolean lastrearr,full; double nsteps[maxuser]; node2 *there; long fullset; bitptr steps, zeroanc, oneanc, fulzeroanc, empzeroanc; long *place, col; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr method = NULL; ajint numseqs=0; ajint numwts=0; jumble = false; njumble = 1; outgrno = 1; outgropt = false; trout = true; usertree = false; weights = false; justwts = false; ancvar = false; allsokal = false; allwagner = false; mixture = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { while (phylotrees[numtrees]) numtrees++; usertree = true; } method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "w")) allwagner = true; else if(ajStrMatchC(method, "c")) allsokal = true; else if(ajStrMatchC(method, "m")) { mixture = allwagner = true; phylomix = ajAcdGetProperties("mixfile"); } if(!usertree) { njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; threshold = ajAcdGetFloat("threshold"); printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nMixed parsimony algorithm, version %s\n\n",VERSION); } /* emboss_getoptions */ void reallocchars() { long i; if (usertree) { for (i = 0; i < maxuser; i++) { free (fsteps[i]); fsteps[i] = (double *)Malloc(chars*sizeof(double)); } } free(extras); free(weight); free(threshwt); free(numsteps); free(numszero); free(numsone); free(guess); free(ancone); free(anczero); free(ancone0); free(anczero0); extras = (steptr)Malloc(chars*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); numsteps = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); numsone = (steptr)Malloc(chars*sizeof(long)); guess = (Char *)Malloc(chars*sizeof(Char)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); } void allocrest() { long i; if (usertree) { fsteps = (double **)Malloc(maxuser*sizeof(double *)); for (i = 0; i < maxuser; i++) fsteps[i] = (double *)Malloc(chars*sizeof(double)); } bestrees = (long **)Malloc(maxtrees*sizeof(long *)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1] = (long *)Malloc((spp+1)*sizeof(long)); extras = (steptr)Malloc(chars*sizeof(long)); weight = 
(steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); numsteps = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); numsone = (steptr)Malloc(chars*sizeof(long)); guess = (Char *)Malloc(chars*sizeof(Char)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); wagner = (bitptr)Malloc(words*sizeof(long)); wagner0 = (bitptr)Malloc(words*sizeof(long)); place = (long *)Malloc(nonodes*sizeof(long)); steps = (bitptr)Malloc(words*sizeof(long)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); fulzeroanc = (bitptr)Malloc(words*sizeof(long)); empzeroanc = (bitptr)Malloc(words*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); words = chars / bits + 1; if (printdata) fprintf(outfile, "%ld species, %ld characters\n\n", spp, chars); alloctree2(&treenode); setuptree2(treenode); allocrest(); } /* doinit */ void inputoptions() { /* input the information on the options */ long i; if(justwts){ if(firstset){ if (ancvar) inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); if (mixture) inputmixturestr(phylomix->Str[0], wagner0); } for (i = 0; i < (chars); i++) weight[i] = 1; inputweightsstr(phyloweights->Str[0], chars, weight, &weights); for (i = 0; i < (words); i++) { if (mixture) wagner[i] = wagner0[i]; else if (allsokal) wagner[i] = 0; else wagner[i] = (1L << (bits + 1)) - (1L << 1); } } else { if (!firstset) { samenumspstate(phylostates[ith-1], &chars, ith); reallocchars(); } for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); if (mixture) inputmixturestr(phylomix->Str[0], wagner0); if (weights) inputweightsstr(phyloweights->Str[0], chars, weight, &weights); for (i = 0; i < (words); i++) { if (mixture) wagner[i] = wagner0[i]; else if (allsokal) wagner[i] = 0; else wagner[i] = (1L << (bits + 1)) - (1L << 1); } } for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = (((1L << (i % bits + 1)) & wagner[i / bits]) != 0); } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } noroot = true; questions = false; for (i = 0; i < (chars); i++) { if (weight[i] > 0) { noroot = (noroot && ancone[i] && anczero[i] && ((((1L << (i % bits + 1)) & wagner[i / bits]) != 0) || threshold <= 2.0)); } questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void doinput() { /* reads the input data */ inputoptions(); if(!justwts || firstset) disc_inputdata2(phylostates[ith-1], treenode); } /* doinput */ void evaluate(node2 *r) { /* Determines the number of steps needed for a tree. 
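Steps are accumulated twice over the bit-packed character words, once assuming every character's ancestral state is 0 and once assuming it is 1; for each character the smaller count allowed by anczero/ancone is kept, capped at threshwt[] for threshold parsimony, and guess[] records the implied ancestral state.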
This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum, term; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } full = true; for (i = 0; i < (words); i++) zeroanc[i] = fullset; postorder(r, fullset, full, wagner, zeroanc); cpostorder(r, full, zeroanc, numszero, numsone); count(r->fulstte1, zeroanc, numszero, numsone); for (i = 0; i < (words); i++) zeroanc[i] = 0; full = false; postorder(r, fullset, full, wagner, zeroanc); cpostorder(r, full, zeroanc, numszero, numsone); count(r->empstte0, zeroanc, numszero, numsone); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) term = stepnum; else term = threshwt[i]; sum += term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { minwhich = 1; minsteps = sum; } else if (sum < minsteps) { minwhich = which; minsteps = sum; } } like = -sum; } /* evaluate */ void reroot(node2 *outgroup) { /* reorients tree, putting outgroup in desired position. */ node2 *p, *q; if (outgroup->back->index == root->index) return; p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = q; outgroup->back = p; } /* reroot */ void savetraverse(node2 *p) { /* sets BOOLEANs that indicate which way is down */ p->bottom = true; if (p->tip) return; p->next->bottom = false; savetraverse(p->next->back); p->next->next->bottom = false; savetraverse(p->next->next->back); } /* savetraverse */ void savetree() { /* record in place where each species has to be added to reconstruct this tree */ long i, j; node2 *p; boolean done; if (noroot) reroot(treenode[outgrno - 1]); savetraverse(root); for (i = 0; i < (nonodes); i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= (spp); i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; while (!p->bottom) p = p->next; p = p->back; } if (i > 1) { place[i - 1] = place[p->index - 1]; j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = spp + i - 1; while (!p->bottom) p = p->next; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); } } } } /* savetree */ void mix_addtree(long *pos) { /* puts tree from ARRAY place in its proper position in ARRAY bestrees */ long i; for (i =nextree - 1; i >= (*pos); i--) memcpy(bestrees[i], bestrees[i - 1], spp*sizeof(long)); for (i = 0; i < (spp); i++) bestrees[(*pos) - 1][i] = place[i]; nextree++; } /* mix_addtree */ void mix_findtree(boolean *found, long *pos, long nextree, long *place, long **bestrees) { /* finds tree given by ARRAY place in ARRAY bestrees by binary search */ /* used by dnacomp, dnapars, dollop, mix, & protpars */ long i, lower, upper; boolean below, done; below = false; lower = 1; upper = nextree - 1; (*found) = false; while (!(*found) && lower <= upper) { (*pos) = (lower + upper) / 2; i = 3; done = false; while (!done) { done = (i > spp); if (!done) done = (place[i - 1] != bestrees[(*pos) - 1][i - 1]); if (!done) i++; } (*found) = (i > spp); 
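/* at this point i is the first species position (counting from 3) where place[] differs from this bestrees entry, or spp+1 if the two trees match; the comparison below picks which half of the sorted list to search next */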
below = (place[i - 1] < bestrees[(*pos )- 1][i - 1]); if (*found) break; if (below) upper = (*pos) - 1; else lower = (*pos) + 1; } if (!(*found) && !below) (*pos)++; } /* mix_findtree */ void tryadd(node2 *p, node2 **item, node2 **nufork) { /* temporarily adds one fork and one tip to the tree. if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; boolean found; node2 *rute; add3(p, *item, *nufork, &root, treenode); evaluate(root); if (lastrearr) { if (like >= bstlike2) { rute = root->next->back; savetree(); reroot(rute); if (like > bstlike2) { bestlike = bstlike2 = like; pos = 1; nextree = 1; mix_addtree(&pos); } else { pos = 0; mix_findtree(&found, &pos, nextree, place, bestrees); if (!found) { if (nextree <= maxtrees) mix_addtree(&pos); } } } } if (like > bestyet) { bestyet = like; there = p; } re_move3(item, nufork, &root, treenode); } /* tryadd */ void addpreorder(node2 *p, node2 *item, node2 *nufork) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p, &item, &nufork); if (!p->tip) { addpreorder(p->next->back, item, nufork); addpreorder(p->next->next->back, item, nufork); } } /* addpreorder */ void tryrearr(node2 *p, node2 **r, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success := TRUE and keeps the new tree. otherwise, restores the old tree */ node2 *frombelow, *whereto, *forknode; double oldlike; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (forknode->back == NULL) return; oldlike = bestyet; if (p->back->next->next == forknode) frombelow = forknode->next->next->back; else frombelow = forknode->next->back; whereto = treenode[forknode->back->index - 1]; re_move3(&p, &forknode, &root, treenode); add3(whereto, p, forknode, &root, treenode); evaluate(*r); if ( like - oldlike > LIKE_EPSILON ) { *success = true; bestyet = like; } else { re_move3(&p, &forknode, &root, treenode); add3(frombelow, p, forknode, &root, treenode); } } /* tryrearr */ void repreorder(node2 *p, node2 **r, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p, r, success); if (!p->tip) { repreorder(p->next->back, r,success); repreorder(p->next->next->back, r,success); } } /* repreorder */ void rearrange(node2 **r) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. 
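Each pass calls repreorder, which applies tryrearr at every node and keeps any change that improves the score;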
if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ boolean success=true; while (success) { success = false; repreorder(*r,r,&success); } } /* rearrange */ void mix_addelement(node2 **p, long *nextnode, long *lparens, boolean *names, char** treestr) { /* recursive procedure adds nodes to user-defined tree */ node2 *q; long i, n; boolean found; Char str[nmlngth]; sgetch(&ch, lparens, treestr); if (ch == '(' ) { if ((*lparens) >= spp) { printf("\n\nERROR IN USER TREE: Too many left parentheses\n\n"); embExitBad(); } (*nextnode)++; q = treenode[(*nextnode) - 1]; mix_addelement(&q->next->back, nextnode, lparens, names, treestr); q->next->back->back = q->next; do { ch = *(*treestr)++; } while (ch && ch != ','); mix_addelement(&q->next->next->back, nextnode, lparens, names, treestr); q->next->next->back->back = q->next->next; do { ch = *(*treestr)++; } while (ch && ch != ')'); *p = q; return; } for (i = 0; i < nmlngth; i++) str[i] = ' '; n = 1; do { if (ch == '_') ch = ' '; str[n - 1] =ch; ch = *(*treestr)++ ; n++; } while (ch != ',' && ch != ')' && ch != ':' && n <= nmlngth); n = 1; do { found = true; for (i = 0; i < nmlngth; i++) found = (found && ((str[i] == nayme[n - 1][i]) || ((nayme[n - 1][i] == '_') && (str[i] == ' ')))); if (found) { if (names[n - 1] == false) { *p = treenode[n - 1]; names[n - 1] = true; } else { printf("\n\nERROR IN USER TREE: Duplicate name found: "); for (i = 0; i < nmlngth; i++) putchar(nayme[n - 1][i]); printf("\n\n"); embExitBad(); } } else n++; } while (!(n > spp || found )); if (n <= spp) return; printf("CANNOT FIND SPECIES: "); for (i = 0; i < nmlngth; i++) putchar(str[i]); putchar('\n'); } /* mix_addelement */ void mix_treeread() { /* read in user-defined tree and set it up */ long nextnode, lparens, i; boolean *names; char* treestr; root = treenode[spp]; nextnode = spp; root->back = NULL; names = (boolean *)Malloc(spp*sizeof(boolean)); for (i = 0; i < (spp); i++) names[i] = false; lparens = 0; treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); mix_addelement(&root, &nextnode, &lparens, names, &treestr); if (ch == '[') { do ch = *treestr++; while (ch != ']'); ch = *treestr++; } do { ch = *treestr++; } while (ch && ch != ';'); if (progress) printf("."); free(names); } /* mix_treeread */ void describe() { /* prints ancestors, steps and table of numbers of steps in each character */ if (treeprint) fprintf(outfile, "\nrequires a total of %10.3f\n", -like); putc('\n', outfile); if (stepbox) writesteps(weights, numsteps); if (questions && (!noroot || didreroot)) guesstates(guess); if (ancseq) { hypstates(fullset, full, noroot, didreroot, root, wagner, zeroanc, oneanc, treenode, guess, garbage); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout2(root, &col, root); } } /* describe */ void maketree() { /* constructs a binary tree from the pointers in treenode. 
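Up to maxtrees equally parsimonious trees are kept in bestrees and printed at the end; when user trees are supplied they are read and scored instead of being searched for. In the search phase it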
adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j; double gotlike; node2 *item, *nufork, *dummy; fullset = (1L << (bits + 1)) - (1L << 1); for (i=0 ; i gotlike); } } if (progress) putchar('\n'); for (i = spp - 1; i >= 1; i--) re_move3(&treenode[i], &dummy, &root, treenode); if (jumb == njumble) { if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n",(long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; add3(treenode[0], treenode[1], treenode[spp], &root, treenode); for (j = 3; j <= (spp); j++) add3(treenode[bestrees[i][j - 1] - 1], treenode[j - 1], treenode[spp + j - 2], &root, treenode); if (noroot) reroot(treenode[outgrno - 1]); didreroot = (outgropt && noroot); evaluate(root); printree(treeprint, noroot, didreroot, root); describe(); for (j = 1; j < (spp); j++) re_move3(&treenode[j], &dummy, &root, treenode); } } } else { if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } which = 1; if (progress) printf(" "); while (which <= numtrees ) { mix_treeread(); didreroot = (outgropt && noroot); if (noroot) reroot(treenode[outgrno - 1]); evaluate(root); printree(treeprint, noroot, didreroot, root); describe(); which++; } if (progress) printf("\n"); FClose(intree); fprintf(outfile, "\n\n"); if (numtrees > 2 && chars > 1 ) { if (progress) printf(" sampling for SH test\n"); standev(numtrees, minwhich, minsteps, nsteps, fsteps, seed); } } if (jumb == njumble) { if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTrees also written onto file \"%s\"\n", outtreename); putchar('\n'); } } if (ancseq) freegarbage(&garbage); } /* maketree */ int main(int argc, Char *argv[]) { /* Mixed parsimony by uphill search */ #ifdef MAC argc = 1; /* macsetup("Mix",""); */ argv[0] = "Mix"; #endif init(argc, argv); emboss_getoptions("fmix",argc,argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; garbage = NULL; bits = 8*sizeof(long) - 1; doinit(); for (ith = 1; ith <= msets; ith++) { if(firstset){ if (allsokal && !mixture) fprintf(outfile, "Camin-Sokal parsimony method\n\n"); if (allwagner && !mixture) fprintf(outfile, "Wagner parsimony method\n\n"); if (mixture) fprintf(outfile, "Mixture of Wagner and Camin-Sokal parsimony methods\n\n"); } doinput(); if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } if (justwts){ if(firstset && mixture && (printdata || stepbox || ancseq)) printmixture(outfile, wagner); fprintf(outfile, "Weights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } else if (mixture && (printdata || stepbox || ancseq)) printmixture(outfile, wagner); if (printdata){ if (weights || justwts) printweights(outfile, 0, chars, weight, "Characters"); if (ancvar) printancestors(outfile, anczero, ancone); } if (ith == 1) firstset = false; for (jumb = 1; jumb <= njumble; jumb++) maketree(); } free(place); free(steps); free(zeroanc); free(oneanc); free(fulzeroanc); free(empzeroanc); FClose(outfile); FClose(infile); FClose(outtree); #ifdef MAC 
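/* fixmacfile presumably patches the Macintosh file type/creator of the two output files */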
fixmacfile(outtreename); fixmacfile(outfilename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Mixed parsimony by uphill search */ PHYLIPNEW-3.69.650/src/dolmove.c0000664000175000017500000011135311616234204012671 00000000000000#include "phylip.h" #include "moves.h" #include "disc.h" #include "dollo.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define overr 4 #define which 1 AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylofact = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees = NULL; typedef enum { horiz, vert, up, overt, upcorner, downcorner, onne, zerro, question, polym } chartype; typedef enum { rearr, flipp, reroott, none } rearrtype; typedef enum { arb, use, spec } howtree; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void inputoptions(void); void allocrest(void); void doinput(void); void configure(void); void prefix(chartype); void postfix(chartype); void makechar(chartype); void dolmove_correct(node *); void dolmove_count(node *); void preorder(node *); void evaluate(node *); void reroot(node *); void dolmove_hyptrav(node *); void dolmove_hypstates(void); void grwrite(chartype, long, long *); void dolmove_drawline(long); void dolmove_printree(void); void arbitree(void); void yourtree(void); void initdolmovenode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void buildtree(void); void rearrange(void); void tryadd(node *, node **, node **, double *); void addpreorder(node *, node *, node *, double *); void try(void); void undo(void); void treewrite(boolean); void clade(void); void flip(void); void changeoutgroup(void); void redisplay(void); void treeconstruct(void); /* function prototypes */ #endif Char infilename[FNMLNGTH],intreename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outtreename; AjPFile embossouttree; node *root; long outgrno, col, screenlines, screenwidth, scrollinc,treelines, leftedge,topedge,vmargin,hscroll,vscroll,farthest; /* outgrno indicates outgroup */ boolean weights, thresh, ancvar, questions, dollo, factors, waswritten; boolean *ancone, *anczero, *ancone0, *anczero0; Char *factor; pointptr treenode; /* pointers to all nodes in tree */ double threshold; double *threshwt; unsigned char cha[10]; boolean reversed[10]; boolean graphic[10]; howtree how; char *progname; char ch; /* Variables for treeread */ boolean usertree, goteof, firsttree, haslengths; pointarray nodep; node *grbg; long *zeros; /* Local variables for treeconstruct, propagated globally for c version: */ long dispchar, dispword, dispbit, atwhat, what, fromwhere, towhere, oldoutgrno, compatible; double like, bestyet, gotlike; Char *guess; boolean display, newtree, changed, subtree, written, oldwritten, restoring, wasleft, oldleft, earlytree; boolean *in_tree; steptr numsteps; long fullset; bitptr zeroanc, oneanc; node *nuroot; rearrtype lastop; steptr numsone, numszero; boolean *names; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr method = NULL; AjPStr initialtree = NULL; how = arb; usertree = false; goteof = false; thresh = false; threshold = spp; 
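/* option defaults before the ACD values are read; threshold == spp means no threshold parsimony unless dothreshold is set below */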
weights = false; ancvar = false; factors = false; dollo = true; screenlines = 24; scrollinc = 20; screenwidth = 80; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; initialtree = ajAcdGetListSingle("initialtree"); if(ajStrMatchC(initialtree, "a")) how = arb; if(ajStrMatchC(initialtree, "u")) how = use; if(ajStrMatchC(initialtree, "s")) { how = spec; phylotrees = ajAcdGetTree("intreefile"); usertree = true; } phylofact = ajAcdGetProperties("factorfile"); if(phylofact) factors = true; method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "d")) dollo = true; else dollo = false; thresh = ajAcdGetToggle("dothreshold"); if(thresh) threshold = ajAcdGetFloat("threshold"); screenwidth = ajAcdGetInt("screenwidth"); screenlines = ajAcdGetInt("screenlines"); if (scrollinc < screenwidth / 2.0) hscroll = scrollinc; else hscroll = screenwidth / 2; if (scrollinc < screenlines / 2.0) vscroll = scrollinc; else vscroll = screenlines / 2; printf("\nInteractive Dollo or polymorphism parsimony, version %s\n\n",VERSION); embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } /* emboss_getoptions */ void inputoptions() { /* input the information on the options */ long i; for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) inputancestorsstr(phyloanc->Str[0],anczero0, ancone0); if (factors) inputfactorsstr(phylofact->Str[0], chars, factor, &factors); if (weights) inputweightsstr(phyloweights->Str[0], chars, weight, &weights); putchar('\n'); if (weights) printweights(stdout, 0, chars, weight, "Characters"); if (factors) printfactors(stdout, chars, factor, ""); for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = false; } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } if (ancvar) printancestors(stdout, anczero, ancone); if (!thresh) threshold = spp; questions = false; for (i = 0; i < (chars); i++) { questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void allocrest() { nayme = (naym *)Malloc(spp*sizeof(naym)); in_tree = (boolean *)Malloc(nonodes*sizeof(boolean)); extras = (steptr)Malloc(chars*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); numszero = (steptr)Malloc(chars*sizeof(long)); numsone = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); factor = (Char *)Malloc(chars*sizeof(Char)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); } /* allocrest */ void doinput() { /* reads the input data */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); words = chars / bits + 1; printf("%2ld species, %3ld characters\n", spp, chars); alloctree(&treenode); setuptree(treenode); allocrest(); inputoptions(); disc_inputdata(phylostates[0], treenode, dollo, false, stdout); } /* doinput */ void configure() { /* configure to machine -- set up special characters */ chartype a; for (a = horiz; (long)a <= (long)polym; a = (chartype)((long)a + 1)) reversed[(long)a] = false; for (a = horiz; (long)a <= (long)polym; a = (chartype)((long)a + 1)) graphic[(long)a] = false; if 
(ibmpc) { cha[(long)horiz] = 205; graphic[(long)horiz] = true; cha[(long)vert] = 186; graphic[(long)vert] = true; cha[(long)up] = 186; graphic[(long)up] = true; cha[(long)overt] = 205; graphic[(long)overt] = true; cha[(long)onne] = 219; reversed[(long)onne] = true; cha[(long)zerro] = 176; graphic[(long)zerro] = true; cha[(long)question] = 178; /* or try CHR(177) */ cha[(long)polym] = '\001'; reversed[(long)polym] = true; cha[(long)upcorner] = 200; graphic[(long)upcorner] = true; cha[(long)downcorner] = 201; graphic[(long)downcorner] = true; graphic[(long)question] = true; return; } if (ansi) { cha[(long)onne] = ' '; reversed[(long)onne] = true; cha[(long)horiz] = cha[(long)onne]; reversed[(long)horiz] = true; cha[(long)vert] = cha[(long)onne]; reversed[(long)vert] = true; cha[(long)up] = 'x'; graphic[(long)up] = true; cha[(long)overt] = 'q'; graphic[(long)overt] = true; cha[(long)zerro] = 'a'; graphic[(long)zerro] = true; reversed[(long)zerro] = true; cha[(long)question] = '?'; cha[(long)question] = '?'; reversed[(long)question] = true; cha[(long)polym] = '%'; reversed[(long)polym] = true; cha[(long)upcorner] = 'm'; graphic[(long)upcorner] = true; cha[(long)downcorner] = 'l'; graphic[(long)downcorner] = true; return; } cha[(long)horiz] = '='; cha[(long)vert] = ' '; cha[(long)up] = '!'; cha[(long)overt] = '-'; cha[(long)onne] = '*'; cha[(long)zerro] = '='; cha[(long)question] = '.'; cha[(long)polym] = '%'; cha[(long)upcorner] = '`'; cha[(long)downcorner] = ','; } /* configure */ void prefix(chartype a) { /* give prefix appropriate for this character */ if (reversed[(long)a]) prereverse(ansi); if (graphic[(long)a]) pregraph(ansi); } /* prefix */ void postfix(chartype a) { /* give postfix appropriate for this character */ if (reversed[(long)a]) postreverse(ansi); if (graphic[(long)a]) postgraph(ansi); } /* postfix */ void makechar(chartype a) { /* print out a character with appropriate prefix or postfix */ prefix(a); putchar(cha[(long)a]); postfix(a); } /* makechar */ void dolmove_correct(node *p) { /* get final states for intermediate nodes */ long i; long z0, z1, s0, s1, temp; if (p->tip) return; for (i = 0; i < (words); i++) { if (p->back == NULL) { s0 = zeroanc[i]; s1 = oneanc[i]; } else { s0 = treenode[p->back->index - 1]->statezero[i]; s1 = treenode[p->back->index - 1]->stateone[i]; } z0 = (s0 & p->statezero[i]) | (p->next->back->statezero[i] & p->next->next->back->statezero[i]); z1 = (s1 & p->stateone[i]) | (p->next->back->stateone[i] & p->next->next->back->stateone[i]); if (dollo) { temp = z0 & (~(zeroanc[i] & z1)); z1 &= ~(oneanc[i] & z0); z0 = temp; } temp = fullset & (~z0) & (~z1); p->statezero[i] = z0 | (temp & s0 & (~s1)); p->stateone[i] = z1 | (temp & s1 & (~s0)); } } /* dolmove_correct */ void dolmove_count(node *p) { /* counts the number of steps in a fork of the tree. 
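States are packed one character per bit of a long (bit i % bits + 1 of word i / bits, bit 0 unused). Under Dollo parsimony a step is counted wherever the derived state present in the parent fork is lost again in this fork; under polymorphism parsimony a step is counted wherever both fork and parent retain both states. Each step adds weight[i] to numszero[] or numsone[], depending on whether the current pass (zeroanc) assumes ancestral state 0 or 1.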
The program spends much of its time in this PROCEDURE */ long i, j, l; bitptr steps; steps = (bitptr)Malloc(words*sizeof(long)); if (dollo) { for (i = 0; i < (words); i++) steps[i] = (treenode[p->back->index - 1]->stateone[i] & p->statezero[i] & zeroanc[i]) | (treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & oneanc[i]); } else { for (i = 0; i < (words); i++) steps[i] = treenode[p->back->index - 1]->stateone[i] & treenode[p->back->index - 1]->statezero[i] & p->stateone[i] & p->statezero[i]; } j = 1; l = 0; for (i = 0; i < (chars); i++) { l++; if (l > bits) { l = 1; j++; } if (((1L << l) & steps[j - 1]) != 0) { if (((1L << l) & zeroanc[j - 1]) != 0) numszero[i] += weight[i]; else numsone[i] += weight[i]; } } free(steps); } /* dolmove_count */ void preorder(node *p) { /* go back up tree setting up and counting interior node states */ if (!p->tip) { dolmove_correct(p); preorder(p->next->back); preorder(p->next->next->back); } if (p->back != NULL) dolmove_count(p); } /* preorder */ void evaluate(node *r) { /* Determines the number of losses or polymorphisms needed for a tree. This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum; boolean nextcompat, thiscompat, done; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } for (i = 0; i < (words); i++) { zeroanc[i] = fullset; oneanc[i] = 0; } compatible = 0; nextcompat = true; postorder(r); preorder(r); for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = fullset; } postorder(r); preorder(r); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) sum += stepnum; else sum += threshwt[i]; thiscompat = (stepnum <= weight[i]); if (factors) { done = (i + 1 == chars); if (!done) done = (factor[i + 1] != factor[i]); nextcompat = (nextcompat && thiscompat); if (done) { if (nextcompat) compatible += weight[i]; nextcompat = true; } } else if (thiscompat) compatible += weight[i]; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } like = -sum; } /* evaluate */ void reroot(node *outgroup) { /* reorients tree, putting outgroup in desired position. 
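The root fork is detached from between its two current neighbours and re-attached on the branch leading to the outgroup; oldoutgrno and wasleft are saved (or replayed when restoring is set) so the rerooting can later be undone.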
*/ node *p, *q, *newbottom, *oldbottom; boolean onleft; if (outgroup->back->index == root->index) return; newbottom = outgroup->back; p = treenode[newbottom->index - 1]->back; while (p->index != root->index) { oldbottom = treenode[p->index - 1]; treenode[p->index - 1] = p; p = oldbottom->back; } onleft = (p == root->next); if (restoring) if (!onleft && wasleft){ p = root->next->next; q = root->next; } else { p = root->next; q = root->next->next; } else { if (onleft) oldoutgrno = root->next->next->back->index; else oldoutgrno = root->next->back->index; wasleft = onleft; p = root->next; q = root->next->next; } p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; if (restoring) { if (!onleft && wasleft) { outgroup->back->back = root->next; outgroup->back = root->next->next; } else { outgroup->back->back = root->next->next; outgroup->back = root->next; } } else { outgroup->back->back = root->next->next; outgroup->back = root->next; } treenode[newbottom->index - 1] = newbottom; } /* reroot */ void dolmove_hyptrav(node *r) { /* compute states at interior nodes for one character */ if (!r->tip) dolmove_correct(r); if (((1L << dispbit) & r->stateone[dispword - 1]) != 0) { if (((1L << dispbit) & r->statezero[dispword - 1]) != 0) { if (dollo) r->state = '?'; else r->state = 'P'; } else r->state = '1'; } else { if (((1L << dispbit) & r->statezero[dispword - 1]) != 0) r->state = '0'; else r->state = '?'; } if (!r->tip) { dolmove_hyptrav(r->next->back); dolmove_hyptrav(r->next->next->back); } } /* dolmove_hyptrav */ void dolmove_hypstates() { /* fill in and describe states at interior nodes */ long i, j, k; for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = 0; } for (i = 0; i < (chars); i++) { j = i / bits + 1; k = i % bits + 1; if (guess[i] == '0') zeroanc[j - 1] = ((long)zeroanc[j - 1]) | (1L << k); if (guess[i] == '1') oneanc[j - 1] = ((long)oneanc[j - 1]) | (1L << k); } filltrav(root); dolmove_hyptrav(root); } /* dolmove_hypstates */ void grwrite(chartype c, long num, long *pos) { int i; prefix(c); for (i = 1; i <= num; i++) { if ((*pos) >= leftedge && (*pos) - leftedge + 1 < screenwidth) putchar(cha[(long)c]); (*pos)++; } postfix(c); } /* grwrite */ void dolmove_drawline(long i) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j, pos; boolean extra, done; Char s, cc; chartype c, d; pos = 1; p = nuroot; q = nuroot; extra = false; if (i == (long)p->ycoord && (p == root || subtree)) { c = overt; if (display) { if (p == root) { if(!dispchar) cc = guess[0]; else cc = guess[dispchar - 1]; } else cc = p->state; switch (cc) { case '1': c = onne; break; case '0': c = zerro; break; case '?': c = question; break; case 'P': c = polym; break; } } if ((subtree)) stwrite("Subtree:", 8, &pos, leftedge, screenwidth); if (p->index >= 100) nnwrite(p->index, 3, &pos, leftedge, screenwidth); else if (p->index >= 10) { grwrite(c, 1, &pos); nnwrite(p->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(p->index, 1, &pos, leftedge, screenwidth); } extra = true; } else { if (subtree) stwrite(" ", 10, &pos, leftedge, screenwidth); else stwrite(" ", 2, &pos, leftedge, screenwidth); } do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)p->xcoord - 
(long)q->xcoord; if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)q->ycoord > (long)p->ycoord) d = upcorner; else d = downcorner; c = overt; s = q->state; if (s == 'P' && p->state != 'P') s = p->state; if (display) { switch (s) { case '1': c = onne; break; case '0': c = zerro; break; case '?': c = question; break; case 'P': c = polym; break; } d = c; } if (n > 1) { grwrite(d, 1, &pos); grwrite(c, n - 3, &pos); } if (q->index >= 100) nnwrite(q->index, 3, &pos, leftedge, screenwidth); else if (q->index >= 10) { grwrite(c, 1, &pos); nnwrite(q->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(q->index, 1, &pos, leftedge, screenwidth); } extra = true; } else if (!q->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { c = up; if (i < (long)p->ycoord) s = p->next->back->state; else s = p->next->next->back->state; if (s == 'P' && p->state != 'P') s = p->state; if (display) { switch (s) { case '1': c = onne; break; case '0': c = zerro; break; case '?': c = question; break; case 'P': c = polym; break; } } grwrite(c, 1, &pos); chwrite(' ', n - 1, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { n = 0; for (j = 1; j <= nmlngth; j++) { if (nayme[p->index - 1][j - 1] != '\0') n = j; } chwrite(':', 1, &pos, leftedge, screenwidth); for (j = 0; j < n; j++) chwrite(nayme[p->index - 1][j], 1, &pos, leftedge, screenwidth); } putchar('\n'); } /* dolmove_drawline */ void dolmove_printree() { /* prints out diagram of the tree */ long tipy; long i, dow; if (!subtree) nuroot = root; if (changed || newtree) evaluate(root); if (display) dolmove_hypstates(); #ifdef WIN32 if(ibmpc || ansi){ phyClearScreen(); } else { printf("\n"); } #else if (ansi || ibmpc) printf("\033[2J\033[H"); else putchar('\n'); #endif tipy = 1; dow = down; if (spp * dow > screenlines && !subtree) dow--; printf("(unrooted)"); if (display) { printf(" "); makechar(onne); printf(":1 "); makechar(question); printf(":? "); makechar(zerro); printf(":0 "); makechar(polym); printf(":0/1"); } else printf(" "); if (!earlytree) { printf("%10.1f Steps", -like); } if (display) printf(" SITE%4ld", dispchar); else printf(" "); if (!earlytree) { printf(" %3ld chars compatible\n", compatible); } printf("%-20s",dollo ? 
"Dollo" : "Polymorphism"); if (changed && !earlytree) { if (-like < bestyet) { printf(" BEST YET!"); bestyet = -like; } else if (fabs(-like - bestyet) < 0.000001) printf(" (as good as best)"); else { if (-like < gotlike) printf(" better"); else if (-like > gotlike) printf(" worse!"); } } printf("\n"); farthest = 0; coordinates(nuroot, &tipy, 1.5, &farthest); vmargin = 5; treelines = tipy - dow; if (topedge != 1){ printf("** %ld lines above screen **\n", topedge - 1); vmargin++;} if ((treelines - topedge + 1) > (screenlines - vmargin)) vmargin++; for (i = 1; i <= treelines; i++) { if (i >= topedge && i < topedge + screenlines - vmargin) dolmove_drawline(i); } if ((treelines - topedge + 1) > (screenlines - vmargin)) printf("** %ld lines below screen **\n", treelines - (topedge - 1 + screenlines - vmargin)); if (treelines - topedge + vmargin + 1 < screenlines) putchar('\n'); gotlike = -like; changed = false; } /* dolmove_printree */ void arbitree() { long i; root = treenode[0]; add2(treenode[0], treenode[1], treenode[spp], &root, restoring, wasleft, treenode); for (i = 3; i <= (spp); i++) add2(treenode[spp + i - 3], treenode[i - 1], treenode[spp + i - 2], &root, restoring, wasleft, treenode); for (i = 0; i < (nonodes); i++) in_tree[i] = true; } /* arbitree */ void yourtree() { long i, j; boolean ok; root = treenode[0]; add2(treenode[0], treenode[1], treenode[spp], &root, restoring, wasleft, treenode); i = 2; do { i++; dolmove_printree(); printf("Add species%3ld: ", i); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); do { printf("\nbefore node (type number): "); inpnum(&j, &ok); ok = (ok && ((j >= 1 && j < i) || (j > spp && j < spp + i - 1))); if (!ok) printf("Impossible number. Please try again:\n"); } while (!ok); add2(treenode[j - 1], treenode[i - 1], treenode[spp + i - 2], &root, restoring, wasleft, treenode); } while (i != spp); for (i = 0; i < (nonodes); i++) in_tree[i] = true; } /* yourtree */ void initdolmovenode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ /* LM 7/27 I added this function and the commented lines around */ /* treeread() to get the program running, but all 4 move programs*/ /* are improperly integrated into the v4.0 support files. 
As is */ /* this is a patchwork function */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, chars, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, chars, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); /* process lengths and discard */ default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; /*length should never occur */ } } /* initdolmovenode */ void buildtree() { long i, j, nextnode; node *p; char* treestr; changed = false; newtree = false; switch (how) { case arb: arbitree(); break; case use: names = (boolean *)Malloc(spp*sizeof(boolean)); firsttree = true; /**/ nodep = NULL; /**/ nextnode = 0; /**/ haslengths = 0; /**/ zeros = (long *)Malloc(chars*sizeof(long)); /**/ for (i = 0; i < chars; i++) /**/ zeros[i] = 0; /**/ treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdolmovenode,false,nonodes); for (i = spp; i < (nonodes); i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->stateone = (bitptr)Malloc(words*sizeof(long)); p->statezero = (bitptr)Malloc(words*sizeof(long)); p = p->next; } } /* debug: see comment at initdolmovenode() */ /*treeread(which, ch, &root, treenode, names);*/ for (i = 0; i < (spp); i++) in_tree[i] = names[i]; free(names); FClose(intree); break; case spec: yourtree(); break; } outgrno = root->next->back->index; if (in_tree[outgrno - 1]) reroot(treenode[outgrno - 1]); } /* buildtree */ void rearrange() { long i, j; boolean ok1, ok2; node *p, *q; printf("Remove everything to the right of which node? "); inpnum(&i, &ok1); ok1 = (ok1 && i >= 1 && i < spp * 2 && i != root->index); if (ok1) { printf("Add before which node? "); inpnum(&j, &ok2); ok2 = (ok2 && j >= 1 && j < spp * 2); if (ok2) { ok2 = (treenode[j - 1] != treenode[treenode[i - 1]->back->index - 1]); p = treenode[j - 1]; while (p != root) { ok2 = (ok2 && p != treenode[i - 1]); p = treenode[p->back->index - 1]; } if (ok1 && ok2) { what = i; q = treenode[treenode[i - 1]->back->index - 1]; if (q->next->back->index == i) fromwhere = q->next->next->back->index; else fromwhere = q->next->back->index; towhere = j; re_move2(&treenode[i - 1], &q, &root, &wasleft, treenode); add2(treenode[j - 1], treenode[i - 1], q, &root, restoring, wasleft, treenode); } lastop = rearr; } } changed = (ok1 && ok2); dolmove_printree(); if (!(ok1 && ok2)) printf("Not a possible rearrangement. Try again: \n"); else { oldwritten = written; written = false; } } /* rearrange */ void tryadd(node *p, node **item, node **nufork, double *place) { /* temporarily adds one fork and one tip to the tree. 
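Each candidate position p is tried with add2, scored with evaluate, and immediately undone with re_move2, so the tree is left unchanged.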
Records scores in ARRAY place */ add2(p, *item, *nufork, &root, restoring, wasleft, treenode); evaluate(root); place[p->index - 1] = -like; re_move2(item, nufork, &root, &wasleft, treenode); } /* tryadd */ void addpreorder(node *p, node *item_, node *nufork_, double *place) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ node *item, *nufork; item = item_; nufork = nufork_; if (p == NULL) return; tryadd(p, &item,&nufork,place); if (!p->tip) { addpreorder(p->next->back, item,nufork,place); addpreorder(p->next->next->back,item,nufork,place); } } /* addpreorder */ void try() { /* Remove node, try it in all possible places */ double *place; long i, j, oldcompat; double current; node *q, *dummy, *rute; boolean tied, better, ok; printf("Try other positions for which node? "); inpnum(&i, &ok); if (!(ok && i >= 1 && i <= nonodes && i != root->index)) { printf("Not a possible choice! "); return; } printf("WAIT ...\n"); place = (double *)Malloc(nonodes*sizeof(double)); for (j = 0; j < (nonodes); j++) place[j] = -1.0; evaluate(root); current = -like; oldcompat = compatible; what = i; q = treenode[treenode[i - 1]->back->index - 1]; if (q->next->back->index == i) fromwhere = q->next->next->back->index; else fromwhere = q->next->back->index; rute = root; if (root->index == treenode[i - 1]->back->index) { if (treenode[treenode[i - 1]->back->index - 1]->next->back == treenode[i - 1]) rute = treenode[treenode[i - 1]->back->index - 1]->next->next->back; else rute = treenode[treenode[i - 1]->back->index - 1]->next->back; } re_move2(&treenode[i - 1], &dummy, &root, &wasleft, treenode); oldleft = wasleft; root = rute; addpreorder(root, treenode[i - 1], dummy, place); wasleft = oldleft; restoring = true; add2(treenode[fromwhere - 1], treenode[what - 1], dummy, &root, restoring, wasleft, treenode); like = -current; compatible = oldcompat; restoring = false; better = false; printf(" BETTER: "); for (j = 1; j <= (nonodes); j++) { if (place[j - 1] < current && place[j - 1] >= 0.0) { printf("%3ld:%6.2f", j, place[j - 1]); better = true; } } if (!better) printf(" NONE"); printf("\n TIED: "); tied = false; for (j = 1; j <= (nonodes); j++) { if (fabs(place[j - 1] - current) < 1.0e-6 && j != fromwhere) { if (j < 10) printf("%2ld", j); else printf("%3ld", j); tied = true; } } if (tied) printf(":%6.2f\n", current); else printf("NONE\n"); changed = true; free(place); } /* try */ void undo() { /* restore to tree before last rearrangement */ long temp; boolean btemp; node *q; switch (lastop) { case rearr: restoring = true; oldleft = wasleft; re_move2(&treenode[what - 1], &q, &root, &wasleft, treenode); btemp = wasleft; wasleft = oldleft; add2(treenode[fromwhere - 1], treenode[what - 1], q, &root, restoring, wasleft, treenode); wasleft = btemp; restoring = false; temp = fromwhere; fromwhere = towhere; towhere = temp; changed = true; break; case flipp: q = treenode[atwhat - 1]->next->back; treenode[atwhat - 1]->next->back = treenode[atwhat - 1]->next->next->back; treenode[atwhat - 1]->next->next->back = q; treenode[atwhat - 1]->next->back->back = treenode[atwhat - 1]->next; treenode[atwhat - 1]->next->next->back->back = treenode[atwhat - 1]->next->next; break; case reroott: restoring = true; temp = oldoutgrno; oldoutgrno = outgrno; outgrno = temp; reroot(treenode[outgrno - 1]); restoring = false; break; case none: /* blank case */ break; } dolmove_printree(); if (lastop == none) { printf("No operation to undo! 
"); return; } btemp = oldwritten; oldwritten = written; written = btemp; } /* undo */ void treewrite(boolean done) { /* write out tree to a file */ if (!done) dolmove_printree(); if (waswritten && ch == 'N') return; col = 0; treeout(root, 1, &col, root); printf("\nTree written to file \"%s\"\n\n", outtreename); waswritten = true; written = true; FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif } /* treewrite */ void clade() { /* pick a subtree and show only that on screen */ long i; boolean ok; printf("Select subtree rooted at which node (0 for whole tree)? "); inpnum(&i, &ok); ok = (ok && ((unsigned)i) <= ((unsigned)nonodes)); if (ok) { subtree = (i > 0); if (subtree) nuroot = treenode[i - 1]; else nuroot = root; } dolmove_printree(); if (!ok) printf("Not possible to use this node. "); } /* clade */ void flip() { /* flip at a node left-right */ long i; boolean ok; node *p; printf("Flip branches at which node? "); inpnum(&i, &ok); ok = (ok && i > spp && i <= nonodes); if (ok) { p = treenode[i - 1]->next->back; treenode[i - 1]->next->back = treenode[i - 1]->next->next->back; treenode[i - 1]->next->next->back = p; treenode[i - 1]->next->back->back = treenode[i - 1]->next; treenode[i - 1]->next->next->back->back = treenode[i - 1]->next->next; atwhat = i; lastop = flipp; } dolmove_printree(); if (ok) { oldwritten = written; written = false; return; } if (i >= 1 && i <= spp) printf("Can't flip there. "); else printf("No such node. "); } /* flip */ void changeoutgroup() { long i; boolean ok; oldoutgrno = outgrno; do { printf("Which node should be the new outgroup? "); inpnum(&i, &ok); ok = (ok && in_tree[i - 1] && i >= 1 && i <= nonodes && i != root->index); if (ok) outgrno = i; } while (!ok); if (in_tree[outgrno - 1]) reroot(treenode[outgrno - 1]); changed = true; lastop = reroott; dolmove_printree(); oldwritten = written; written = false; } /* changeoutgroup */ void redisplay() { boolean done; char input[100]; done = false; waswritten = false; do { fprintf(stderr, "NEXT? (R # + - S . T U W O F H J K L C ? X Q) "); fprintf(stderr, "(? 
for Help): "); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); ch = input[0]; uppercase(&ch); if (strchr("RSWH#.O?+TFX-UCQHJKL",ch) != NULL){ switch (ch) { case 'R': rearrange(); break; case '#': nextinc(&dispchar, &dispword, &dispbit, chars, bits, &display, numsteps, weight); dolmove_printree(); break; case '+': nextchar(&dispchar, &dispword, &dispbit, chars, bits, &display); dolmove_printree(); break; case '-': prevchar(&dispchar, &dispword, &dispbit, chars, bits, &display); dolmove_printree(); break; case 'S': show(&dispchar, &dispword, &dispbit, chars, bits, &display); dolmove_printree(); break; case '.': dolmove_printree(); break; case 'T': try(); break; case 'U': undo(); break; case 'W': treewrite(done); break; case 'O': changeoutgroup(); break; case 'F': flip(); break; case 'H': window(left, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dolmove_printree(); break; case 'J': window(downn, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dolmove_printree(); break; case 'K': window(upp, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dolmove_printree(); break; case 'L': window(right, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dolmove_printree(); break; case 'C': clade(); break; case '?': help("character"); dolmove_printree(); break; case 'X': done = true; break; case 'Q': done = true; break; } } } while (!done); if (!written) { do { fprintf(stderr,"Do you want to write out the tree to a file? (Y or N): "); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); ch = input[0]; } while (ch != 'Y' && ch != 'y' && ch != 'N' && ch != 'n'); } if (ch == 'Y' || ch == 'y') treewrite(done); } /* redisplay */ void treeconstruct() { /* constructs a binary tree from the pointers in treenode. */ restoring = false; subtree = false; display = false; dispchar = 0; fullset = (1L << (bits + 1)) - (1L << 1); guess = (Char *)Malloc(chars*sizeof(Char)); numsteps = (steptr)Malloc(chars*sizeof(long)); earlytree = true; buildtree(); waswritten = false; printf("\nComputing steps needed for compatibility in sites ...\n\n"); newtree = true; earlytree = false; dolmove_printree(); bestyet = -like; gotlike = -like; lastop = none; newtree = false; written = false; lastop = none; redisplay(); } /* treeconstruct */ int main(int argc, Char *argv[]) { /* Interactive Dollo/polymorphism parsimony */ /* reads in spp, chars, and the data. Then calls treeconstruct to construct the tree and query the user */ #ifdef MAC argc = 1; /* macsetup("Dolmove",""); */ argv[0] = "Dolmove"; #endif init(argc, argv); emboss_getoptions("fdolmove",argc,argv); progname = argv[0]; topedge = 1; leftedge = 1; ibmpc = IBMCRT; ansi = ANSICRT; root = NULL; bits = 8*sizeof(long) - 1; doinput(); configure(); treeconstruct(); if (waswritten) { FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif } FClose(infile); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Interactive Dollo/polymorphism parsimony */ PHYLIPNEW-3.69.650/src/dnamlk.c0000664000175000017500000016434511616234204012503 00000000000000/* version 3.6. (c) Copyright 1986-2007 by the University of Washington and by Joseph Felsenstein. Written by Joseph Felsenstein. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include #include #include "phylip.h" #include "seq.h" #include "mlclock.h" #include "printree.h" #define over 60 /* Maximum xcoord of tip nodes */ /* These are redefined from phylip.h */ /* Fractional accuracy to which node tymes are optimized */ #undef epsilon double epsilon = 1e-3; /* Number of (failed) passes over the tree before giving up */ #undef smoothings #define smoothings 4 #undef initialv #define initialv 0.3 typedef struct valrec { double rat, ratxi, ratxv, orig_zz, z1, y1, z1zz, z1yy, xiz1, xiy1xv; double *ww, *zz, *wwzz, *vvzz; } valrec; struct options { boolean auto_; boolean ctgry; long categs; long rcategs; boolean freqsfrom; boolean gama; boolean invar; boolean global; boolean hypstate; boolean jumble; long njumble; double lambda; double lambda1; boolean lengthsopt; boolean trout; double ttratio; boolean ttr; boolean usertree; boolean weights; boolean printdata; boolean dotdiff; boolean progress; boolean treeprint; boolean interleaved; }; typedef double contribarr[maxcategs]; valrec ***tbl; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ //void getoptions(void); static void emboss_getoptions(char *pgm, int argc, char *argv[]); static void initmemrates(void); static void allocrest(void); static void doinit(void); static void inputoptions(void); static void makeweights(void); static void getinput(void); static void inittable_for_usertree (char *); static void inittable(void); static void alloc_nvd(long, nuview_data *); static void free_nvd(nuview_data *); static boolean nuview(node *); static double dnamlk_evaluate(node *); static boolean update(node *); static boolean smooth(node *); static void restoradd(node *, node *, node *, double); static void dnamlk_add(node *, node *, node *); static void dnamlk_re_move(node **, node **, boolean); static void tryadd(node *, node **, node **); static void addpreorder(node *, node *, node *, boolean, boolean); static boolean tryrearr(node *); static boolean repreorder(node *); static void rearrange(node **); static void initdnamlnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); static boolean tymetrav(node *); static void reconstr(node *, long); static void rectrav(node *, long, long); static void summarize(FILE *fp); static void dnamlk_treeout(node *); static void init_tymes(node *p, double minlength); static void treevaluate(void); static void maketree(void); static void reallocsites(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; double *rrate; long sites, weightsum, categs, datasets, ith, njumble, jumb, numtrees, shimotrees; /* sites = number of sites in actual sequences numtrees = number of user-defined trees */ long inseed, inseed0, mx, mx0, mx1; boolean freqsfrom, global, global2=0, jumble, trout, usertree, weights, rctgry, ctgry, ttr, auto_, progress, mulsets, firstset, hypstate, smoothit, polishing, justwts, gama, invar; boolean lengthsopt = false; /* Use lengths in user tree option */ boolean lngths = false; /* Actually use lengths (depends on each input tree) */ tree curtree, bestree, bestree2; node *qwhere, *grbg; double *tymes; double xi, xv, ttratio, ttratio0, freqa, freqc, freqg, freqt, freqr, freqy, freqar, freqcy, freqgr, 
freqty, fracchange, sumrates, cv, alpha, lambda, lambda1, invarfrac; long *enterorder; steptr aliasweight; double *rate; longer seed; double *probcat; long iprobcat; contribarr *contribution; char *progname; long rcategs; long **mp; char basechar[16] = "acmgrsvtwyhkdbn"; /* Local variables for maketree, propagated globally for C version: */ long k, maxwhich, col; double like, bestyet, maxlogl; boolean lastsp; boolean smoothed; /* set true before each smoothing run, and set false each time a branch cannot be completely optimized. */ double *l0gl; double expon1i[maxcategs], expon1v[maxcategs], expon2i[maxcategs], expon2v[maxcategs]; node *there; double **l0gf; Char ch, ch2; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr gammamethod = NULL; ajint i; AjPFloat basefreq; AjPFloat hmmrates; AjPFloat hmmprob; AjPFloat arrayval; double probsum=0.0; auto_ = false; ctgry = false; rctgry = false; categs = 1; rcategs = 1; freqsfrom = true; gama = false; invar = false; global = false; hypstate = false; jumble = false; njumble = 1; lambda = 1.0; lambda1 = 0.0; lngths = false; trout = true; ttratio = 2.0; ttr = false; usertree = false; weights = false; printdata = false; progress = true; treeprint = true; interleaved = true; datasets = 1; mulsets = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; lngths = ajAcdGetBoolean("lengths"); } numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } categs = ajAcdGetInt("ncategories"); if (categs > 1) { ctgry = true; rate = (double *) Malloc(categs * sizeof(double)); arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } else{ rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } phyloratecat = ajAcdGetProperties("categories"); gammamethod = ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "n")) { rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); iprobcat = rcategs; rrate[0] = 1.0; probcat[0] = 1.0; } else { rctgry = true; auto_ = ajAcdGetBoolean("adjsite"); if(auto_) { lambda = ajAcdGetFloat("lambda"); lambda = 1 / lambda; lambda1 = 1.0 - lambda; } } if(ajStrMatchC(gammamethod, "g")) { gama = true; rcategs = ajAcdGetInt("ngammacat"); cv = ajAcdGetFloat("gammacoefficient"); alpha = 1.0 / (cv*cv); initmemrates(); initgammacat(rcategs, alpha, rrate, probcat); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; rcategs = ajAcdGetInt("ninvarcat"); cv = ajAcdGetFloat("invarcoefficient"); alpha = 1.0 / (cv*cv); invarfrac = ajAcdGetFloat("invarfrac"); initmemrates(); initgammacat(rcategs-1, alpha, rrate, probcat); for (i=0; i < rcategs-1 ; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else if(ajStrMatchC(gammamethod, "h")) { rcategs = ajAcdGetInt("nhmmcategories"); initmemrates(); hmmrates = ajAcdGetArray("hmmrates"); emboss_initcategs(hmmrates, rcategs,rrate); hmmprob = ajAcdGetArray("hmmprobabilities"); for (i=0; i < rcategs; i++){ probcat[i] = ajFloatGet(hmmprob, i); probsum += probcat[i]; } } ttratio = ajAcdGetFloat("ttratio"); if(!usertree) { global 
= ajAcdGetBoolean("global"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); hypstate = ajAcdGetBoolean("hypstate"); freqsfrom = ajAcdGetToggle("freqsfrom"); if(!freqsfrom) { basefreq = ajAcdGetArray("basefreq"); freqa = ajFloatGet(basefreq, 0); freqc = ajFloatGet(basefreq, 1); freqg = ajFloatGet(basefreq, 2); freqt = ajFloatGet(basefreq, 3); } embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nNucleic acid sequence Maximum Likelihood"); fprintf(outfile, " method, version %s\n\n",VERSION); /* printf("\n mulsets: %s",(mulsets ? "true" : "false")); printf("\n datasets : %ld",(datasets)); printf("\n rctgry : %s",(rctgry ? "true" : "false")); printf("\n gama : %s",(gama ? "true" : "false")); printf("\n invar : %s",(invar ? "true" : "false")); printf("\n\n ctgry: %s",(ctgry ? "true" : "false")); printf("\n categs : %ld",(categs)); printf("\n rcategs : %ld",(rcategs)); printf("\n auto_: %s",(auto_ ? "true" : "false")); printf("\n freqsfrom : %s",(freqsfrom ? "true" : "false")); printf("\n global : %s",(global ? "true" : "false")); printf("\n hypstate : %s",(hypstate ? "true" : "false")); printf("\n invar : %s",(invar ? "true" : "false")); printf("\n jumble : %s",(jumble ? "true" : "false")); printf("\n njumble : %ld",(njumble)); printf("\n lngths : %s",(lngths ? "true" : "false")); printf("\n lambda : %f",(lambda)); printf("\n lambda1 : %f",(lambda1)); printf("\n cv : %f",(cv)); printf("\n freqa : %f",(freqa)); printf("\n freqc : %f",(freqc)); printf("\n freqg : %f",(freqg)); printf("\n freqt : %f",(freqt)); printf("\n trout : %s",(trout ? "true" : "false")); printf("\n ttratio : %f",(ttratio)); printf("\n probsum : %f",(probsum)); printf("\n ttr : %s",(ttr ? "true" : "false")); printf("\n usertree : %s",(usertree ? "true" : "false")); printf("\n weights: %s",(weights ? "true" : "false")); printf("\n printdata : %s",(printdata ? "true" : "false")); printf("\n progress : %s",(progress ? "true" : "false")); printf("\n treeprint: %s",(treeprint ? "true" : "false")); printf("\n interleaved : %s \n\n",(interleaved ? 
"true" : "false")); for (i=0;iStr[ith-1], sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if (ctgry && categs > 1) { inputcategsstr(phyloratecat->Str[0], 0, sites, category, categs, "DnaMLK"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) printweights(outfile, 0, sites, weight, "Sites"); } /* inputoptions */ static void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2(sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { ally[alias[i - 1] - 1] = alias[i - 1]; location[alias[i - 1] - 1] = i; } contribution = (contribarr *) Malloc( endsite*sizeof(contribarr)); } /* makeweights */ static void getinput(void) { /* reads the input data */ inputoptions(); if (!freqsfrom) getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); if (!justwts || firstset) seq_inputdata(seqsets[ith-1], sites); makeweights(); setuptree2(&curtree); if (!usertree) { setuptree2(&bestree); if (njumble > 1) setuptree2(&bestree2); } allocx(nonodes, rcategs, curtree.nodep, usertree); if (!usertree) { allocx(nonodes, rcategs, bestree.nodep, 0); if (njumble > 1) allocx(nonodes, rcategs, bestree2.nodep, 0); } makevalues2(rcategs, curtree.nodep, endsite, spp, y, alias); if (freqsfrom) { empiricalfreqs(&freqa, &freqc, &freqg, &freqt, aliasweight, curtree.nodep); getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); } if (!justwts || firstset) fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n\n", ttratio); } /* getinput */ static void inittable_for_usertree (char* treestr) { /* If there's a user tree, then the ww/zz/wwzz/vvzz elements need to be allocated appropriately. */ long num_comma; long i, j; /* First, figure out the largest possible furcation, i.e. the number of commas plus one */ countcomma (treestr, &num_comma); num_comma++; for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { /* Free the stuff allocated assuming bifurcations */ free (tbl[i][j]->ww); free (tbl[i][j]->zz); free (tbl[i][j]->wwzz); free (tbl[i][j]->vvzz); /* Then allocate for worst-case multifurcations */ tbl[i][j]->ww = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( num_comma * sizeof (double)); } } } /* inittable_for_usertree */ static void freetable(void) { long i, j; for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { free(tbl[i][j]->ww); free(tbl[i][j]->zz); free(tbl[i][j]->wwzz); free(tbl[i][j]->vvzz); } } for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) free(tbl[i][j]); free(tbl[i]); } free(tbl); } static void inittable(void) { /* Define a lookup table. 
Precompute values and print them out in tables */ long i, j; double sumrates; tbl = (valrec ***) Malloc( rcategs * sizeof(valrec **)); for (i = 0; i < rcategs; i++) { tbl[i] = (valrec **) Malloc( categs*sizeof(valrec *)); for (j = 0; j < categs; j++) tbl[i][j] = (valrec *) Malloc( sizeof(valrec)); } for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { tbl[i][j]->rat = rrate[i]*rate[j]; tbl[i][j]->ratxi = tbl[i][j]->rat * xi; tbl[i][j]->ratxv = tbl[i][j]->rat * xv; /* Allocate assuming bifurcations, will be changed later if neccesarry (i.e. there's a user tree) */ tbl[i][j]->ww = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( 2 * sizeof (double)); } } sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]->rat; } sumrates /= (double)sites; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->rat /= sumrates; tbl[i][j]->ratxi /= sumrates; tbl[i][j]->ratxv /= sumrates; } if(jumb > 1) return; if (gama || invar) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } if (rcategs > 1) { fprintf(outfile, "\nState in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) { fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (i = 0; i < categs; i++) fprintf(outfile, "%9ld%16.3f\n", i+1, rate[i]); fprintf(outfile, "\n\n"); } } /* inittable */ static void alloc_nvd(long num_sibs, nuview_data *local_nvd) { /* Allocate blocks of memory appropriate for the number of siblings a given node has */ local_nvd->yy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->wwzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vvzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sum = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->xx = (sitelike *) Malloc( num_sibs * sizeof (sitelike)); } /* alloc_nvd */ static void free_nvd(nuview_data *local_nvd) { /* The natural complement to the alloc version */ free (local_nvd->yy); free (local_nvd->wwzz); free (local_nvd->vvzz); free (local_nvd->vzsumr); free (local_nvd->vzsumy); free (local_nvd->sum); free (local_nvd->sumr); free (local_nvd->sumy); free (local_nvd->xx); } /* free_nvd */ static boolean nuview(node *p) { /* Recursively update summary data for subtree rooted at p. Returns true if * view has changed. 
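A sketch of the bookkeeping, under my reading of the F84-style factors cached in tbl: for each child branch of scaled length lw = -|tyme(p) - tyme(child)|, Loop 1 stores ww = exp(rat*xi*lw) and zz = exp(rat*xv*lw); wwzz = ww*zz is then the chance of no substitution event of either kind, vvzz = (1-ww)*zz of a within-purine/pyrimidine event only, and yy = 1-zz of at least one general event drawing a base from the equilibrium frequencies. Loop 2 multiplies the per-child contributions into p_xx for every site pattern and rate class; whenever the largest entry maxx falls below MIN_DOUBLE the pattern is rescaled by fix_x(), and that scaling, plus the children's accumulated corrections, is recorded in p->underflows[i] so dnamlk_evaluate() can add it back on the log scale.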
*/ long i, j, k, l, num_sibs = 0, sib_index; nuview_data *local_nvd; node *q; node *sib_ptr, *sib_back_ptr; sitelike p_xx; double lw; double correction; double maxx; assert(p != NULL); if ( p == NULL ) return false; if ( p->tip ) return false; /* Tips do not need to be initialized */ for (q = p->next; q != p; q = q->next) { num_sibs++; if ( q->back != NULL && !q->tip) { if ( nuview(q->back) ) p->initialized = false; } } if ( p->initialized ) return false; /* At this point, all views downstream should be initialized. * If not, we have a problem. */ /*assert( invalid_descendant_view(p) == NULL );*/ /* Allocate the structure and blocks therein for variables used in this function */ local_nvd = (nuview_data *) Malloc( sizeof (nuview_data)); alloc_nvd (num_sibs, local_nvd); /* Loop 1: makes assignments to tbl based on some combination of what's already in tbl and the children's value of v */ sib_ptr = p; for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (sib_back_ptr != NULL) lw = -fabs(p->tyme - sib_back_ptr->tyme); else lw = 0.0; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->ww[sib_index] = exp(tbl[i][j]->ratxi * lw); tbl[i][j]->zz[sib_index] = exp(tbl[i][j]->ratxv * lw); tbl[i][j]->wwzz[sib_index] = tbl[i][j]->ww[sib_index] * tbl[i][j]->zz[sib_index]; tbl[i][j]->vvzz[sib_index] = (1.0 - tbl[i][j]->ww[sib_index]) * tbl[i][j]->zz[sib_index]; } } /* Loop 2: */ for (i = 0; i < endsite; i++) { correction = 0; maxx = 0; k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { /* Loop 2.1 */ sib_ptr = p; for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; local_nvd->wwzz[sib_index] = tbl[j][k]->wwzz[sib_index]; local_nvd->vvzz[sib_index] = tbl[j][k]->vvzz[sib_index]; local_nvd->yy[sib_index] = 1.0 - tbl[j][k]->zz[sib_index]; if (sib_back_ptr != NULL) { memcpy(local_nvd->xx[sib_index], sib_back_ptr->x[i][j], sizeof(sitelike)); if ( j == 0) correction += sib_back_ptr->underflows[i]; } else { local_nvd->xx[sib_index][0] = 1.0; local_nvd->xx[sib_index][(long)C - (long)A] = 1.0; local_nvd->xx[sib_index][(long)G - (long)A] = 1.0; local_nvd->xx[sib_index][(long)T - (long)A] = 1.0; } } /* Loop 2.2 */ for (sib_index=0; sib_index < num_sibs; sib_index++) { local_nvd->sum[sib_index] = local_nvd->yy[sib_index] * (freqa * local_nvd->xx[sib_index][(long)A] + freqc * local_nvd->xx[sib_index][(long)C] + freqg * local_nvd->xx[sib_index][(long)G] + freqt * local_nvd->xx[sib_index][(long)T]); local_nvd->sumr[sib_index] = freqar * local_nvd->xx[sib_index][(long)A] + freqgr * local_nvd->xx[sib_index][(long)G]; local_nvd->sumy[sib_index] = freqcy * local_nvd->xx[sib_index][(long)C] + freqty * local_nvd->xx[sib_index][(long)T]; local_nvd->vzsumr[sib_index] = local_nvd->vvzz[sib_index] * local_nvd->sumr[sib_index]; local_nvd->vzsumy[sib_index] = local_nvd->vvzz[sib_index] * local_nvd->sumy[sib_index]; } /* Initialize to one, multiply incremental values for every sibling a node has */ p_xx[(long)A] = 1 ; p_xx[(long)C] = 1 ; p_xx[(long)G] = 1 ; p_xx[(long)T] = 1 ; for (sib_index=0; sib_index < num_sibs; sib_index++) { p_xx[(long)A] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)A] + local_nvd->vzsumr[sib_index]; p_xx[(long)C] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)C] + local_nvd->vzsumy[sib_index]; p_xx[(long)G] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * 
local_nvd->xx[sib_index][(long)G] + local_nvd->vzsumr[sib_index]; p_xx[(long)T] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)T] + local_nvd->vzsumy[sib_index]; } for ( l = 0 ; l < ((long)T - (long)A + 1 ) ; l++ ) { if ( p_xx[l] > maxx ) maxx = p_xx[l]; } /* And the final point of this whole function: */ memcpy(p->x[i][j], p_xx, sizeof(sitelike)); } p->underflows[i] = 0; if ( maxx < MIN_DOUBLE) fix_x(p, i, maxx,rcategs); p->underflows[i] += correction; } free_nvd (local_nvd); free (local_nvd); p->initialized = true; return true; } /* nuview */ static double dnamlk_evaluate(node *p) { /* Evaluate and return the log likelihood of the current tree * as seen from the branch from p to p->back. If p is the root node, * the first child branch is used instead. Views are updated as needed. */ contribarr tterm; static contribarr like, nulike, clai; double sum, sum2, sumc=0, y, lz, y1, z1zz, z1yy, prod12, prod1, prod2, prod3, sumterm, lterm; long i, j, k, lai; node *q, *r; double *x1, *x2; /* pointers to sitelike elements in node->x */ sum = 0.0; assert( all_tymes_valid(curtree.root, 0.0, false) ); /* Root node has no branch, so use branch to first child */ if (p == curtree.root) p = p->next; r = p; q = p->back; nuview(r); nuview(q); y = fabs(r->tyme - q->tyme); lz = -y; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->orig_zz = exp(tbl[i][j]->ratxi * lz); tbl[i][j]->z1 = exp(tbl[i][j]->ratxv * lz); tbl[i][j]->z1zz = tbl[i][j]->z1 * tbl[i][j]->orig_zz; tbl[i][j]->z1yy = tbl[i][j]->z1 - tbl[i][j]->z1zz; } for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { if (y > 0.0) { y1 = 1.0 - tbl[j][k]->z1; z1zz = tbl[j][k]->z1zz; z1yy = tbl[j][k]->z1yy; } else { y1 = 0.0; z1zz = 1.0; z1yy = 0.0; } x1 = r->x[i][j]; prod1 = freqa * x1[0] + freqc * x1[(long)C - (long)A] + freqg * x1[(long)G - (long)A] + freqt * x1[(long)T - (long)A]; x2 = q->x[i][j]; prod2 = freqa * x2[0] + freqc * x2[(long)C - (long)A] + freqg * x2[(long)G - (long)A] + freqt * x2[(long)T - (long)A]; prod3 = (x1[0] * freqa + x1[(long)G - (long)A] * freqg) * (x2[0] * freqar + x2[(long)G - (long)A] * freqgr) + (x1[(long)C - (long)A] * freqc + x1[(long)T - (long)A] * freqt) * (x2[(long)C - (long)A] * freqcy + x2[(long)T - (long)A] * freqty); prod12 = freqa * x1[0] * x2[0] + freqc * x1[(long)C - (long)A] * x2[(long)C - (long)A] + freqg * x1[(long)G - (long)A] * x2[(long)G - (long)A] + freqt * x1[(long)T - (long)A] * x2[(long)T - (long)A]; tterm[j] = z1zz * prod12 + z1yy * prod3 + y1 * prod1 * prod2; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, sizeof(contribarr)); if (!auto_ && usertree && (which <= shimotrees)) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } if (auto_) { for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], sizeof(contribarr)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, sizeof(contribarr)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * 
like[i]; sum += log(sum2); } curtree.likelihood = sum; if (auto_ || !usertree) return sum; if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum; } /* dnamlk_evaluate */ static boolean update(node *p) { /* Conditionally optimize tyme at a node. Return true if successful. */ if (p == NULL) return false; if ( (!usertree) || (usertree && !lngths) ) return makenewv(p); return false; } /* update */ static boolean smooth(node *p) { node *q = NULL; boolean success; if (p == NULL) return false; if (p->tip) return false; /* optimize tyme here */ success = update(p); if (smoothit || polishing) { for (q = p->next; q != p; q = q->next) { /* smooth subtrees */ success = smooth(q->back) || success; /* optimize tyme again after each subtree */ success = update(p) || success; } } return success; } /* smooth */ static void restoradd(node *below, node *newtip, node *newfork, double prevtyme) { /* restore "new" tip and fork to place "below". restore tymes */ /* assumes bifurcation */ hookup(newfork, below->back); hookup(newfork->next, below); hookup(newtip, newfork->next->next); curtree.nodep[newfork->index-1] = newfork; setnodetymes(newfork,prevtyme); } /* restoradd */ static void dnamlk_add(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its descendant, newtip, into the tree. */ long i; node *p; node *above; double newtyme; boolean success; assert( all_tymes_valid(curtree.root, 0.98*MIN_BRANCH_LENGTH, false) ); /*assert( floating_fork(newfork) );*/ assert( newtip->back == NULL ); /* Get parent nodelets */ below = pnode(&curtree, below); newfork = pnode(&curtree, newfork); newtip = pnode(&curtree, newtip); /* Join above node to newfork */ if (below->back == NULL) newfork->back = NULL; else { above = below->back; /* unhookup(below, above); */ hookup(newfork, above); } /* Join below to newfork->next->next */ hookup(below, newfork->next->next); /* Join newtip to newfork->next */ hookup(newfork->next, newtip); /* Move root if inserting there */ if (curtree.root == below) curtree.root = newfork; /* p = child with lowest tyme */ p = newtip->tyme < below->tyme ? newtip : below; /* If not at root, set newfork tyme to average below/above */ if (newfork->back != NULL) { if (p->tyme > newfork->back->tyme) newtyme = (p->tyme + newfork->back->tyme) / 2.0; else newtyme = p->tyme - INSERT_MIN_TYME; if (p->tyme - newtyme < MIN_BRANCH_LENGTH) newtyme = p->tyme - MIN_BRANCH_LENGTH; setnodetymes(newfork, newtyme); /* Now move from newfork to root, setting parent tymes older than children * by at least MIN_BRANCH_LENGTH */ p = newfork; while (p != curtree.root) { if (p->back->tyme > p->tyme - MIN_BRANCH_LENGTH) setnodetymes(p->back, p->tyme - MIN_BRANCH_LENGTH); else break; /* get parent node */ p = pnode(&curtree, p->back); } } else { /* root == newfork */ /* make root 2x older */ setnodetymes(newfork, p->tyme - 2*INSERT_MIN_TYME); } assert( all_tymes_valid(curtree.root, 0.98*MIN_BRANCH_LENGTH, false) ); /* Adjust branch lengths throughout */ for ( i = 1; i < smoothings; i++ ) { success = smooth(newfork); success = smooth(newfork->back) || success; if ( !success ) break; } } /* dnamlk_add */ static void dnamlk_re_move(node **item, node **fork, boolean tempadd) { /* removes nodes *item and its parent (returned in *fork), from the tree. the new descendant of fork's ancestor is made to be fork's descendant other than item. 
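In detail: item's sibling and the fork's ancestor are hooked directly to each other, every back pointer on item and on the fork ring is set to NULL so both float free, and the views around the splice are invalidated with inittrav(); if the fork was the root, the root is first moved to item's sibling, and unless tempadd is set a few smoothing passes are then run around the join.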
Item must point to node*, but *fork is not read */ node *p, *q; long i; boolean success; if ((*item)->back == NULL) { *fork = NULL; return; } *item = curtree.nodep[(*item)->index-1]; *fork = curtree.nodep[(*item)->back->index - 1]; if (curtree.root == *fork) { if (*item == (*fork)->next->back) curtree.root = (*fork)->next->next->back; else curtree.root = (*fork)->next->back; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; inittrav(p); inittrav(q); if (tempadd) return; for ( i = 1; i <= smoothings; i++ ) { success = smooth(q); if ( smoothit ) success = smooth(q->back) || success; if ( !success ) break; } } /* dnamlk_re_move */ static void tryadd(node *p, node **item, node **nufork) { /* temporarily adds one fork and one tip to the tree. if the location where they are added yields greater likelihood than other locations tested up to that time, then keeps that location as there */ if ( !global2 ) save_tymes(&curtree,tymes); dnamlk_add(p, *item, *nufork); like = dnamlk_evaluate(p); if (lastsp) { if (like >= bestree.likelihood || bestree.likelihood == UNDEFINED) { copy_(&curtree, &bestree, nonodes, rcategs); if ( global2 ) /* To be restored in maketree() */ save_tymes(&curtree,tymes); } } if (like > bestyet || bestyet == UNDEFINED) { bestyet = like; there = p; } dnamlk_re_move(item, nufork, true); if ( !global2 ) { restore_tymes(&curtree,tymes); } } /* tryadd */ static void addpreorder(node *p, node *item_, node *nufork_, boolean contin, boolean continagain) { /* Traverse tree, adding item at different locations until we * find a better likelihood. Afterwards, global 'there' will be * set to the best add location, or will be left alone if no * better could be found. */ node *item, *nufork; item = item_; nufork = nufork_; if (p == NULL) return; tryadd(p, &item, &nufork); contin = continagain; if ((!p->tip) && contin) { /* assumes bifurcation (OK) */ addpreorder(p->next->back, item, nufork, contin, continagain); addpreorder(p->next->next->back, item, nufork, contin, continagain); } } /* addpreorder */ static boolean tryrearr(node *p) { /* evaluates one rearrangement of the tree. if the new tree has greater likelihood than the old keeps the new tree and returns true. otherwise, restores the old tree and returns false. 
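(The single move tried, as I read the body: p and its parent fork are detached with dnamlk_re_move() and the fork is re-inserted on the branch above the fork's former parent, whereto, so p slides one node rootward; the new tree is kept only when the likelihood improves by at least LIKE_EPSILON, otherwise the old attachment, child order and fork tyme are put back and the saved likelihood restored.)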
*/ node *forknode; /* parent fork of p */ node *frombelow; /* other child of forknode */ node *whereto; /* parent fork of forknode */ double oldlike; /* likelihood before rearrangement */ double prevtyme; /* forknode->tyme before rearrange */ double like_delta; /* improvement in likelihood */ boolean wasonleft; /* true if p first child of forknode */ if (p == curtree.root) return false; /* forknode = parent fork of p */ forknode = curtree.nodep[p->back->index - 1]; if (forknode == curtree.root) return false; oldlike = bestyet; prevtyme = forknode->tyme; /* assumes bifurcation (OK) */ /* frombelow = other child of forknode (not p) */ if (forknode->next->back == p) { frombelow = forknode->next->next->back; wasonleft = true; } else { frombelow = forknode->next->back; wasonleft = false; } whereto = curtree.nodep[forknode->back->index - 1]; /* remove forknode and p */ dnamlk_re_move(&p, &forknode, true); /* add p and forknode as parent of whereto */ dnamlk_add(whereto, p, forknode); like = dnamlk_evaluate(p); like_delta = like - oldlike; if ( like_delta < LIKE_EPSILON && oldlike != UNDEFINED) { dnamlk_re_move(&p, &forknode, true); restoradd(frombelow, p, forknode, prevtyme); if (wasonleft && (forknode->next->next->back == p)) { hookup (forknode->next->back, forknode->next->next); hookup (forknode->next, p); } curtree.likelihood = oldlike; /* assumes bifurcation (OK) */ inittrav(forknode); inittrav(forknode->next); inittrav(forknode->next->next); return false; } else { bestyet = like; } return true; } /* tryrearr */ static boolean repreorder(node *p) { /* traverses a binary tree, calling function tryrearr at a node before calling tryrearr at its descendants. Returns true the first time rearrangement increases the tree's likelihood. */ if (p == NULL) return false; if ( !tryrearr(p) ) return false; if (p->tip) return true; /* assumes bifurcation */ if ( !repreorder(p->next->back) ) return false; if ( !repreorder(p->next->next->back) ) return false; return true; } /* repreorder */ static void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which increases the likelihood. if traversal succeeds in increasing the tree's likelihood, function rearrange runs traversal again */ while ( repreorder(*r) ) /* continue */; } /* rearrange */ static void initdnamlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* Initializes each node as it is read from user tree by treeread(). * whichinit specifies which type of initialization is to be done. */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; malloc_pheno((*p), endsite, rcategs); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); malloc_pheno(*p, endsite, rcategs); (*p)->index = nodei; break; case tip: match_names_to_data (str, nodep, p, spp); break; case iter: (*p)->initialized = false; /* Initial branch lengths start at 0.0. 
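Setting v = 0.0 and iter = true on both ends of the branch marks its length as one to be estimated rather than taken from the tree file;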
tymetrav() enforces * MIN_BRANCH_LENGTH */ (*p)->v = 0.0; (*p)->iter = true; if ((*p)->back != NULL) (*p)->back->iter = true; break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); (*p)->v = valyew / divisor / fracchange; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hslength: break; case hsnolength: if (usertree && lengthsopt && lngths) { printf("Warning: one or more lengths not defined in user tree number %ld.\n", which); printf("DNAMLK will attempt to optimize all branch lengths.\n\n"); lngths = false; } break; case treewt: break; case unittrwt: break; } } /* initdnamlnode */ static boolean tymetrav(node *p) { /* Recursively convert branch lengths to node tymes. Returns the maximum * branch length p's parent can have, which is p->tyme - max(p->v, * MIN_BRANCH_LENGTH) */ node *q; double xmax; double x; xmax = 0.0; if (!p->tip) { for (q = p->next; q != p; q = q->next) { x = tymetrav(q->back); if (xmax > x) xmax = x; } } else { x = 0.0; } setnodetymes(p,xmax); if (p->v < MIN_BRANCH_LENGTH) return xmax - MIN_BRANCH_LENGTH; else return xmax - p->v; } /* tymetrav */ static void reconstr(node *p, long n) { /* reconstruct and print out base at site n+1 at node p */ long i, j, k, m, first, second, num_sibs; double f, sum, xx[4]; node *q; if ((ally[n] == 0) || (location[ally[n]-1] == 0)) putc('.', outfile); else { j = location[ally[n]-1] - 1; for (i = 0; i < 4; i++) { f = p->x[j][mx-1][i]; num_sibs = count_sibs(p); q = p; for (k = 0; k < num_sibs; k++) { q = q->next; f *= q->x[j][mx-1][i]; } f = sqrt(f); xx[i] = f; } xx[0] *= freqa; xx[1] *= freqc; xx[2] *= freqg; xx[3] *= freqt; sum = xx[0]+xx[1]+xx[2]+xx[3]; for (i = 0; i < 4; i++) xx[i] /= sum; first = 0; for (i = 1; i < 4; i++) if (xx [i] > xx[first]) first = i; if (first == 0) second = 1; else second = 0; for (i = 0; i < 4; i++) if ((i != first) && (xx[i] > xx[second])) second = i; m = 1 << first; if (xx[first] < 0.4999995) m = m + (1 << second); if (xx[first] > 0.95) putc(toupper((int)basechar[m - 1]), outfile); else putc(basechar[m - 1], outfile); if (rctgry && rcategs > 1) mx = mp[n][mx - 1]; else mx = 1; } } /* reconstr */ static void rectrav(node *p, long m, long n) { /* print out segment of reconstructed sequence for one branch */ long num_sibs, i; node *sib_ptr; putc(' ', outfile); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, " "); mx = mx0; for (i = m; i <= n; i++) { if ((i % 10 == 0) && (i != m)) putc(' ', outfile); if (p->tip) putc(y[p->index-1][i], outfile); else reconstr(p, i); } putc('\n', outfile); if (!p->tip) { num_sibs = count_sibs(p); sib_ptr = p; for (i = 0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; rectrav(sib_ptr->back, m, n); } } mx1 = mx; } /* rectrav */ static void summarize(FILE *fp) { long i, j, mm; double mode, sum; double like[maxcategs], nulike[maxcategs]; double **marginal; mp = (long **)Malloc(sites * sizeof(long *)); for (i = 0; i <= sites-1; ++i) mp[i] = (long *)Malloc(sizeof(long)*rcategs); fprintf(fp, "\nLn Likelihood = %11.5f\n\n", curtree.likelihood); fprintf(fp, " Ancestor Node Node Height Length\n"); fprintf(fp, " -------- ---- ---- ------ ------\n"); mlk_describe(fp, &curtree, fracchange); putc('\n', fp); if (rctgry && rcategs > 1) { for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * 
like[j]; mp[i][j] = j + 1; for (k = 1; k <= rcategs; k++) { if (k != j + 1) { if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { nulike[j] = lambda * probcat[k - 1] * like[k - 1]; mp[i][j] = k; } } } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); } mode = 0.0; mx = 1; for (i = 1; i <= rcategs; i++) { if (probcat[i - 1] * like[i - 1] > mode) { mx = i; mode = probcat[i - 1] * like[i - 1]; } } mx0 = mx; fprintf(fp, "Combination of categories that contributes the most to the likelihood:\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', fp); for (i = 1; i <= sites; i++) { fprintf(fp, "%ld", mx); if (i % 10 == 0) putc(' ', fp); if (i % 60 == 0 && i != sites) { putc('\n', fp); for (j = 1; j <= nmlngth + 3; j++) putc(' ', fp); } mx = mp[i - 1][mx - 1]; } fprintf(fp, "\n\n"); marginal = (double **) Malloc( sites*sizeof(double *)); for (i = 0; i < sites; i++) marginal[i] = (double *) Malloc( rcategs*sizeof(double)); for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) { nulike[j] /= sum; marginal[i][j] = nulike[j]; } memcpy(like, nulike, rcategs * sizeof(double)); } for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = 0; i < sites; i++) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } marginal[i][j] *= like[j] * probcat[j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); sum = 0.0; for (j = 0; j < rcategs; j++) sum += marginal[i][j]; for (j = 0; j < rcategs; j++) marginal[i][j] /= sum; } fprintf( fp, "Most probable category at each site if > 0.95 probability " "(\".\" otherwise)\n\n" ); for (i = 1; i <= nmlngth + 3; i++) putc(' ', fp); for (i = 0; i < sites; i++) { mm = 0; sum = 0.0; for (j = 0; j < rcategs; j++) if (marginal[i][j] > sum) { sum = marginal[i][j]; mm = j; } if (sum >= 0.95) fprintf(fp, "%ld", mm+1); else putc('.', fp); if ((i+1) % 60 == 0) { if (i != 0) { putc('\n', fp); for (j = 1; j <= nmlngth + 3; j++) putc(' ', fp); } } else if ((i+1) % 10 == 0) putc(' ', fp); } putc('\n', fp); for (i = 0; i < sites; i++) free(marginal[i]); free(marginal); } putc('\n', fp); putc('\n', fp); if (hypstate) { fprintf(fp, "Probable sequences at interior nodes:\n\n"); fprintf(fp, " node "); for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) putc(' ', fp); fprintf(fp, "Reconstructed sequence (caps if > 0.95)\n\n"); if (!rctgry || (rcategs == 1)) mx0 = 1; for (i = 0; i < sites; i += 60) { k = i + 59; if (k >= sites) k = sites - 1; rectrav(curtree.root, i, k); putc('\n', fp); mx0 = mx1; } } for (i = 0; i < sites; ++i) free(mp[i]); free(mp); } /* summarize */ static void dnamlk_treeout(node *p) { /* write out file with representation of final tree */ node *sib_ptr; long i, n, w, num_sibs; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 
0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { sib_ptr = p; num_sibs = count_sibs(p); putc('(', outtree); col++; for (i=0; i < (num_sibs - 1); i++) { sib_ptr = sib_ptr->next; dnamlk_treeout(sib_ptr->back); putc(',', outtree); col++; if (col > 55) { putc('\n', outtree); col = 0; } } sib_ptr = sib_ptr->next; dnamlk_treeout(sib_ptr->back); putc(')', outtree); col++; } if (p == curtree.root) { fprintf(outtree, ";\n"); return; } x = fracchange * (p->tyme - curtree.nodep[p->back->index - 1]->tyme); if (x > 0.0) w = (long)(0.4342944822 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.4342944822 * log(-x)) + 1; if (w < 0) w = 0; fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } /* dnamlk_treeout */ static void init_tymes(node *p, double minlength) { /* Set all node tymes closest to the tips but with no branches shorter than * minlength */ long i, num_sibs; node *sib_ptr, *sib_back_ptr; /* traverse to set up times in subtrees */ if (p->tip) return; sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; init_tymes(sib_back_ptr, minlength); } /* set time at this node */ setnodetymes(p, min_child_tyme(p) - minlength); } /* init_tymes */ static void treevaluate(void) { /* evaluate likelihood of tree, after iterating branch lengths */ long i; if ( !usertree || (usertree && !lngths) ) { polishing = true; smoothit = true; for (i = 0; i < smoothings; ) { if ( !smooth(curtree.root) ) i++; } } dnamlk_evaluate(curtree.root); } /* treevaluate */ static void maketree(void) { /* constructs a binary tree from the pointers in curtree.nodep, adds each node at location which yields highest likelihood then rearranges the tree for greatest likelihood */ long i, j; node *item, *nufork, *dummy, *q, *root=NULL; boolean succeded, dummy_haslengths, dummy_first, goteof; long max_nonodes; /* Maximum number of nodes required to * express all species in a bifurcating tree * */ long nextnode; pointarray dummy_treenode=NULL; double oldbest; node *tmp; char* treestr; inittable(); if (!usertree) { for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); curtree.root = curtree.nodep[spp]; curtree.root->back = NULL; for (i = 0; i < spp; i++) curtree.nodep[i]->back = NULL; for (i = spp; i < nonodes; i++) { q = curtree.nodep[i]; q->back = NULL; while ((q = q->next) != curtree.nodep[i]) q->back = NULL; } polishing = false; dnamlk_add(curtree.nodep[enterorder[0] - 1], curtree.nodep[enterorder[1] - 1], curtree.nodep[spp]); if (progress) { printf("\nAdding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastsp = false; smoothit = false; for (i = 3; i <= spp; i++) { bestree.likelihood = UNDEFINED; bestyet = UNDEFINED; there = curtree.root; item = curtree.nodep[enterorder[i - 1] - 1]; nufork = curtree.nodep[spp + i - 2]; lastsp = (i == spp); addpreorder(curtree.root, item, nufork, true, true); dnamlk_add(there, item, nufork); like = dnamlk_evaluate(curtree.root); copy_(&curtree, &bestree, nonodes, rcategs); rearrange(&curtree.root); if (curtree.likelihood > bestree.likelihood) { copy_(&curtree, &bestree, nonodes, rcategs); } if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (lastsp && global) { /* perform global rearrangements */ if (progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = 1; j <= nonodes; j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) 
putchar('-'); printf("!\n"); } global2 = false; do { succeded = false; if (progress) printf(" "); /* FIXME: tymes gets clobbered by tryadd() */ /* save_tymes(&curtree, tymes); */ for (j = 0; j < nonodes; j++) { oldbest = bestree.likelihood; bestyet = UNDEFINED; item = curtree.nodep[j]; if (item != curtree.root) { nufork = pnode(&curtree, item->back); /* parent fork */ if (nufork != curtree.root) { tmp = nufork->next->back; if (tmp == item) tmp = nufork->next->next->back; /* can't figure out why we never get here */ } else { if (nufork->next->back != item) tmp = nufork->next->back; else tmp = nufork->next->next->back; } /* if we add item at tmp we have done nothing */ assert( all_tymes_valid(curtree.root, 0.98*MIN_BRANCH_LENGTH, false) ); dnamlk_re_move(&item, &nufork, false); /* there = curtree.root; */ there = tmp; addpreorder(curtree.root, item, nufork, true, true); if ( tmp != there && bestree.likelihood > oldbest) succeded = true; dnamlk_add(there, item, nufork); if (global2) restore_tymes(&curtree,tymes); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) putchar('\n'); } while ( succeded ); } } if (njumble > 1 && lastsp) { for (i = 0; i < spp; i++ ) dnamlk_re_move(&curtree.nodep[i], &dummy, false); if (jumb == 1 || bestree2.likelihood < bestree.likelihood) copy_(&bestree, &bestree2, nonodes, rcategs); } if (jumb == njumble) { if (njumble > 1) copy_(&bestree2, &curtree, nonodes, rcategs); else copy_(&bestree, &curtree, nonodes, rcategs); fprintf(outfile, "\n\n"); treevaluate(); curtree.likelihood = dnamlk_evaluate(curtree.root); if (treeprint) mlk_printree(outfile, &curtree); summarize(outfile); if (trout) { col = 0; dnamlk_treeout(curtree.root); } } } else { /* if ( usertree ) */ /* Open in binary: ftell() is broken for UNIX line-endings under WIN32 */ treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); inittable_for_usertree (treestr); if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); l0gl = (double *)Malloc(shimotrees * sizeof(double)); l0gf = (double **)Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *)Malloc(endsite * sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } fprintf(outfile, "\n\n"); which = 1; max_nonodes = nonodes; while (which <= numtrees) { /* These initializations required each time through the loop since multiple trees require re-initialization */ dummy_haslengths = true; nextnode = 0; dummy_first = true; goteof = false; lngths = lengthsopt; nonodes = max_nonodes; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, dummy_treenode, &goteof, &dummy_first, curtree.nodep, &nextnode, &dummy_haslengths, &grbg, initdnamlnode, false, nonodes); if (goteof && (which <= numtrees)) { /* if we hit the end of the file prematurely */ printf ("\n"); printf ("ERROR: trees missing at end of file.\n"); printf ("\tExpected number of trees:\t\t%ld\n", numtrees); printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); exxit(-1); } nonodes = nextnode; root = curtree.nodep[root->index - 1]; curtree.root = root; if (lngths) tymetrav(curtree.root); else init_tymes(curtree.root, initialv); treevaluate(); if (treeprint) mlk_printree(outfile, &curtree); summarize(outfile); if (trout) { col = 0; dnamlk_treeout(curtree.root); } if(which < numtrees){ freex_notip(nonodes, 
curtree.nodep); gdispose(curtree.root, &grbg, curtree.nodep); } which++; } FClose(intree); if (!auto_ && numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite, maxlogl, l0gl, l0gf, aliasweight, seed); } if (jumb == njumble) { if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } free(contribution); freex(nonodes, curtree.nodep); if (!usertree) { freex(nonodes, bestree.nodep); if (njumble > 1) freex(nonodes, bestree2.nodep); } } free(root); freetable(); } /* maketree */ /*?? Dnaml has a clean-up function for freeing memory, closing files, etc. Put one here too? */ int main(int argc, Char *argv[]) { /* DNA Maximum Likelihood with molecular clock */ /* Initialize mlclock.c */ mlclock_init(&curtree, &dnamlk_evaluate); #ifdef MAC argc = 1; /* macsetup("Dnamlk", "Dnamlk"); */ argv[0] = "Dnamlk"; #endif init(argc,argv); emboss_getoptions("fdnamlk", argc, argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); ttratio0 = ttratio; /* Data set loop */ for (ith = 1; ith <= datasets; ith++) { ttratio = ttratio0; if (datasets > 1) { fprintf(outfile, "Data set # %ld:\n\n", ith); if (progress) printf("\nData set # %ld:\n", ith); } getinput(); if (ith == 1) firstset = false; /* Jumble loop */ if (usertree) maketree(); else for (jumb = 1; jumb <= njumble; jumb++) maketree(); } /* Close files */ FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* DNA Maximum Likelihood with molecular clock */ PHYLIPNEW-3.69.650/src/dnadist.c0000664000175000017500000007537611616234203012667 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define iterationsd 100 /* number of iterates of EM for each distance */ typedef struct valrec { double rat, ratxv, z1, y1, z1zz, z1yy, z1xv; } valrec; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; ajint numseqs; ajint numwts; extern sequence y; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; long sites, categs, weightsum, datasets, ith, rcategs; boolean freqsfrom, jukes, kimura, logdet, gama, invar, similarity, lower, f84, weights, progress, ctgry, mulsets, justwts, firstset, baddists, human; boolean matrix_flags; /* Matrix output format */ node **nodep; double xi, xv, ttratio, ttratio0, freqa, freqc, freqg, freqt, freqr, freqy, freqar, freqcy, freqgr, freqty, cvi, invarfrac, sumrates, fracchange; steptr oldweight; double rate[maxcategs]; double **d; double sumweightrat; /* these values were propagated */ double *weightrat; /* to global values from */ valrec tbl[maxcategs]; /* function makedists. 
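(makedists() below fills weightrat[i] with weight[i] * rate[category[alias[i]-1]-1], i.e. the weight of site pattern i scaled by its category's rate; makev() then uses these when accumulating the likelihood slope over patterns.)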
*/ #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void reallocsites(void); void doinit(void); void printcategories(void); void inputoptions(void); void dnadist_sitesort(void); void dnadist_sitecombine(void); void dnadist_sitescrunch(void); void makeweights(void); void dnadist_makevalues(void); void dnadist_empiricalfreqs(void); void getinput(void); void inittable(void); double lndet(double (*a)[4]); void makev(long, long, double *); void makedists(void); void writedists(void); /* function prototypes */ #endif void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr method = NULL; AjPStr gammamethod = NULL; AjPFloat basefreq; AjPFloat arrayval; /*boolean ttr;*/ ctgry = false; categs = 1; cvi = 1.0; rcategs = 1; rate[0] = 1.0; freqsfrom = true; gama = false; invar = false; invarfrac = 0.0; jukes = false; justwts = false; kimura = false; logdet = false; f84 = false; lower = false; human = false; similarity = false; ttratio = 2.0; /*ttr = false;*/ weights = false; printdata = false; progress = true; mulsets = false; datasets = 1; matrix_flags = MAT_MACHINE; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "f")) { f84 = true; ttratio = ajAcdGetFloat("ttratio"); freqsfrom = ajAcdGetToggle("freqsfrom"); } else if(ajStrMatchC(method, "k")) { kimura = true; ttratio = ajAcdGetFloat("ttratio"); } else if(ajStrMatchC(method, "j")) jukes = true; else if(ajStrMatchC(method, "l")) logdet = true; else if(ajStrMatchC(method, "s")) similarity = true; if( (f84) || (kimura) || (jukes) ) { gammamethod = ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "g")) { gama = true; cvi = ajAcdGetFloat("gammacoefficient"); cvi = 1.0 / (cvi * cvi); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; cvi = ajAcdGetFloat("gammacoefficient"); cvi = 1.0 / (cvi * cvi); invarfrac = ajAcdGetFloat("invarfrac"); } else if(ajStrMatchC(gammamethod, "n")) { categs = ajAcdGetInt("ncategories"); } } if (categs > 1) { ctgry = true; arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); lower = ajAcdGetBoolean("lower"); if(lower) matrix_flags = MAT_LOWER; human = ajAcdGetBoolean("humanreadable"); if(human) matrix_flags |= MAT_HUMAN; if(!freqsfrom) { basefreq = ajAcdGetArray("basefreq"); freqa = ajFloatGet(basefreq, 0); freqc = ajFloatGet(basefreq, 1); freqg = ajFloatGet(basefreq, 2); freqt = ajFloatGet(basefreq, 3); } embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); /* fprintf(outfile, "\nNucleic acid sequence Distance Matrix program,");*/ /* fprintf(outfile, " version %s\n\n",VERSION);*/ } /* emboss_getoptions */ void allocrest(void) { long i; y = (Char **)Malloc(spp*sizeof(Char *)); nodep = (node **)Malloc(spp*sizeof(node *)); for (i = 0; i < spp; i++) { y[i] = (Char *)Malloc(sites*sizeof(Char)); nodep[i] = (node *)Malloc(sizeof(node)); } d = (double **)Malloc(spp*sizeof(double *)); for (i = 0; i < spp; i++) d[i] = (double*)Malloc(spp*sizeof(double)); 
nayme = (naym *)Malloc(spp*sizeof(naym)); category = (steptr)Malloc(sites*sizeof(long)); oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); alias = (steptr)Malloc(sites*sizeof(long)); ally = (steptr)Malloc(sites*sizeof(long)); location = (steptr)Malloc(sites*sizeof(long)); weightrat = (double *)Malloc(sites*sizeof(double)); } /* allocrest */ void reallocsites(void) {/* The amount of sites can change between runs this function reallocates all the variables whose size depends on the amount of sites */ long i; for (i = 0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(sites*sizeof(Char)); } free(category); free(oldweight); free(weight); free(alias); free(ally); free(location); free(weightrat); category = (steptr)Malloc(sites*sizeof(long)); oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); alias = (steptr)Malloc(sites*sizeof(long)); ally = (steptr)Malloc(sites*sizeof(long)); location = (steptr)Malloc(sites*sizeof(long)); weightrat = (double *)Malloc(sites*sizeof(double)); } /* reallocsites */ void doinit(void) { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &sites, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); allocrest(); } /* doinit */ void printcategories(void) { /* print out list of categories of sites */ long i, j; fprintf(outfile, "Rate categories\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= sites; i++) { fprintf(outfile, "%ld", category[i - 1]); if (i % 60 == 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } else if (i % 10 == 0) putc(' ', outfile); } fprintf(outfile, "\n\n"); } /* printcategories */ void inputoptions(void) { /* read options information */ long i; if (!firstset && !justwts) { samenumspseq(seqsets[ith-1], &sites, ith); reallocsites(); } for (i = 0; i < sites; i++) { category[i] = 1; oldweight[i] = 1; } if (justwts || weights) inputweightsstr(phyloweights->Str[ith-1], sites, oldweight, &weights); if (printdata) putc('\n', outfile); if (jukes && printdata) fprintf(outfile, " Jukes-Cantor Distance\n"); if (kimura && printdata) fprintf(outfile, " Kimura 2-parameter Distance\n"); if (f84 && printdata) fprintf(outfile, " F84 Distance\n"); if (similarity) fprintf(outfile, " \n Table of similarity between sequences\n"); if (firstset && printdata && (kimura || f84)) fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n", ttratio); if (ctgry && categs > 1) { inputcategsstr(phyloratecat->Str[0], 0, sites, category, categs, "DnaDist"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } else if (printdata && (categs > 1)) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (i = 1; i <= categs; i++) fprintf(outfile, "%12ld%13.3f\n", i, rate[i - 1]); putc('\n', outfile); printcategories(); } if (jukes) ttratio = 0.5000001; if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); } /* inputoptions */ void dnadist_sitesort(void) { /* Shell sort of sites lexicographically */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied; gap = sites / 2; while (gap > 0) { for (i = gap + 1; i <= sites; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = alias[j - 1]; jg = alias[j + gap - 1]; tied = (oldweight[jj - 1] == oldweight[jg - 1]); flip = (oldweight[jj - 1] < oldweight[jg - 1] || (tied && category[jj - 1] > category[jg - 1])); tied = (tied && category[jj - 1] == category[jg - 1]); k = 1; while (k <= spp && 
tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (!flip) break; itemp = alias[j - 1]; alias[j - 1] = alias[j + gap - 1]; alias[j + gap - 1] = itemp; j -= gap; } } gap /= 2; } } /* dnadist_sitesort */ void dnadist_sitecombine(void) { /* combine sites that have identical patterns */ long i, j, k; boolean tied; i = 1; while (i < sites) { j = i + 1; tied = true; while (j <= sites && tied) { tied = (oldweight[alias[i - 1] - 1] == oldweight[alias[j - 1] - 1] && category[alias[i - 1] - 1] == category[alias[j - 1] - 1]); k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); k++; } if (!tied) break; ally[alias[j - 1] - 1] = alias[i - 1]; j++; } i = j; } } /* dnadist_sitecombine */ void dnadist_sitescrunch(void) { /* move so one representative of each pattern of sites comes first */ long i, j, itemp; boolean done, found, completed; done = false; i = 1; j = 2; while (!done) { if (ally[alias[i - 1] - 1] != alias[i - 1]) { if (j <= i) j = i + 1; if (j <= sites) { do { found = (ally[alias[j - 1] - 1] == alias[j - 1]); j++; completed = (j > sites); if (j <= sites) completed = (oldweight[alias[j - 1] - 1] == 0); } while (!(found || completed)); if (found) { j--; itemp = alias[i - 1]; alias[i - 1] = alias[j - 1]; alias[j - 1] = itemp; } else done = true; } else done = true; } i++; done = (done || i >= sites); } } /* dnadist_sitescrunch */ void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = i; weight[i - 1] = 0; } dnadist_sitesort(); dnadist_sitecombine(); dnadist_sitescrunch(); endsite = 0; for (i = 1; i <= sites; i++) { if (ally[i - 1] == i && oldweight[i - 1] > 0) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; weightsum = 0; for (i = 0; i < sites; i++) weightsum += oldweight[i]; sumrates = 0.0; for (i = 0; i < sites; i++) sumrates += oldweight[i] * rate[category[i] - 1]; for (i = 0; i < categs; i++) rate[i] *= weightsum / sumrates; for (i = 0; i < sites; i++) weight[location[ally[i] - 1] - 1] += oldweight[i]; } /* makeweights */ void dnadist_makevalues(void) { /* set up fractional likelihoods at tips */ long i, j, k; bases b; for (i = 0; i < spp; i++) { nodep[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike)); for (j = 0; j < endsite; j++) nodep[i]->x[j] = (ratelike)Malloc(rcategs*sizeof(sitelike)); } for (k = 0; k < endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 0.0; switch (y[i][j - 1]) { case 'A': nodep[i]->x[k][0][0] = 1.0; break; case 'C': nodep[i]->x[k][0][(long)C - (long)A] = 1.0; break; case 'G': nodep[i]->x[k][0][(long)G - (long)A] = 1.0; break; case 'T': nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'U': nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'M': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)C - (long)A] = 1.0; break; case 'R': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)G - (long)A] = 1.0; break; case 'W': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'S': nodep[i]->x[k][0][(long)C - (long)A] = 1.0; nodep[i]->x[k][0][(long)G - (long)A] = 1.0; break; case 'Y': nodep[i]->x[k][0][(long)C - (long)A] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'K': nodep[i]->x[k][0][(long)G - (long)A] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; 
break; case 'B': nodep[i]->x[k][0][(long)C - (long)A] = 1.0; nodep[i]->x[k][0][(long)G - (long)A] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'D': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)G - (long)A] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'H': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)C - (long)A] = 1.0; nodep[i]->x[k][0][(long)T - (long)A] = 1.0; break; case 'V': nodep[i]->x[k][0][0] = 1.0; nodep[i]->x[k][0][(long)C - (long)A] = 1.0; nodep[i]->x[k][0][(long)G - (long)A] = 1.0; break; case 'N': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 1.0; break; case 'X': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 1.0; break; case '?': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 1.0; break; case 'O': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 1.0; break; case '-': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) nodep[i]->x[k][0][(long)b - (long)A] = 1.0; break; } } } } /* dnadist_makevalues */ void dnadist_empiricalfreqs(void) { /* Get empirical base frequencies from the data */ long i, j, k; double sum, suma, sumc, sumg, sumt, w; freqa = 0.25; freqc = 0.25; freqg = 0.25; freqt = 0.25; for (k = 1; k <= 8; k++) { suma = 0.0; sumc = 0.0; sumg = 0.0; sumt = 0.0; for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) { w = weight[j]; sum = freqa * nodep[i]->x[j][0][0]; sum += freqc * nodep[i]->x[j][0][(long)C - (long)A]; sum += freqg * nodep[i]->x[j][0][(long)G - (long)A]; sum += freqt * nodep[i]->x[j][0][(long)T - (long)A]; suma += w * freqa * nodep[i]->x[j][0][0] / sum; sumc += w * freqc * nodep[i]->x[j][0][(long)C - (long)A] / sum; sumg += w * freqg * nodep[i]->x[j][0][(long)G - (long)A] / sum; sumt += w * freqt * nodep[i]->x[j][0][(long)T - (long)A] / sum; } } sum = suma + sumc + sumg + sumt; freqa = suma / sum; freqc = sumc / sum; freqg = sumg / sum; freqt = sumt / sum; } } /* dnadist_empiricalfreqs */ void getinput(void) { /* reads the input data */ inputoptions(); if ((!freqsfrom) && !logdet && !similarity) { if (kimura || jukes) { freqa = 0.25; freqc = 0.25; freqg = 0.25; freqt = 0.25; } getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, printdata); if (freqa < 0.00000001) { freqa = 0.000001; freqc = 0.999999*freqc; freqg = 0.999999*freqg; freqt = 0.999999*freqt; } if (freqc < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.000001; freqg = 0.999999*freqg; freqt = 0.999999*freqt; } if (freqg < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.999999*freqc; freqg = 0.000001; freqt = 0.999999*freqt; } if (freqt < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.999999*freqc; freqg = 0.999999*freqg; freqt = 0.000001; } } if (!justwts || firstset) seq_inputdata(seqsets[ith-1],sites); makeweights(); dnadist_makevalues(); if (freqsfrom) { dnadist_empiricalfreqs(); getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, printdata); } } /* getinput */ void inittable(void) { /* Define a lookup table. 
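(A sketch of how the table is used, as I read makev() further on: each user category i caches rat = rate[i] and ratxv = rate[i]*xv; for a trial distance tt (lz = -tt) makev() forms z1 = exp(-ratxv*tt) and z1zz = exp(-rat*tt), or with gamma-distributed rates the corresponding (1 + ratxv*tt/cvi)^(-cvi) and (1 + rat*tt/cvi)^(-cvi), and adjusts tt by a simple search that keeps stepping forward while the slope is positive and reverses and halves the step when it turns negative, stopping once the change is below 0.0000002 or after iterationsd passes.)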
Precompute values and store in a table */ long i; for (i = 0; i < categs; i++) { tbl[i].rat = rate[i]; tbl[i].ratxv = rate[i] * xv; } } /* inittable */ double lndet(double (*a)[4]) { long i, j, k; double temp, ld; /*Gauss-Jordan reduction -- invert matrix a in place, overwriting previous contents of a. On exit, matrix a contains the inverse, lndet contains the log of the determinant */ ld = 1.0; for (i = 0; i < 4; i++) { ld *= a[i][i]; temp = 1.0 / a[i][i]; a[i][i] = 1.0; for (j = 0; j < 4; j++) a[i][j] *= temp; for (j = 0; j < 4; j++) { if (j != i) { temp = a[j][i]; a[j][i] = 0.0; for (k = 0; k < 4; k++) a[j][k] -= temp * a[i][k]; } } } if (ld <= 0.0) return(99.0); else return(log(ld)); } /* lndet */ void makev(long m, long n, double *v) { /* compute one distance */ long i, j, k, l, it, num1, num2, idx; long numerator = 0, denominator = 0; double sum, sum1, sum2, sumyr, lz, aa, bb, cc, vv=0, p1, p2, p3, q1, q2, q3, tt, delta=0.0, slope, xx1freqa, xx1freqc, xx1freqg, xx1freqt; double *prod, *prod2, *prod3; boolean quick, jukesquick, kimquick, logdetquick, overlap; bases b; node *p, *q; sitelike xx1, xx2; double basetable[4][4]; /* for quick logdet */ double basefreq1[4], basefreq2[4]; p = nodep[m - 1]; q = nodep[n - 1]; /* check for overlap between sequences */ overlap = false; for(i=0 ; i < sites ; i++){ if((strchr("NX?O-",y[m-1][i])==NULL) && (strchr("NX?O-",y[n-1][i])==NULL)){ overlap = true; break; } } if(!overlap){ printf("\nWARNING: NO OVERLAP BETWEEN SEQUENCES %ld AND %ld; -1.0 WAS WRITTEN\n", m, n); baddists = true; return; } quick = (!ctgry || categs == 1); if (jukes || kimura || logdet || similarity) { numerator = 0; denominator = 0; for (i = 0; i < endsite; i++) { memcpy(xx1, p->x[i][0], sizeof(sitelike)); memcpy(xx2, q->x[i][0], sizeof(sitelike)); sum = 0.0; sum1 = 0.0; sum2 = 0.0; for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) { sum1 += xx1[(long)b - (long)A]; sum2 += xx2[(long)b - (long)A]; sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A]; } quick = (quick && (sum1 == 1.0 || sum1 == 4.0) && (sum2 == 1.0 || sum2 == 4.0)); if (sum1 == 1.0 && sum2 == 1.0) { numerator += (long)(weight[i] * sum); denominator += weight[i]; } } } jukesquick = ((jukes || similarity) && quick); kimquick = (kimura && quick); logdetquick = (logdet && quick); if (logdet && !quick) { printf(" WARNING: CANNOT CALCULATE LOGDET DISTANCE\n"); printf(" WITH PRESENT PROGRAM IF PARTIALLY AMBIGUOUS NUCLEOTIDES\n"); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } if (jukesquick && jukes && (numerator * 4 <= denominator)) { printf("\nWARNING: INFINITE DISTANCE BETWEEN "); printf(" SPECIES %3ld AND %3ld\n", m, n); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } if (jukesquick && invar && (4 * (((double)numerator / denominator) - invarfrac) <= (1.0 - invarfrac))) { printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n); printf(" TOO LARGE FOR INVARIABLE SITES\n"); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } if (jukesquick) { if (!gama && !invar) vv = -0.75 * log((4.0*((double)numerator / denominator) - 1.0) / 3.0); else if (!invar) vv = 0.75 * cvi * (exp(-(1/cvi)* log((4.0 * ((double)numerator / denominator) - 1.0) / 3.0)) - 1.0); else vv = 0.75 * cvi * (exp(-(1/cvi)* log((4.0 * ((double)numerator / denominator - invarfrac)/ (1.0-invarfrac) - 1.0) / 3.0)) - 1.0); } if (kimquick) { num1 = 0; num2 = 0; denominator = 0; for (i = 0; i < endsite; i++) { memcpy(xx1, p->x[i][0], sizeof(sitelike)); memcpy(xx2, q->x[i][0], sizeof(sitelike)); sum = 0.0; sum1 = 0.0; sum2 = 0.0; for (b = 
A; (long)b <= (long)T; b = (bases)((long)b + 1)) { sum1 += xx1[(long)b - (long)A]; sum2 += xx2[(long)b - (long)A]; sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A]; } sumyr = (xx1[0] + xx1[(long)G - (long)A]) * (xx2[0] + xx2[(long)G - (long)A]) + (xx1[(long)C - (long)A] + xx1[(long)T - (long)A]) * (xx2[(long)C - (long)A] + xx2[(long)T - (long)A]); if (sum1 == 1.0 && sum2 == 1.0) { num1 += (long)(weight[i] * sum); num2 += (long)(weight[i] * (sumyr - sum)); denominator += weight[i]; } } tt = ((1.0 - (double)num1 / denominator)-invarfrac)/(1.0-invarfrac); if (tt > 0.0) { delta = 0.1; tt = delta; it = 0; while (fabs(delta) > 0.0000002 && it < iterationsd) { it++; if (!gama) { p1 = exp(-tt); p2 = exp(-xv * tt) - exp(-tt); p3 = 1.0 - exp(-xv * tt); } else { p1 = exp(-cvi * log(1 + tt / cvi)); p2 = exp(-cvi * log(1 + xv * tt / cvi)) - exp(-cvi * log(1 + tt / cvi)); p3 = 1.0 - exp(-cvi * log(1 + xv * tt / cvi)); } q1 = p1 + p2 / 2.0 + p3 / 4.0; q2 = p2 / 2.0 + p3 / 4.0; q3 = p3 / 2.0; q1 = q1 * (1.0-invarfrac) + invarfrac; q2 *= (1.0 - invarfrac); q3 *= (1.0 - invarfrac); if (!gama && !invar) slope = 0.5 * exp(-tt) * (num2 / q2 - num1 / q1) + 0.25 * xv * exp(-xv * tt) * ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1); else slope = 0.5 * (1 / (1 + tt / cvi)) * exp(-cvi * log(1 + tt / cvi)) * (num2 / q2 - num1 / q1) + 0.25 * (xv / (1 + xv * tt / cvi)) * exp(-cvi * log(1 + xv * tt / cvi)) * ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1); slope *= (1.0-invarfrac); if (slope < 0.0) delta = fabs(delta) / -2.0; else delta = fabs(delta); tt += delta; } } if ((delta >= 0.1) && (!similarity)) { printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n); if (invar) printf(" TOO LARGE FOR INVARIABLE SITES\n"); else printf(" TOO LARGE TO ESTIMATE DISTANCE\n"); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } vv = fracchange * tt; } if (!(jukesquick || kimquick || logdet)) { prod = (double *)Malloc(sites*sizeof(double)); prod2 = (double *)Malloc(sites*sizeof(double)); prod3 = (double *)Malloc(sites*sizeof(double)); for (i = 0; i < endsite; i++) { memcpy(xx1, p->x[i][0], sizeof(sitelike)); memcpy(xx2, q->x[i][0], sizeof(sitelike)); xx1freqa = xx1[0] * freqa; xx1freqc = xx1[(long)C - (long)A] * freqc; xx1freqg = xx1[(long)G - (long)A] * freqg; xx1freqt = xx1[(long)T - (long)A] * freqt; sum1 = xx1freqa + xx1freqc + xx1freqg + xx1freqt; sum2 = freqa * xx2[0] + freqc * xx2[(long)C - (long)A] + freqg * xx2[(long)G - (long)A] + freqt * xx2[(long)T - (long)A]; prod[i] = sum1 * sum2; prod2[i] = (xx1freqa + xx1freqg) * (xx2[0] * freqar + xx2[(long)G - (long)A] * freqgr) + (xx1freqc + xx1freqt) * (xx2[(long)C - (long)A] * freqcy + xx2[(long)T - (long)A] * freqty); prod3[i] = xx1freqa * xx2[0] + xx1freqc * xx2[(long)C - (long)A] + xx1freqg * xx2[(long)G - (long)A] + xx1freqt * xx2[(long)T - (long)A]; } tt = 0.1; delta = 0.1; it = 1; while (it < iterationsd && fabs(delta) > 0.0000002) { slope = 0.0; if (tt > 0.0) { lz = -tt; for (i = 0; i < categs; i++) { if (!gama) { tbl[i].z1 = exp(tbl[i].ratxv * lz); tbl[i].z1zz = exp(tbl[i].rat * lz); } else { tbl[i].z1 = exp(-cvi*log(1.0-tbl[i].ratxv * lz/cvi)); tbl[i].z1zz = exp(-cvi*log(1.0-tbl[i].rat * lz/cvi)); } tbl[i].y1 = 1.0 - tbl[i].z1; tbl[i].z1yy = tbl[i].z1 - tbl[i].z1zz; tbl[i].z1xv = tbl[i].z1 * xv; } for (i = 0; i < endsite; i++) { idx = category[alias[i] - 1]; cc = prod[i]; bb = prod2[i]; aa = prod3[i]; if (!gama && !invar) slope += weightrat[i] * (tbl[idx - 1].z1zz * (bb - aa) + tbl[idx - 1].z1xv * (cc - bb)) / (aa * 
tbl[idx - 1].z1zz + bb * tbl[idx - 1].z1yy + cc * tbl[idx - 1].y1); else slope += (1.0-invarfrac) * weightrat[i] * ( ((tbl[idx-1].rat)/(1.0-tbl[idx-1].rat * lz/cvi)) * tbl[idx - 1].z1zz * (bb - aa) + ((tbl[idx-1].ratxv)/(1.0-tbl[idx-1].ratxv * lz/cvi)) * tbl[idx - 1].z1 * (cc - bb)) / (aa * ((1.0-invarfrac)*tbl[idx - 1].z1zz + invarfrac) + bb * (1.0-invarfrac)*tbl[idx - 1].z1yy + cc * (1.0-invarfrac)*tbl[idx - 1].y1); } } if (slope < 0.0) delta = fabs(delta) / -2.0; else delta = fabs(delta); tt += delta; it++; } if ((delta >= 0.1) && (!similarity)) { printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n); if (invar) printf(" TOO LARGE FOR INVARIABLE SITES\n"); else printf(" TOO LARGE TO ESTIMATE DISTANCE\n"); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } vv = tt * fracchange; free(prod); free(prod2); free(prod3); } if (logdetquick) { /* compute logdet when no ambiguous nucleotides */ for (i = 0; i < 4; i++) { basefreq1[i] = 0.0; basefreq2[i] = 0.0; for (j = 0; j < 4; j++) basetable[i][j] = 0.0; } for (i = 0; i < endsite; i++) { k = 0; while (p->x[i][0][k] == 0.0) k++; basefreq1[k] += weight[i]; l = 0; while (q->x[i][0][l] == 0.0) l++; basefreq2[l] += weight[i]; basetable[k][l] += weight[i]; } vv = lndet(basetable); if (vv == 99.0) { printf("\nNegative or zero determinant for distance between species"); printf(" %ld and %ld\n", m, n); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } vv = -0.25*(vv - 0.5*(log(basefreq1[0])+log(basefreq1[1]) +log(basefreq1[2])+log(basefreq1[3]) +log(basefreq2[0])+log(basefreq2[1]) +log(basefreq2[2])+log(basefreq2[3]))); } if (similarity) { if (denominator < 1.0) { printf("\nWARNING: SPECIES %3ld AND %3ld HAVE NO BASES THAT", m, n); printf(" CAN BE COMPARED\n"); printf(" -1.0 WAS WRITTEN\n"); baddists = true; } vv = (double)numerator / denominator; } *v = vv; } /* makev */ void makedists(void) { /* compute distance matrix */ long i, j; double v; inittable(); for (i = 0; i < endsite; i++) weightrat[i] = weight[i] * rate[category[alias[i] - 1] - 1]; if (progress) { printf("Distances calculated for species\n"); #ifdef WIN32 phyFillScreenColor(); #endif } for (i = 0; i < spp; i++) if (similarity) d[i][i] = 1.0; else d[i][i] = 0.0; baddists = false; for (i = 1; i < spp; i++) { if (progress) { printf(" "); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); printf(" "); } for (j = i + 1; j <= spp; j++) { makev(i, j, &v); d[i - 1][j - 1] = v; d[j - 1][i - 1] = v; if (progress) { putchar('.'); fflush(stdout); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } if (progress) { printf(" "); for (j = 0; j < nmlngth; j++) putchar(nayme[spp - 1][j]); putchar('\n'); } for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(nodep[i]->x[j]); free(nodep[i]->x); } } /* makedists */ void writedists(void) { /* write out distances */ char **names; names = stringnames_new(); output_matrix_d(outfile, d, spp, spp, names, names, matrix_flags); stringnames_delete(names); if (progress) printf("\nDistances written to file \"%s\"\n\n", outfilename); } /* writedists */ int main(int argc, Char *argv[]) { /* DNA Distances by Maximum Likelihood */ #ifdef MAC argc = 1; /* macsetup("Dnadist",""); */ argv[0] = "Dnadist"; #endif init(argc, argv); emboss_getoptions("fdnadist", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); ttratio0 = ttratio; for (ith = 1; ith <= datasets; ith++) { ttratio = ttratio0; getinput(); if (ith == 1) firstset = false; if (datasets > 1 && progress) printf("Data set # %ld:\n\n",ith); 
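/* Editor's note (not part of the original source): makedists() below fills the
   symmetric matrix d[][] by calling makev() for every pair of species, and
   writedists() then prints it via output_matrix_d().  As an illustrative,
   hedged aside, the "jukesquick" branch of makev() above, when neither gamma
   rates nor invariant sites are in effect, is the standard Jukes-Cantor
   correction d = -(3/4) * ln((4*s - 1)/3), where s = numerator/denominator is
   the fraction of unambiguously matching sites.  In C,
   -0.75 * log((4.0 * 0.8 - 1.0) / 3.0) evaluates to about 0.2326, i.e. roughly
   0.23 substitutions per site for sequences that agree at 80% of their sites. */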
makedists(); writedists(); } FClose(infile); FClose(outfile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* DNA Distances by Maximum Likelihood */ PHYLIPNEW-3.69.650/src/dnamove.c0000664000175000017500000015745711616234204012674 00000000000000 #include "phylip.h" #include "moves.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define maxsz 999 /* size of pointer array for the undo trees */ /* this can be large without eating memory */ typedef struct treeset_t { node *root; pointarray nodep; pointarray treenode; long nonodes; boolean waswritten, hasmult, haslengths, nolengths, initialized; } treeset_t; treeset_t treesets[2]; node **treeone, **treetwo; typedef enum { horiz, vert, up, overt, upcorner, midcorner, downcorner, aa, cc, gg, tt, question } chartype; typedef enum { rearr, flipp, reroott, none } rearrtype; typedef struct gbase2 { baseptr2 base2; struct gbase2 *next; } gbase2; typedef enum { arb, use, spec } howtree; typedef enum {beforenode, atnode} movet; movet fromtype; typedef node **pointptr; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ void dnamove_gnu(gbases **); void dnamove_chuck(gbases *); void emboss_getoptions(char *pgm, int argc, char *argv[]); void inputoptions(void); void allocrest(void); void doinput(void); void configure(void); void prefix(chartype); void postfix(chartype); void makechar(chartype); void dnamove_add(node *, node *, node *); void dnamove_re_move(node **, node **); void evaluate(node *); void dnamove_reroot(node *); void firstrav(node *, long); void dnamove_hyptrav(node *, long *, long, boolean *); void grwrite(chartype, long, long *); void dnamove_drawline(long); void dnamove_printree(void); void arbitree(void); void yourtree(void); void initdnamovenode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void buildtree(void); void setorder(void); void mincomp(void); void rearrange(void); void dnamove_nextinc(void); void dnamove_nextchar(void); void dnamove_prevchar(void); void dnamove_show(void); void tryadd(node *, node **, node **, double *); void addpreorder(node *, node *, node *, double *); void try(void); void undo(void); void treewrite(boolean); void clade(void); void flip(long); void changeoutgroup(void); void redisplay(void); void treeconstruct(void); void maketriad(node **, long); void newdnamove_hyptrav(node *, long *, long, long, boolean, pointarray); void prepare_node(node *p); void dnamove_copynode(node *fromnode, node *tonode); node *copytrav(node *p); void chucktree(node *p); void numdesctrav(node *p); void copytree(void); void makeweights(void); void add_at(node *below, node *newtip, node *newfork); void add_before(node *atnode, node *newtip); void add_child(node *parent, node *newchild); void newdnamove_hyptrav(node *r_, long *hypset_, long b1, long b2, boolean bottom_, pointarray treenode); void newdnamove_hypstates(long chars, node *root, pointarray treenode); void consolidatetree(long index); void fliptrav(node *p, boolean recurse); /* function prototypes */ #endif char infilename[FNMLNGTH],intreename[FNMLNGTH], weightfilename[FNMLNGTH]; 
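/* Editor's note (illustrative sketch, not part of the original source): the
   two treeset_t slots declared above (treesets[2]), together with the
   whichtree/othertree indices declared just below, implement a one-level undo
   buffer: copytree() snapshots the working tree into treesets[othertree] via
   copytrav(), and undo() saves the current tree, swaps the two indices and
   restores root/treenode/nonodes from the other slot.  A minimal sketch of the
   same idea, with hypothetical names, is kept under "#if 0" so it does not
   affect compilation: */
#if 0
typedef struct { node *root; long nonodes; } snapshot_t;  /* stand-in for treeset_t */
static snapshot_t slot[2];
static int cur = 1, other = 0;

static void snapshot_save(node *r, long n)      /* analogue of copytree() */
{ slot[other].root = r; slot[other].nonodes = n; }

static void snapshot_swap(node **r, long *n)    /* analogue of undo() */
{
  slot[cur].root = *r; slot[cur].nonodes = *n;  /* save what we have now */
  cur ^= 1; other ^= 1;                         /* flip the two slots */
  *r = slot[cur].root; *n = slot[cur].nonodes;  /* restore the snapshot */
}
#endif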
node *root; const char* outtreename; AjPFile embossouttree; long chars, screenlines, col, treelines, leftedge, topedge, vmargin, hscroll, vscroll, scrollinc, screenwidth, farthest, whichtree, othertree; boolean weights, thresh, waswritten; boolean usertree, goteof, firsttree, haslengths; /*treeread variables*/ pointarray nodep; /*treeread variables*/ node *grbg = NULL; /*treeread variables*/ long *zeros; /*treeread variables*/ pointptr treenode; /* pointers to all nodes in tree */ double threshold; double *threshwt; boolean reversed[(long)question - (long)horiz + 1]; boolean graphic[(long)question - (long)horiz + 1]; unsigned char chh[(long)question - (long)horiz + 1]; howtree how; gbases *garbage; char *progname; /* Local variables for treeconstruct, propagated global for C version: */ long dispchar, atwhat, what, fromwhere, towhere, oldoutgrno, compatible; double like, bestyet, gotlike; boolean display, newtree, changed, subtree, written, oldwritten, restoring, wasleft, oldleft, earlytree; steptr necsteps; boolean *in_tree; long sett[31]; steptr numsteps; node *nuroot; rearrtype lastop; Char ch; boolean *names; void maketriad(node **p, long index) { /* Initiate an internal node with stubs for two children */ long i, j; node *q; q = NULL; for (i = 1; i <= 3; i++) { gnu(&grbg, p); (*p)->index = index; (*p)->hasname = false; (*p)->haslength = false; (*p)->deleted=false; (*p)->deadend=false; (*p)->onebranch=false; (*p)->onebranchhaslength=false; if(!(*p)->base) (*p)->base = (baseptr)Malloc(chars*sizeof(long)); if(!(*p)->numnuc) (*p)->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); if(!(*p)->numsteps) (*p)->numsteps = (steptr)Malloc(endsite*sizeof(long)); for (j=0;j<MAXNCH;j++) (*p)->nayme[j] = '\0'; (*p)->next = q; q = *p; } (*p)->next->next->next = *p; q = (*p)->next; while (*p != q) { (*p)->back = NULL; (*p)->tip = false; *p = (*p)->next; } treenode[index - 1] = *p; } /* maketriad */ void prepare_node(node *p) { /* This function allocates the base, numnuc and numsteps arrays for a node. Because a node can change roles between tip, internal and ring member, all nodes need to have these in case they are used. */ p->base = (baseptr)Malloc(chars*sizeof(long)); p->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); p->numsteps = (steptr)Malloc(endsite*sizeof(long)); } /* prepare_node */ void dnamove_gnu(gbases **p) { /* this and the following are do-it-yourself garbage collectors. Make a new node or pull one off the garbage list */ if (garbage != NULL) { *p = garbage; garbage = garbage->next; } else { *p = (gbases *)Malloc(sizeof(gbases)); (*p)->base = (baseptr2)Malloc(chars*sizeof(long)); } (*p)->next = NULL; } /* dnamove_gnu */ void dnamove_chuck(gbases *p) { /* collect garbage on p -- put it on front of garbage list */ p->next = garbage; garbage = p; } /* dnamove_chuck */ void dnamove_copynode(node *fromnode, node *tonode) { /* Copy the contents of a node from fromnode to tonode.
*/ int i = 0; /* printf("copynode: fromnode = %d, tonode = %d\n", fromnode->index,tonode->index); printf("copynode: fromnode->base = %ld, tonode->base = %ld\n", fromnode->base,tonode->base); */ memcpy(tonode->base, fromnode->base, chars*sizeof(long)); /* printf("copynode: fromnode->numnuc = %ld, tonode->numnuc = %ld\n", fromnode->numnuc,tonode->numnuc); */ if (fromnode->numnuc != NULL) memcpy(tonode->numnuc, fromnode->numnuc, endsite*sizeof(nucarray)); if (fromnode->numsteps != NULL) memcpy(tonode->numsteps, fromnode->numsteps, endsite*sizeof(long)); tonode->numdesc = fromnode->numdesc; tonode->state = fromnode->state; tonode->index = fromnode->index; tonode->tip = fromnode->tip; for (i=0;i<MAXNCH;i++) tonode->nayme[i] = fromnode->nayme[i]; } /* dnamove_copynode */ node *copytrav(node *p) { /* Traverse the tree from p on down, copying nodes to the other tree */ node *q, *newnode, *newnextnode, *temp; gnu(&grbg, &newnode); if(!newnode->base) newnode->base = (baseptr)Malloc(chars*sizeof(long)); if(!newnode->numnuc) newnode->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); if(!newnode->numsteps) newnode->numsteps = (steptr)Malloc(endsite*sizeof(long)); dnamove_copynode(p,newnode); if (treenode[p->index-1] == p) treesets[othertree].treenode[p->index-1] = newnode; /* if this is a tip, return now */ if (p->tip) return newnode; /* go around the ring, copying as we go */ q = p->next; gnu(&grbg, &newnextnode); if(!newnextnode->base) newnextnode->base = (baseptr)Malloc(chars*sizeof(long)); if(!newnextnode->numnuc) newnextnode->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); if(!newnextnode->numsteps) newnextnode->numsteps = (steptr)Malloc(endsite*sizeof(long)); dnamove_copynode(q, newnextnode); newnode->next = newnextnode; do { newnextnode->back = copytrav(q->back); newnextnode->back->back = newnextnode; q = q->next; if (q == p) newnextnode->next = newnode; else { temp = newnextnode; gnu(&grbg, &newnextnode); if(!newnextnode->base) newnextnode->base = (baseptr)Malloc(chars*sizeof(long)); if(!newnextnode->numnuc) newnextnode->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); if(!newnextnode->numsteps) newnextnode->numsteps = (steptr)Malloc(endsite*sizeof(long)); dnamove_copynode(q, newnextnode); temp->next = newnextnode; } } while (q != p); return newnode; } /* copytrav */ void numdesctrav(node *p) { node *q; long childcount = 0; if (p->tip) { p->numdesc = 0; return; } q = p->next; do { numdesctrav(q->back); childcount++; q = q->next; } while (q != p); p->numdesc = childcount; } /* numdesctrav */ void chucktree(node *p) { /* recursively run through a tree and chuck all of its nodes, putting them on the garbage list */ int i, numNodes = 1; node *q, *r; /* base case -- tip */ if(p->tip){ chuck(&grbg, p); return; } /* recursively call chucktree on all descendants */ q = p->next; while(q != p){ chucktree(q->back); numNodes++; q = q->next; } /* now chuck all sub-nodes in the node ring */ for(i=0 ; i < numNodes ; i++){ r = q->next; chuck(&grbg, q); q = r; } } /* chucktree */ void copytree(void) { /* Make a complete copy of the current tree for undo purposes */ if (whichtree == 1) othertree = 0; else othertree = 1; if(treesets[othertree].root){ chucktree(treesets[othertree].root); } treesets[othertree].root = copytrav(root); treesets[othertree].nonodes = nonodes; treesets[othertree].waswritten = waswritten; treesets[othertree].initialized = true; } /* copytree */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr initialtree = NULL; how = arb; usertree = false; goteof = false; outgrno = 1;
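/* Editor's aside (illustrative, not from the original source): copytrav(),
   chucktree() and numdesctrav() above all walk PHYLIP's ring representation,
   in which an internal node is a circular list of node records linked by
   ->next and each ring member's ->back points to a neighbouring subtree.
   A minimal sketch of that traversal pattern (hypothetical helper, assuming a
   well-formed ring; shown here only as documentation):

       static long count_subtree_nodes(node *p)
       {
         node *q;
         long n = 1;                              // p itself (a tip or one ring)
         if (p->tip)
           return n;
         for (q = p->next; q != p; q = q->next)   // every other ring member
           n += count_subtree_nodes(q->back);     // recurse into its subtree
         return n;
       }
*/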
outgropt = false; thresh = false; weights = false; screenlines = 24; scrollinc = 20; screenwidth = 80; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; thresh = ajAcdGetToggle("dothreshold"); if(thresh) threshold = ajAcdGetFloat("threshold"); initialtree = ajAcdGetListSingle("initialtree"); if(ajStrMatchC(initialtree, "a")) how = arb; if(ajStrMatchC(initialtree, "u")) how = use; if(ajStrMatchC(initialtree, "s")) { how = spec; phylotrees = ajAcdGetTree("intreefile"); usertree = true; } screenwidth = ajAcdGetInt("screenwidth"); screenlines = ajAcdGetInt("screenlines"); if (scrollinc < screenwidth / 2.0) hscroll = scrollinc; else hscroll = screenwidth / 2; if (scrollinc < screenlines / 2.0) vscroll = scrollinc; else vscroll = screenlines / 2; embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } /* emboss_getoptions */ void inputoptions(void) { /* input the information on the options */ long i; for (i = 0; i < (chars); i++) weight[i] = 1; if (weights){ inputweightsstr(phyloweights->Str[0], chars, weight, &weights); printweights(stdout, 0, chars, weight, "Sites"); } if (!thresh) threshold = spp; for (i = 0; i < (chars); i++) threshwt[i] = threshold * weight[i]; } /* inputoptions */ void allocrest(void) { long i; nayme = (naym *)Malloc(spp*sizeof(naym)); in_tree = (boolean *)Malloc(nonodes*sizeof(boolean)); weight = (steptr)Malloc(chars*sizeof(long)); numsteps = (steptr)Malloc(chars*sizeof(long)); necsteps = (steptr)Malloc(chars*sizeof(long)); threshwt = (double *)Malloc(chars*sizeof(double)); alias = (long *)Malloc(chars*sizeof(long)); /* from dnapars */ ally = (long *)Malloc(chars*sizeof(long)); /* from dnapars */ y = (Char **)Malloc(spp*sizeof(Char *)); /* from dnapars */ for (i = 0; i < spp; i++) /* from dnapars */ y[i] = (Char *)Malloc(chars*sizeof(Char)); /* from dnapars */ location = (long *)Malloc(chars*sizeof(long)); /* from dnapars */ } /* allocrest */ void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= chars; i++) { alias[i - 1] = i; ally[i - 1] = i; } endsite = 0; for (i = 1; i <= chars; i++) { if (ally[i - 1] == i) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; if (!thresh) threshold = spp; zeros = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) zeros[i] = 0; } /* makeweights */ void doinput(void) { /* reads the input data */ inputnumbersseq(seqsets[0], &spp, &chars, &nonodes, 1); printf("%2ld species, %3ld sites\n", spp, chars); allocrest(); inputoptions(); alloctree(&treenode, nonodes, usertree); setuptree(treenode, nonodes, usertree); seq_inputdata(seqsets[0], chars); makeweights(); makevalues(treenode, zeros, usertree); } /* doinput */ void configure(void) { /* configure to machine -- set up special characters */ chartype a; for (a = horiz; (long)a <= (long)question; a = (chartype)((long)a + 1)) reversed[(long)a] = false; for (a = horiz; (long)a <= (long)question; a = (chartype)((long)a + 1)) graphic[(long)a] = false; if (ibmpc) { chh[(long)horiz] = 205; graphic[(long)horiz] = true; chh[(long)vert] = 186; graphic[(long)vert] = true; chh[(long)up] = 186; graphic[(long)up] = true; chh[(long)overt] = 205; graphic[(long)overt] = true; chh[(long)upcorner] = 200; graphic[(long)upcorner] = true; chh[(long)midcorner] = 204; 
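/* Editor's note: on the ibmpc console these chh[] codes are IBM code page 437
   glyphs -- 205 and 186 are the double-line horizontal and vertical bars, 200,
   204 and 201 the matching corner and tee characters used to draw branches,
   and 176, 178, 177 and 219 (assigned below) are the light, dark, medium and
   solid shading blocks used to mark the A, C, G and T states in the display. */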
graphic[(long)midcorner] = true; chh[(long)downcorner] = 201; graphic[(long)downcorner] = true; chh[(long)aa] = 176; chh[(long)cc] = 178; chh[(long)gg] = 177; chh[(long)tt] = 219; chh[(long)question] = '\001'; return; } if (ansi) { chh[(long)horiz] = ' '; reversed[(long)horiz] = true; chh[(long)vert] = chh[(long)horiz]; reversed[(long)vert] = true; chh[(long)up] = 'x'; graphic[(long)up] = true; chh[(long)overt] = 'q'; graphic[(long)overt] = true; chh[(long)upcorner] = 'm'; graphic[(long)upcorner] = true; chh[(long)midcorner] = 't'; graphic[(long)midcorner] = true; chh[(long)downcorner] = 'l'; graphic[(long)downcorner] = true; chh[(long)aa] = 'a'; reversed[(long)aa] = true; chh[(long)cc] = 'c'; reversed[(long)cc] = true; chh[(long)gg] = 'g'; reversed[(long)gg] = true; chh[(long)tt] = 't'; reversed[(long)tt] = true; chh[(long)question] = '?'; reversed[(long)question] = true; return; } chh[(long)horiz] = '='; chh[(long)vert] = ' '; chh[(long)up] = '!'; chh[(long)upcorner] = '`'; chh[(long)midcorner] = '+'; chh[(long)downcorner] = ','; chh[(long)overt] = '-'; chh[(long)aa] = 'a'; chh[(long)cc] = 'c'; chh[(long)gg] = 'g'; chh[(long)tt] = 't'; chh[(long)question] = '.'; } /* configure */ void prefix(chartype a) { /* give prefix appropriate for this character */ if (reversed[(long)a]) prereverse(ansi); if (graphic[(long)a]) pregraph2(ansi); } /* prefix */ void postfix(chartype a) { /* give postfix appropriate for this character */ if (reversed[(long)a]) postreverse(ansi); if (graphic[(long)a]) postgraph2(ansi); } /* postfix */ void makechar(chartype a) { /* print out a character with appropriate prefix or postfix */ prefix(a); putchar(chh[(long)a]); postfix(a); } /* makechar */ void add_at(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ node *leftdesc, *rtdesc; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (newfork == NULL) { nonodes++; maketriad (&newfork, nonodes); } if (below->back != NULL) { below->back->back = newfork; } newfork->back = below->back; leftdesc = newtip; rtdesc = below; rtdesc->back = newfork->next->next; newfork->next->next->back = rtdesc; newfork->next->back = leftdesc; leftdesc->back = newfork->next; if (root == below) root = newfork; root->back = NULL; } /* add_at */ void add_before(node *atnode, node *newtip) { /* inserts the node newtip together with its ancestral fork into the tree next to the node atnode. 
*/ node *q; if (atnode != treenode[atnode->index - 1]) atnode = treenode[atnode->index - 1]; q = treenode[newtip->index-1]->back; if (q != NULL) { q = treenode[q->index-1]; if (newtip == q->next->next->back) { q->next->back = newtip; newtip->back = q->next; q->next->next->back = NULL; } } if (newtip->back != NULL) { add_at(atnode, newtip, treenode[newtip->back->index-1]); } else { add_at(atnode, newtip, NULL); } } /* add_before */ void add_child(node *parent, node *newchild) { /* adds the node newchild into the tree as the last child of parent */ int i; node *newnode, *q; if (parent != treenode[parent->index - 1]) parent = treenode[parent->index - 1]; gnu(&grbg, &newnode); newnode->tip = false; newnode->deleted=false; newnode->deadend=false; newnode->onebranch=false; newnode->onebranchhaslength=false; for (i=0;i<MAXNCH;i++) newnode->nayme[i] = '\0'; newnode->index = parent->index; if(!newnode->base) newnode->base = (baseptr)Malloc(chars*sizeof(long)); if(!newnode->numnuc) newnode->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); if(!newnode->numsteps) newnode->numsteps = (steptr)Malloc(endsite*sizeof(long)); q = parent; do { q = q->next; } while (q->next != parent); newnode->next = parent; q->next = newnode; newnode->back = newchild; newchild->back = newnode; if (newchild->haslength) { newnode->length = newchild->length; newnode->haslength = true; } else newnode->haslength = false; } /* add_child */ void dnamove_add(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ boolean putleft; node *leftdesc, *rtdesc; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; putleft = true; if (restoring) putleft = wasleft; if (putleft) { leftdesc = newtip; rtdesc = below; } else { leftdesc = below; rtdesc = newtip; } rtdesc->back = newfork->next->next; newfork->next->next->back = rtdesc; newfork->next->back = leftdesc; leftdesc->back = newfork->next; if (root == below) root = newfork; root->back = NULL; newfork->numdesc = 2; } /* dnamove_add */ void dnamove_re_move(node **item, node **fork) { /* Removes node item from the tree. If item has one sibling, removes its ancestor, fork, from the tree as well and attaches item's sib to fork's ancestor. In this case, it returns a pointer to the removed fork node which is still attached to item.
*/ node *p=NULL, *q; int nodecount; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = treenode[(*item)->back->index - 1]; nodecount = 0; if ((*fork)->next->back == *item) p = *fork; q = (*fork)->next; do { nodecount++; if (q->next->back == *item) p = q; q = q->next; } while (*fork != q); if (nodecount > 2) { fromtype = atnode; p->next = (*item)->back->next; chuck(&grbg, (*item)->back); (*item)->back = NULL; *fork = NULL; } else { /* traditional (binary tree) remove code */ if (*item == (*fork)->next->back) { if (root == *fork) root = (*fork)->next->next->back; } else { if (root == *fork) root = (*fork)->next->back; } fromtype = beforenode; /* stitch nodes together, leaving out item */ p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; if (haslengths) { if (p != NULL && q != NULL) { p->length += q->length; q->length = p->length; } else (*item)->length = (*fork)->next->length + (*fork)->next->next->length; } (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; } /* endif nodecount > 2 else */ } /* dnamove_re_move */ void evaluate(node *r) { /* determines the number of steps needed for a tree. this is the minimum number of steps needed to evolve sequences on this tree */ long i, steps; double sum; compatible = 0; sum = 0.0; for (i = 0; i < (chars); i++) numsteps[i] = 0; /* set numdesc at each node to reflect current number of descendants */ numdesctrav(root); postorder(r); for (i = 0; i < endsite; i++) { steps = r->numsteps[i]; if (steps <= threshwt[i]) { sum += steps; } else { sum += threshwt[i]; } if (steps <= necsteps[i] && !earlytree) compatible += weight[i]; } like = -sum; } /* evaluate */ void dnamove_reroot(node *outgroup) { /* Reorient tree so that outgroup is by itself on the left of the root */ node *p, *q, *r; long nodecount = 0; double templen; if(outgroup->back->index == root->index) return; q = root->next; do { /* when this loop exits, p points to the internal */ p = q; /* node to the right of root */ nodecount++; q = p->next; } while (q != root); r = p; /* reorient nodep array The nodep array must point to the ring member of each ring that is closest to the root. The while loop changes the ring member pointed to by treenode[] for those nodes that will have their orientation changed by the reroot operation. */ p = outgroup->back; while (p->index != root->index) { q = treenode[p->index - 1]->back; treenode[p->index - 1] = p; p = q; } if (nodecount > 2) treenode[p->index - 1] = p; /* If nodecount > 2, the current node ring to which root is pointing will remain in place and root will point somewhere else. 
*/ /* detach root from old location */ if (nodecount > 2) { r->next = root->next; root->next = NULL; nonodes++; maketriad(&root, nonodes); if (haslengths) { /* root->haslength remains false, or else treeout() will generate a bogus extra length */ root->next->haslength = true; root->next->next->haslength = true; } } else { /* if (nodecount > 2) else */ q = root->next; q->back->back = r->back; r->back->back = q->back; if (haslengths) { r->back->length = r->back->length + q->back->length; q->back->length = r->back->length; } } /* if (nodecount > 2) endif */ /* tie root into new location */ root->next->back = outgroup; root->next->next->back = outgroup->back; outgroup->back->back = root->next->next; outgroup->back = root->next; /* place root equidistant between left child (outgroup) and right child by deviding outgroup's length */ if (haslengths) { templen = outgroup->length / 2.0; outgroup->length = templen; outgroup->back->length = templen; root->next->next->length = templen; root->next->next->back->length = templen; } } /* dnamove_reroot */ void newdnamove_hyptrav(node *r_, long *hypset_, long b1, long b2, boolean bottom_, pointarray treenode) { /* compute, print out states at one interior node */ struct LOC_hyptrav Vars; long i, j, k; long largest; gbases *ancset; nucarray *tempnuc; node *p, *q; Vars.bottom = bottom_; Vars.r = r_; Vars.hypset = hypset_; dnamove_gnu(&ancset); tempnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); Vars.maybe = false; Vars.nonzero = false; if (!Vars.r->tip) zeronumnuc(Vars.r, endsite); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; Vars.anc = Vars.hypset[j - 1]; if (!Vars.r->tip) { p = Vars.r->next; for (k = (long)A; k <= (long)O; k++) if (Vars.anc & (1 << k)) Vars.r->numnuc[j - 1][k]++; do { for (k = (long)A; k <= (long)O; k++) if (p->back->base[j - 1] & (1 << k)) Vars.r->numnuc[j - 1][k]++; p = p->next; } while (p != Vars.r); largest = getlargest(Vars.r->numnuc[j - 1]); Vars.tempset = 0; for (k = (long)A; k <= (long)O; k++) { if (Vars.r->numnuc[j - 1][k] == largest) Vars.tempset |= (1 << k); } Vars.r->base[j - 1] = Vars.tempset; } if (!Vars.bottom) Vars.anc = treenode[Vars.r->back->index - 1]->base[j - 1]; Vars.nonzero = (Vars.nonzero || (Vars.r->base[j - 1] & Vars.anc) == 0); Vars.maybe = (Vars.maybe || Vars.r->base[j - 1] != Vars.anc); } j = location[ally[dispchar - 1] - 1]; Vars.tempset = Vars.r->base[j - 1]; Vars.anc = Vars.hypset[j - 1]; if (!Vars.bottom) Vars.anc = treenode[Vars.r->back->index - 1]->base[j - 1]; r_->state = '?'; if (Vars.tempset == (1 << A)) r_->state = 'A'; if (Vars.tempset == (1 << C)) r_->state = 'C'; if (Vars.tempset == (1 << G)) r_->state = 'G'; if (Vars.tempset == (1 << T)) r_->state = 'T'; Vars.bottom = false; if (!Vars.r->tip) { memcpy(tempnuc, Vars.r->numnuc, endsite*sizeof(nucarray)); q = Vars.r->next; do { memcpy(Vars.r->numnuc, tempnuc, endsite*sizeof(nucarray)); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; for (k = (long)A; k <= (long)O; k++) if (q->back->base[j - 1] & (1 << k)) Vars.r->numnuc[j - 1][k]--; largest = getlargest(Vars.r->numnuc[j - 1]); ancset->base[j - 1] = 0; for (k = (long)A; k <= (long)O; k++) if (Vars.r->numnuc[j - 1][k] == largest) ancset->base[j - 1] |= (1 << k); if (!Vars.bottom) Vars.anc = ancset->base[j - 1]; } newdnamove_hyptrav(q->back, ancset->base, b1, b2, Vars.bottom, treenode); q = q->next; } while (q != Vars.r); } dnamove_chuck(ancset); } /* newdnamove_hyptrav */ void newdnamove_hypstates(long chars, node *root, pointarray treenode) { /* fill in and describe states 
at interior nodes */ /* used in dnacomp, dnapars, & dnapenny */ long i, n; baseptr nothing; /* garbage is passed along without usage to newdnamove_hyptrav, which also does not use it. */ nothing = (baseptr)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) nothing[i] = 0; for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { putc('\n', outfile); n = i * 40; if (n > chars) n = chars; newdnamove_hyptrav(root, nothing, i * 40 - 39, n, true, treenode); } free(nothing); } /* newdnamove_hypstates */ void grwrite(chartype c, long num, long *pos) { long i; prefix(c); for (i = 1; i <= num; i++) { if ((*pos) >= leftedge && (*pos) - leftedge + 1 < screenwidth) putchar(chh[(long)c]); (*pos)++; } postfix(c); } /* grwrite */ void dnamove_drawline(long i) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j, pos; boolean extra, done; Char st; chartype c, d; pos = 1; p = nuroot; q = nuroot; extra = false; if (i == p->ycoord && (p == root || subtree)) { extra = true; c = overt; if (display) { switch (p->state) { case 'A': c = aa; break; case 'C': c = cc; break; case 'G': c = gg; break; case 'T': c = tt; break; case '?': c = question; break; } } if ((subtree)) stwrite("Subtree:", 8, &pos, leftedge, screenwidth); if (p->index >= 100) nnwrite(p->index, 3, &pos, leftedge, screenwidth); else if (p->index >= 10) { grwrite(c, 1, &pos); nnwrite(p->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(p->index, 1, &pos, leftedge, screenwidth); } } else { if ((subtree)) stwrite(" ", 10, &pos, leftedge, screenwidth); else stwrite(" ", 2, &pos, leftedge, screenwidth); } do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = p->xcoord - q->xcoord; if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if (q->ycoord == i && !done) { c = overt; if (q == first) d = downcorner; else if (q == last) d = upcorner; else if ((long)q->ycoord == (long)p->ycoord) d = c; else d = midcorner; if (display) { switch (q->state) { case 'A': c = aa; break; case 'C': c = cc; break; case 'G': c = gg; break; case 'T': c = tt; break; case '?': c = question; break; } d = c; } if (n > 1) { grwrite(d, 1, &pos); grwrite(c, n - 3, &pos); } if (q->index >= 100) nnwrite(q->index, 3, &pos, leftedge, screenwidth); else if (q->index >= 10) { grwrite(c, 1, &pos); nnwrite(q->index, 2, &pos, leftedge, screenwidth); } else { grwrite(c, 2, &pos); nnwrite(q->index, 1, &pos, leftedge, screenwidth); } extra = true; } else if (!q->tip) { if (last->ycoord > i && first->ycoord < i && i != p->ycoord) { c = up; if (i < p->ycoord) st = p->next->back->state; else st = p->next->next->back->state; if (display) { switch (st) { case 'A': c = aa; break; case 'C': c = cc; break; case 'G': c = gg; break; case 'T': c = tt; break; case '?': c = question; break; } } grwrite(c, 1, &pos); chwrite(' ', n - 1, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); } else chwrite(' ', n, &pos, leftedge, screenwidth); if (p != q) p = q; } while (!done); if (p->ycoord == i && p->tip) { n = 0; for (j = 1; j <= nmlngth; j++) { if (nayme[p->index - 1][j - 1] != '\0') n = j; } chwrite(':', 1, &pos, leftedge, screenwidth); for (j = 0; j < n; j++) chwrite(nayme[p->index - 1][j], 1, &pos, leftedge, screenwidth); } putchar('\n'); } /* 
dnamove_drawline */ void dnamove_printree(void) { /* prints out diagram of the tree */ long tipy; long i, dow; if (!subtree) nuroot = root; if (changed || newtree) evaluate(root); if (display) { outfile = stdout; newdnamove_hypstates(chars, root, treenode); } #ifdef WIN32 if(ibmpc || ansi) phyClearScreen(); else printf("\n"); #else printf((ansi || ibmpc) ? "\033[2J\033[H" : "\n"); #endif tipy = 1; dow = down; if (spp * dow > screenlines && !subtree) dow--; printf(" (unrooted)"); if (display) { printf(" "); makechar(aa); printf(":A "); makechar(cc); printf(":C "); makechar(gg); printf(":G "); makechar(tt); printf(":T "); makechar(question); printf(":?"); } else printf(" "); if (!earlytree) { printf("%10.1f Steps", -like); } if (display) printf(" SITE%4ld", dispchar); else printf(" "); if (!earlytree) { printf(" %3ld sites compatible\n", compatible); } printf(" "); if (changed && !earlytree) { if (-like < bestyet) { printf(" BEST YET!"); bestyet = -like; } else if (fabs(-like - bestyet) < 0.000001) printf(" (as good as best)"); else { if (-like < gotlike) printf(" better"); else if (-like > gotlike) printf(" worse!"); } } printf("\n"); farthest = 0; coordinates(nuroot, &tipy, 1.5, &farthest); vmargin = 4; treelines = tipy - dow; if (topedge != 1) { printf("** %ld lines above screen **\n", topedge - 1); vmargin++; } if ((treelines - topedge + 1) > (screenlines - vmargin)) vmargin++; for (i = 1; i <= treelines; i++) { if (i >= topedge && i < topedge + screenlines - vmargin) dnamove_drawline(i); } if ((treelines - topedge + 1) > (screenlines - vmargin)) { printf("** %ld", treelines - (topedge - 1 + screenlines - vmargin)); printf(" lines below screen **\n"); } if (treelines - topedge + vmargin + 1 < screenlines) putchar('\n'); gotlike = -like; changed = false; } /* dnamove_printree */ void arbitree(void) { long i; root = treenode[0]; dnamove_add(treenode[0], treenode[1], treenode[spp]); for (i = 3; i <= (spp); i++) { dnamove_add(treenode[spp + i - 3], treenode[i - 1], treenode[spp + i - 2]); } } /* arbitree */ void yourtree(void) { long i, j; boolean ok; root = treenode[0]; dnamove_add(treenode[0], treenode[1], treenode[spp]); i = 2; do { i++; dnamove_printree(); printf("Add species%3ld: ", i); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); do { printf("\n at or before which node (type number): "); inpnum(&j, &ok); ok = (ok && ((j >= 1 && j < i) || (j > spp && j < spp + i - 1))); if (!ok) printf("Impossible number. Please try again:\n"); } while (!ok); if (j >= i) { /* has user chosen a non-tip? if so, offer choice */ do { printf(" Insert at node (A) or before node (B)? "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = isupper((int)ch) ? 
ch : toupper((int)ch); } while (ch != 'A' && ch != 'B'); } else ch = 'B'; /* if user has chosen a tip, set Before */ if (j != 0) { if (ch == 'A') { if (!treenode[j - 1]->tip) { add_child(treenode[j - 1], treenode[i - 1]); } } else { printf("dnamove_add(below %ld, newtip %ld, newfork %ld)\n",j-1,i-1,spp+i-2); dnamove_add(treenode[j - 1], treenode[i - 1], treenode[spp + i - 2]); } /* endif (before or at node) */ } } while (i != spp); } /* yourtree */ void initdnamovenode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ /* LM 7/27 I added this function and the commented lines around */ /* treeread() to get the program running, but all 4 move programs*/ /* are improperly integrated into the v4.0 support files. As is */ /* endsite = chars and this is a patchwork function */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, endsite, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, endsite, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); /* process lengths and discard */ default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; } } /* initdnamovenode */ void buildtree(void) { long i, nextnode; node *p; long j; char* treestr; treeone = (node **)Malloc(maxsz*sizeof(node *)); treetwo = (node **)Malloc(maxsz*sizeof(node *)); treesets[othertree].treenode = treetwo; changed = false; newtree = false; switch (how) { case arb: treesets[othertree].treenode = treetwo; arbitree(); break; case use: names = (boolean *)Malloc(spp*sizeof(boolean)); firsttree = true; nodep = NULL; nextnode = 0; haslengths = 0; for (i = 0; i < endsite; i++) zeros[i] = 0; treesets[whichtree].nodep = nodep; treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdnamovenode,true,nonodes); for (i = spp; i < (nextnode); i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->base = (baseptr2)Malloc(chars*sizeof(long)); p = p->next; } } /* debug: see comment at initdnamovenode() */ free(names); FClose(intree); break; case spec: treesets[othertree].treenode = treetwo; yourtree(); break; } if (!outgropt) outgrno = root->next->back->index; if (outgropt) dnamove_reroot(treenode[outgrno - 1]); } /* buildtree */ void setorder(void) { /* sets in order of number of members */ sett[0] = 1L << ((long)A); sett[1] = 1L << ((long)C); sett[2] = 1L << ((long)G); sett[3] = 1L << ((long)T); sett[4] = 1L << ((long)O); sett[5] = (1L << ((long)A)) | (1L << ((long)C)); sett[6] = (1L << ((long)A)) | (1L << ((long)G)); sett[7] = (1L << ((long)A)) | (1L << ((long)T)); sett[8] = (1L << ((long)A)) | (1L << ((long)O)); sett[9] = (1L << ((long)C)) | (1L << ((long)G)); sett[10] = (1L << ((long)C)) | (1L << ((long)T)); sett[11] = (1L << ((long)C)) | (1L << ((long)O)); sett[12] = (1L << ((long)G)) | (1L << ((long)T)); sett[13] = (1L << ((long)G)) | (1L << ((long)O)); sett[14] = (1L << ((long)T)) | (1L << ((long)O)); sett[15] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)G)); sett[16] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)T)); sett[17] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)O)); sett[18] = (1L << ((long)A)) | (1L << ((long)G)) | (1L << ((long)T)); sett[19] = (1L << ((long)A)) 
| (1L << ((long)G)) | (1L << ((long)O)); sett[20] = (1L << ((long)A)) | (1L << ((long)T)) | (1L << ((long)O)); sett[21] = (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)T)); sett[22] = (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)O)); sett[23] = (1L << ((long)C)) | (1L << ((long)T)) | (1L << ((long)O)); sett[24] = (1L << ((long)G)) | (1L << ((long)T)) | (1L << ((long)O)); sett[25] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)T)); sett[26] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)O)); sett[27] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)T)) | (1L << ((long)O)); sett[28] = (1L << ((long)A)) | (1L << ((long)G)) | (1L << ((long)T)) | (1L << ((long)O)); sett[29] = (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)T)) | (1L << ((long)O)); sett[30] = (1L << ((long)A)) | (1L << ((long)C)) | (1L << ((long)G)) | (1L << ((long)T)) | (1L << ((long)O)); } /* setorder */ void mincomp(void) { /* computes for each site the minimum number of steps necessary to accomodate those species already in the analysis */ long i, j, k; boolean done; for (i = 0; i < (chars); i++) { done = false; j = 0; while (!done) { j++; done = true; k = 1; do { if (k < nonodes) done = (done && (treenode[k - 1]->base[i] & sett[j - 1]) != 0); k++; } while (k <= spp && done); } if (j == 31) necsteps[i] = 4; if (j <= 30) necsteps[i] = 3; if (j <= 25) necsteps[i] = 2; if (j <= 15) necsteps[i] = 1; if (j <= 5) necsteps[i] = 0; necsteps[i] *= weight[i]; } } /* mincomp */ void consolidatetree(long index) { node *start, *r, *q; int i; i = 0; start = treenode[index - 1]; q = start->next; while (q != start) { r = q; q = q->next; chuck(&grbg, r); } chuck(&grbg, q); i = index; while (i <= nonodes) { r = treenode[i - 1]; if (!(r->tip)) r->index--; if (!(r->tip)) { q = r->next; do { q->index--; q = q->next; } while (r != q && q != NULL); } treenode[i - 1] = treenode[i]; i++; } nonodes--; } /* consolidatetree */ void rearrange(void) { long i, j, maxinput; boolean ok1, ok2; node *p, *q; char ch; printf("Remove everything to the right of which node? "); inpnum(&i, &ok1); ok1 = (ok1 && i >= 1 && i <= (spp * 2 - 1) && i != root->index); if (ok1) ok1 = !treenode[i - 1]->deleted; if (ok1) { printf("Add at or before which node? "); inpnum(&j, &ok2); ok2 = (ok2 && j >= 1 && j <= (spp * 2 - 1)); if (ok2) { if (j != root->index) ok2 = !treenode[treenode[j - 1]->back->index - 1]->deleted; } if (ok2) { /*xx This edit says "j must not be i's parent." Is this necessary anymore? */ /* ok2 = (nodep[j - 1] != nodep[nodep[i - 1]->back->index - 1]);*/ p = treenode[j - 1]; /* make sure that j is not a descendent of i */ while (p != root) { ok2 = (ok2 && p != treenode[i - 1]); p = treenode[p->back->index - 1]; } if (ok1 && ok2) { maxinput = 1; do { printf("Insert at node (A) or before node (B)? "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; ch = isupper((int)ch) ? 
ch : toupper((int)ch); maxinput++; if (maxinput == 100) { printf("ERROR: too many tries at choosing option\n"); embExitBad(); } } while (ch != 'A' && ch != 'B'); if (ch == 'A') { if (!(treenode[j - 1]->deleted) && !treenode[j - 1]->tip) { changed = true; copytree(); dnamove_re_move(&treenode[i - 1], &q); add_child(treenode[j - 1], treenode[i - 1]); if (fromtype == beforenode) consolidatetree(q->index); } else ok2 = false; } else { if (j != root->index) { /* can't insert at root */ changed = true; copytree(); dnamove_re_move(&treenode[i - 1], &q); if (q != NULL) { treenode[q->index-1]->next->back = treenode[i-1]; treenode[i-1]->back = treenode[q->index-1]->next; } add_before(treenode[j - 1], treenode[i - 1]); } else ok2 = false; } /* endif (before or at node) */ } /* endif (ok to do move) */ } /* endif (destination node ok) */ } /* endif (from node ok) */ dnamove_printree(); if (!(ok1 && ok2)) printf("Not a possible rearrangement. Try again: \n"); else { written = false; } } /* rearrange */ void dnamove_nextinc(void) { /* show next incompatible site */ long disp0; boolean done; display = true; disp0 = dispchar; done = false; do { dispchar++; if (dispchar > chars) { dispchar = 1; done = (disp0 == 0); } } while (!(necsteps[dispchar - 1] != numsteps[dispchar - 1] || dispchar == disp0 || done)); dnamove_printree(); } /* dnamove_nextinc */ void dnamove_nextchar(void) { /* show next site */ display = true; dispchar++; if (dispchar > chars) dispchar = 1; dnamove_printree(); } /* dnamove_nextchar */ void dnamove_prevchar(void) { /* show previous site */ display = true; dispchar--; if (dispchar < 1) dispchar = chars; dnamove_printree(); } /* dnamove_prevchar */ void dnamove_show(void) { long i; boolean ok; do { printf("SHOW: (Character number or 0 to see none)? "); inpnum(&i, &ok); ok = (ok && (i == 0 || (i >= 1 && i <= chars))); if (ok && i != 0) { display = true; dispchar = i; } if (ok && i == 0) display = false; } while (!ok); dnamove_printree(); } /* dnamove_show */ void tryadd(node *p, node **item, node **nufork, double *place) { /* temporarily adds one fork and one tip to the tree. Records scores in ARRAY place */ dnamove_add(p, *item, *nufork); evaluate(root); place[p->index - 1] = -like; dnamove_re_move(item, nufork); } /* tryadd */ void addpreorder(node *p, node *item_, node *nufork_, double *place) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ node *item, *nufork, *q; item = item_; nufork = nufork_; if (p == NULL) return; tryadd(p,&item,&nufork,place); if (!p->tip) { q = p->next; do { addpreorder(q->back, item,nufork,place); q = q->next; } while (q != p); } } /* addpreorder */ void try(void) { /* Remove node, try it in all possible places */ double *place; long i, j, oldcompat, saveparent; double current; node *q, *dummy, *rute; boolean tied, better, ok, madenode; madenode = false; printf("Try other positions for which node? "); inpnum(&i, &ok); if (!(ok && i >= 1 && i <= nonodes && i != root->index)) { printf("Not a possible choice! 
"); return; } copytree(); printf("WAIT ...\n"); place = (double *)Malloc(nonodes*sizeof(double)); for (j = 0; j < (nonodes); j++) place[j] = -1.0; evaluate(root); current = -like; oldcompat = compatible; what = i; /* q = ring base of i's parent */ q = treenode[treenode[i - 1]->back->index - 1]; saveparent = q->index; /* if i is a left child, fromwhere = index of right sibling (binary) */ /* if i is a right child, fromwhere = index of left sibling (binary) */ if (q->next->back->index == i) fromwhere = q->next->next->back->index; else fromwhere = q->next->back->index; rute = root; /* if root is i's parent ... */ if (q->next->next->next == q) { if (root == treenode[treenode[i - 1]->back->index - 1]) { /* if i is left child then rute becomes right child, and vice-versa */ if (treenode[treenode[i - 1]->back->index - 1]->next->back == treenode[i - 1]) rute = treenode[treenode[i - 1]->back->index - 1]->next->next->back; else rute = treenode[treenode[i - 1]->back->index - 1]->next->back; } } /* Remove i and perhaps its parent node from the tree. If i is part of a multifurcation, *dummy will come back null. If so, make a new internal node to be i's parent as it is inserted in various places around the tree. */ dnamove_re_move(&treenode[i - 1], &dummy); if (dummy == NULL) { madenode = true; nonodes++; maketriad(&dummy, nonodes); } oldleft = wasleft; root = rute; addpreorder(root, treenode[i - 1], dummy, place); wasleft = oldleft; restoring = true; if (madenode) { add_child(treenode[saveparent - 1], treenode[i - 1]); nonodes--; } else dnamove_add(treenode[fromwhere - 1], treenode[what - 1], q); like = -current; compatible = oldcompat; restoring = false; better = false; printf(" BETTER: "); for (j = 1; j <= (nonodes); j++) { if (place[j - 1] < current && place[j - 1] >= 0.0) { printf("%3ld:%6.2f", j, place[j - 1]); better = true; } } if (!better) printf(" NONE"); printf("\n TIED: "); tied = false; for (j = 1; j <= (nonodes); j++) { if (fabs(place[j - 1] - current) < 1.0e-6 && j != fromwhere) { if (j < 10) printf("%2ld", j); else printf("%3ld", j); tied = true; } } if (tied) printf(":%6.2f\n", current); else printf("NONE\n"); changed = true; free(place); } /* try */ void undo(void) { boolean btemp; /* don't undo to an uninitialized tree */ if (!treesets[othertree].initialized) { dnamove_printree(); printf("Nothing to undo.\n"); return; } treesets[whichtree].root = root; treesets[whichtree].treenode = treenode; treesets[whichtree].nonodes = nonodes; treesets[whichtree].waswritten = waswritten; treesets[whichtree].initialized = true; whichtree = othertree; root = treesets[whichtree].root; treenode = treesets[whichtree].treenode; nonodes = treesets[whichtree].nonodes; waswritten = treesets[whichtree].waswritten; if (othertree == 0) othertree = 1; else othertree = 0; changed = true; dnamove_printree(); btemp = oldwritten; oldwritten = written; written = btemp; } /* undo */ void treewrite(boolean done) { /* write out tree to a file */ //treeoptions(waswritten, &ch, &outtree, outtreename, progname); if (!done) dnamove_printree(); if (waswritten && ch != 'A' && ch != 'R') return; col = 0; treeout(root, 1, &col, root); printf("\nTree written to file \"%s\"\n\n", outtreename); waswritten = true; written = true; FClose(outtree); #ifdef MAC fixmacfile(outtreename); #endif } /* treewrite */ void clade(void) { /* pick a subtree and show only that on screen */ long i; boolean ok; printf("Select subtree rooted at which node (0 for whole tree)? 
"); inpnum(&i, &ok); ok = (ok && ((unsigned)i) <= ((unsigned)nonodes)); if (ok) { subtree = (i > 0); if (subtree) nuroot = treenode[i - 1]; else nuroot = root; } dnamove_printree(); if (!ok) printf("Not possible to use this node. "); } /* clade */ void fliptrav(node *p, boolean recurse) { node *q, *temp, *r =NULL, *rprev =NULL, *l, *lprev; boolean lprevflag; int nodecount, loopcount, i; if (p->tip) return; q = p->next; l = q; lprev = p; nodecount = 0; do { nodecount++; if (q->next->next == p) { rprev = q; r = q->next; } q = q->next; } while (p != q); if (nodecount == 1) return; loopcount = nodecount / 2; for (i=0; inext = r; rprev->next = l; temp = r->next; r->next = l->next; l->next = temp; if (i < (loopcount - 1)) { lprevflag = false; q = p->next; do { if (q == lprev->next && !lprevflag) { lprev = q; l = q->next; lprevflag = true; } if (q->next == rprev) { rprev = q; r = q->next; } q = q->next; } while (p != q); } } if (recurse) { q = p->next; do { fliptrav(q->back, true); q = q->next; } while (p != q); } } /* fliptrav */ void flip(long atnode) { /* flip at a node left-right */ long i; boolean ok; if (atnode == 0) { printf("Flip branches at which node? "); inpnum(&i, &ok); ok = (ok && i > spp && i <= nonodes); } else { i = atnode; ok = true; } if (ok) { copytree(); fliptrav(treenode[i - 1], true); } if (atnode == 0) dnamove_printree(); if (ok) { written = false; return; } if ((i >= 1 && i <= spp) || (i > spp && i <= nonodes)) printf("Can't flip there. "); else printf("No such node. "); } /* flip */ void changeoutgroup(void) { long i; boolean ok; oldoutgrno = outgrno; do { printf("Which node should be the new outgroup? "); inpnum(&i, &ok); ok = (ok && i >= 1 && i <= nonodes && i != root->index); if (ok) outgrno = i; } while (!ok); copytree(); dnamove_reroot(treenode[outgrno - 1]); changed = true; lastop = reroott; dnamove_printree(); oldwritten = written; written = false; } /* changeoutgroup */ void redisplay(void) { boolean done = false; waswritten = false; do { fprintf(stderr, "NEXT (R # + - S . T U W O F H J K L C ? X Q) "); fprintf(stderr, "(? for Help): "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); if (strchr("HJKLCFORSTUXQ+#-.W?",ch) != NULL){ switch (ch) { case 'R': rearrange(); break; case '#': dnamove_nextinc(); break; case '+': dnamove_nextchar(); break; case '-': dnamove_prevchar(); break; case 'S': dnamove_show(); break; case '.': dnamove_printree(); break; case 'T': try(); break; case 'U': undo(); break; case 'W': treewrite(done); break; case 'O': changeoutgroup(); break; case 'F': flip(0); break; case 'H': window(left, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dnamove_printree(); break; case 'J': window(downn, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dnamove_printree(); break; case 'K': window(upp, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dnamove_printree(); break; case 'L': window(right, &leftedge, &topedge, hscroll, vscroll, treelines, screenlines, screenwidth, farthest, subtree); dnamove_printree(); break; case 'C': clade(); break; case '?': help("site"); dnamove_printree(); break; case 'X': done = true; break; case 'Q': done = true; break; } } } while (!done); if (written) return; do { fprintf(stderr, "Do you want to write out the tree to a file? 
(Y or N): "); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == 'Y' || ch == 'y') treewrite(done); } while (ch != 'Y' && ch != 'y' && ch != 'N' && ch != 'n'); } /* redisplay */ void treeconstruct(void) { /* constructs a binary tree from the pointers in treenode. */ int i; restoring = false; subtree = false; display = false; dispchar = 0; earlytree = true; waswritten = false; buildtree(); /* get an accurate value for nonodes by finding out where the nodes really stop */ for (i=0;i 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } progress = ajAcdGetBoolean("progress"); printdata = ajAcdGetBoolean("printdata"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nPenny algorithm for DNA, version %s\n",VERSION); fprintf(outfile, " branch-and-bound to find all"); fprintf(outfile, " most parsimonious trees\n\n"); printf("justweights: %s\n", (justwts ? "true" : "false")); printf("numwts: %d\n", numwts); } /* emboss_getoptions */ void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc(chars*sizeof(Char)); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (steptr)Malloc(chars*sizeof(long)); ally = (steptr)Malloc(chars*sizeof(long)); location = (steptr)Malloc(chars*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); bestorders = (treenumbers *)Malloc(maxtrees*sizeof(treenumbers)); for (i = 1; i <= maxtrees; i++) bestorders[i - 1] = (treenumbers)Malloc(spp*sizeof(long)); bestrees = (treenumbers *)Malloc(maxtrees*sizeof(treenumbers)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1] = (treenumbers)Malloc(spp*sizeof(long)); current = (treenumbers)Malloc(spp*sizeof(long)); order = (treenumbers)Malloc(spp*sizeof(long)); added = (boolean *)Malloc(nonodes*sizeof(boolean)); } /* allocrest */ void reallocchars(void) {/* The amount of chars can change between runs this function reallocates all the variables whose size depends on the amount of chars */ long i; for (i = 0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(chars*sizeof(Char)); } free(weight); free(oldweight); free(alias); free(ally); free(location); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (steptr)Malloc(chars*sizeof(long)); ally = (steptr)Malloc(chars*sizeof(long)); location = (steptr)Malloc(chars*sizeof(long)); } /* reallocchars */ void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &chars, &nonodes, 1); if(!threshold) threshold = spp; if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, chars); alloctree(&treenode, nonodes, false); allocrest(); } /* doinit */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= chars; i++) { alias[i - 1] = i; oldweight[i - 1] = weight[i - 1]; ally[i - 1] = i; } sitesort(chars, weight); sitecombine(chars); sitescrunch(chars); endsite = 0; for (i = 1; i <= chars; i++) { if (ally[i - 1] == i) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; if (!thresh) threshold = spp; threshwt 
= (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) { weight[i] *= 10; threshwt[i] = (long)(threshold * weight[i] + 0.5); } if ( zeros != NULL ) free(zeros); zeros = (long *)Malloc(endsite*sizeof(long)); /*in makeweights()*/ for (i = 0; i < endsite; i++) zeros[i] = 0; } /* makeweights */ void doinput() { /* reads the input data */ long i; if (justwts) { if (firstset) seq_inputdata(seqsets[ith-1], chars); for (i = 0; i < chars; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } else { if (!firstset){ samenumspseq(seqsets[ith-1],&chars, ith); reallocchars(); } seq_inputdata(seqsets[ith-1], chars); for (i = 0; i < chars; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[0], chars, weight, &weights); if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } } makeweights(); makevalues(treenode, zeros, false); alloctemp(&temp, zeros, endsite); alloctemp(&temp1, zeros, endsite); } /* doinput */ void supplement(node *r) { /* determine minimum number of steps more which will be added when rest of species are put in tree */ long i, j, k, has, sum; boolean addedmayhave, nonaddedhave; for (i = 0; i < endsite; i++) { nonaddedhave = 0;; addedmayhave = 0; for (k = 0; k < spp; k++) { has = treenode[k]->base[i]; if (has != 31) { if (added[k]) addedmayhave |= has; else { if ((has == 1) || (has == 2) || (has == 4) || (has == 8) || (has == 16)) nonaddedhave |= has; } } } sum = 0; j = 1; for (k = 1; k <= 5; k++) { if ((j & nonaddedhave) != 0) if ((j & addedmayhave) == 0) sum++; j += j; } r->numsteps[i] += sum * weight[i]; } } /* supplement */ void evaluate(node *r) { /* determines the number of steps needed for a tree. 
this is the minimum number of steps needed to evolve sequences on this tree */ long i, steps; double sum; sum = 0.0; supplement(r); for (i = 0; i < endsite; i++) { steps = r->numsteps[i]; if ((long)steps <= threshwt[i]) sum += steps; else sum += threshwt[i]; } if (examined == 0 && mults == 0) bestyet = -1.0; like = sum; } /* evaluate */ void addtraverse(node *p, node *item, node *fork, long *m, long *n, valptr valyew, placeptr place) { /* traverse all places to add item */ if (done) return; if (*m <= 2 || (p != root && p != root->next->back)) { if (p == root) fillin(temp, item, p); else { fillin(temp1, item, p); fillin(temp, temp1, p->back); } (*n)++; evaluate(temp); examined++; if (examined == howoften) { examined = 0; mults++; if (mults == howmany) done = true; if (progress) { printf("%7ld", mults); if (bestyet >= 0) printf("%16.1f", bestyet / 10.0); else printf(" - "); printf("%17ld%20.2f\n", nextree - 1, fracdone * 100); #ifdef WIN32 phyFillScreenColor(); #endif } } valyew[(*n) - 1] = like; place[(*n) - 1] = p->index; } if (!p->tip) { addtraverse(p->next->back, item, fork, m,n,valyew,place); addtraverse(p->next->next->back, item, fork,m,n,valyew,place); } } /* addtraverse */ void addit(long m) { /* adds the species one by one, recursively */ long n; valptr valyew; placeptr place; long i, j, n1, besttoadd=0; valptr bestval; placeptr bestplace; double oldfrac, oldfdone, sum, bestsum; valyew = (valptr)Malloc(nonodes*sizeof(double)); bestval = (valptr)Malloc(nonodes*sizeof(double)); place = (placeptr)Malloc(nonodes*sizeof(long)); bestplace = (placeptr)Malloc(nonodes*sizeof(long)); if (simple && !firsttime) { n = 0; added[order[m - 1] - 1] = true; addtraverse(root, treenode[order[m - 1] - 1], treenode[spp + m - 2], &m,&n,valyew,place); besttoadd = order[m - 1]; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } else { bestsum = -1.0; for (i = 1; i <= spp; i++) { if (!added[i - 1]) { n = 0; added[i - 1] = true; addtraverse(root, treenode[i - 1], treenode[spp + m - 2], &m,&n,valyew,place); added[i - 1] = false; sum = 0.0; for (j = 0; j < n; j++) sum += valyew[j]; if (sum > bestsum) { bestsum = sum; besttoadd = i; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } } } } order[m - 1] = besttoadd; memcpy(place, bestplace, nonodes*sizeof(long)); memcpy(valyew, bestval, nonodes*sizeof(double)); shellsort(valyew, place, n); oldfrac = fracinc; oldfdone = fracdone; n1 = 0; for (i = 0; i < n; i++) { if (valyew[i] <= bestyet || bestyet < 0.0) n1++; } if (n1 > 0) fracinc /= n1; for (i = 0; i < n; i++) { if (valyew[i] <= bestyet || bestyet < 0.0) { current[m - 1] = place[i]; recompute = (m < spp); add(treenode[place[i] - 1], treenode[besttoadd - 1], treenode[spp + m - 2], &root, recompute, treenode, grbg, zeros); added[besttoadd - 1] = true; if (m < spp) addit(m + 1); else { if (valyew[i] < bestyet || bestyet < 0.0) { nextree = 1; bestyet = valyew[i]; } if (nextree <= maxtrees) { memcpy(bestorders[nextree - 1], order, spp*sizeof(long)); memcpy(bestrees[nextree - 1], current, spp*sizeof(long)); } nextree++; firsttime = false; } recompute = (m < spp); re_move(treenode[besttoadd - 1], &treenode[spp + m - 2], &root, recompute, treenode, grbg, zeros); added[besttoadd - 1] = false; } fracdone += fracinc; } fracinc = oldfrac; fracdone = oldfdone; free(valyew); free(bestval); free(place); free(bestplace); } /* addit */ void dnapenny_reroot(node *outgroup) { /* reorients tree, putting outgroup in desired position. 
*/ node *p, *q, *newbottom, *oldbottom; if (outgroup->back->index == root->index) return; newbottom = outgroup->back; p = treenode[newbottom->index - 1]->back; while (p->index != root->index) { oldbottom = treenode[p->index - 1]; treenode[p->index - 1] = p; p = oldbottom->back; } p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = root->next->next; outgroup->back = root->next; treenode[newbottom->index - 1] = newbottom; } /* dnapenny_reroot */ void describe() { /* prints ancestors, steps and table of numbers of steps in each site */ if (stepbox) writesteps(chars, weights, oldweight, root); if (ancseq) { hypstates(chars, root, treenode, &garbage, basechar); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout(root, nextree, &col, root); } } /* describe */ void maketree() { /* tree construction recursively by branch and bound */ long i, j, k; node *dummy; if (progress) { printf("\nHow many\n"); printf("trees looked Approximate\n"); printf("at so far Length of How many percentage\n"); printf("(multiples shortest tree trees this short searched\n"); printf("of %4ld): found so far found so far so far\n", howoften); printf("---------- ------------ ------------ ------------\n"); } #ifdef WIN32 phyFillScreenColor(); #endif done = false; mults = 0; examined = 0; nextree = 1; root = treenode[0]; firsttime = true; for (i = 0; i < spp; i++) added[i] = false; added[0] = true; order[0] = 1; k = 2; fracdone = 0.0; fracinc = 1.0; bestyet = -1.0; recompute = true; addit(k); if (done) { if (progress) { printf("Search broken off! Not guaranteed to\n"); printf(" have found the most parsimonious trees.\n"); } if (treeprint) { fprintf(outfile, "Search broken off! Not guaranteed to\n"); fprintf(outfile, " have found the most parsimonious\n"); fprintf(outfile, " trees, but here is what we found:\n"); } } if (treeprint) { fprintf(outfile, "\nrequires a total of %18.3f\n\n", bestyet / 10.0); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i < spp; i++) added[i] = true; for (i = 0; i <= nextree - 2; i++) { root = treenode[0]; for (j = k; j <= spp; j++) add(treenode[bestrees[i][j - 1] - 1], treenode[bestorders[i][j - 1] - 1], treenode[spp + j - 2], &root, recompute, treenode, grbg, zeros); dnapenny_reroot(treenode[outgrno - 1]); postorder(root); evaluate(root); printree(root, 1.0); describe(); for (j = k - 1; j < spp; j++) re_move(treenode[bestorders[i][j] - 1], &dummy, &root, recompute, treenode, grbg, zeros); } if (progress) { printf("\nOutput written to file \"%s\"\n\n", outfilename); if (trout) printf("Trees also written onto file \"%s\"\n\n", outtreename); } freetemp(&temp); freetemp(&temp1); if (ancseq) freegarbage(&garbage); } /* maketree */ int main(int argc, Char *argv[]) { /* Penny's branch-and-bound method for DNA sequences */ #ifdef MAC argc = 1; /* macsetup("Dnapenny",""); */ argv[0] = "Dnapenny"; #endif init(argc, argv); emboss_getoptions("fdnapenny", argc, argv); /* Reads in the number of species, number of characters, options and data. 
Then finds all most parsimonious trees */ ibmpc = IBMCRT; ansi = ANSICRT; mulsets = false; garbage = NULL; msets = 1; firstset = true; doinit(); for (ith = 1; ith <= msets; ith++) { doinput(); if (ith == 1) firstset = false; if (msets > 1 && !justwts) { fprintf(outfile, "\nData set # %ld:\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } maketree(); free(threshwt); freenodes(nonodes,treenode); } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Penny's branch-and-bound method for DNA sequences */ PHYLIPNEW-3.69.650/src/draw.c0000664000175000017500000030452011253743724012172 00000000000000#ifdef WIN32 #include #endif #ifdef OSX_CARBON #include #include "interface.h" #endif #include "phylip.h" #include "draw.h" #ifdef QUICKC struct videoconfig myscreen; void setupgraphics(); #endif #ifdef WIN32 extern HDC hdc; extern HPEN hPenTree; extern HPEN hPenLabel; extern void winplotpreview(); struct winpreviewparms_t { char * fn; double *xo, *yo, *scale; long nt; node *root; }; struct winpreviewparms_t winpreviewparms; #endif #ifndef X_DISPLAY_MISSING struct { char* fn; double *xo, *yo, *scale; long nt; node *root; } xpreviewparms; Atom wm_delete_window; Atom wm_delete_window2; Widget dialog; Widget shell=NULL; Window mainwin=0; void init_x(void); void redraw(Widget w,XtPointer client, XExposeEvent *ev); void plot_callback(Widget w,XtPointer client, XtPointer call); void change_callback(Widget w,XtPointer client, XtPointer call); void about_callback(Widget w,XtPointer client, XtPointer call); void quit_callback(Widget w,XtPointer client, XtPointer call); void close_x(void); void do_dialog(void); void delete_callback(Widget w, XEvent* event, String *params, int *num_params); void dismiss_dialog(void); #endif long winheight; long winwidth; extern winactiontype winaction; colortype colors[7] = { {"White ",1.0,1.0,1.0}, {"Red ",1.0,0.3,0.3}, {"Orange ",1.0,0.6,0.6}, {"Yellow ",1.0,0.9,0.4}, {"Green ",0.3,0.8,0.3}, {"Blue ",0.5,0.5,1.0}, {"Violet ",0.6,0.4,0.8}, }; vrmllighttype vrmllights[3] = { {1.0, -100.0, 100.0, 100.0}, {0.5, 100.0, -100.0, -100.0}, {0.3, 0.0, -100.0, 100.0}, }; long vrmltreecolor, vrmlnamecolor, vrmlskycolornear, vrmlskycolorfar, vrmlgroundcolornear, vrmlgroundcolorfar, vrmlplotcolor; char fontname[LARGE_BUF_LENGTH]; /* format of matrix: capheight, length[32],length[33],..length[256]*/ byte *full_pic ; int increment = 0 ; int total_bytes = 0 ; short unknown_metric[256]; static short helvetica_metric[] = { 718, 278,278,355,556,556,889,667,222,333,333,389,584,278,333,278,278,556,556,556, 556,556,556,556,556,556,556,278,278,584,584,584,556,1015,667,667,722,722,667, 611,778,722,278,500,667,556,833,722,778,667,778,722,667,611,722,667,944,667, 667,611,278,278,278,469,556,222,556,556,500,556,556,278,556,556,222,222,500, 222,833,556,556,556,556,333,500,278,556,500,722,500,500,500,334,260,334,584, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,333,556, 556,167,556,556,556,556,191,333,556,333,333,500,500,0,556,556,556,278,0,537, 350,222,333,333,556,1000,1000,0,611,0,333,333,333,333,333,333,333,333,0,333, 333,0,333,333,333,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,0,370,0,0,0,0,556, 778,1000,365,0,0,0,0,0,889,0,0,0,278,0,0,222,611,944,611,0,0,0}; static short helveticabold_metric[] = {718, /* height */ 278,333,474,556,556,889,722,278,333,333,389,584,278,333,278,278,556,556,556, 
556,556,556,556,556,556,556,333,333,584,584,584,611,975,722,722,722,722,667, 611,778,722,278,556,722,611,833,722,778,667,778,722,667,611,722,667,944,667, 667,611,333,278,333,584,556,278,556,611,556,611,556,333,611,611,278,278,556, 278,889,611,611,611,611,389,556,333,611,556,778,556,556,500,389,280,389,584, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,333,556, 556,167,556,556,556,556,238,500,556,333,333,611,611,0,556,556,556,278,0,556, 350,278,500,500,556,1000,1000,0,611,0,333,333,333,333,333,333,333,333,0,333, 333,0,333,333,333,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,0,370,0,0,0,0,611, 778,1000,365,0,0,0,0,0,889,0,0,0,278,0,0,278,611,944,611,0,0,0}; static short timesroman_metric[] = {662, 250,333,408,500,500,833,778,333,333,333,500,564,250,333,250,278,500,500,500, 500,500,500,500,500,500,500,278,278,564,564,564,444,921,722,667,667,722,611, 556,722,722,333,389,722,611,889,722,722,556,722,667,556,611,722,722,944,722, 722,611,333,278,333,469,500,333,444,500,444,500,444,333,500,500,278,278,500, 278,778,500,500,500,500,333,389,278,500,500,722,500,500,444,480,200,480,541, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,333,500, 500,167,500,500,500,500,180,444,500,333,333,556,556,0,500,500,500,250,0,453, 350,333,444,444,500,1000,1000,0,444,0,333,333,333,333,333,333,333,333,0,333, 333,0,333,333,333,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,889,0,276,0,0,0,0,611, 722,889,310,0,0,0,0,0,667,0,0,0,278,0,0,278,500,722,500,0,0,0}; static short timesitalic_metric[] = {660, /* height */ 250,333,420,500,500,833,778,333,333,333,500,675,250,333,250,278,500,500,500, 500,500,500,500,500,500,500,333,333,675,675,675,500,920,611,611,667,722,611, 611,722,722,333,444,667,556,833,667,722,611,722,611,500,556,722,611,833,611, 556,556,389,278,389,422,500,333,500,500,444,500,444,278,500,500,278,278,444, 278,722,500,500,500,500,389,389,278,500,444,667,444,444,389,400,275,400,541, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,389,500, 500,167,500,500,500,500,214,556,500,333,333,500,500,0,500,500,500,250,0,523, 350,333,556,556,500,889,1000,0,500,0,333,333,333,333,333,333,333,333,0,333, 333,0,333,333,333,889,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,889,0,276,0,0,0,0,556, 722,944,310,0,0,0,0,0,667,0,0,0,278,0,0,278,500,667,500,0,0,0}; static short timesbold_metric[] = {681, /* height */ 250,333,555,500,500,1000,833,333,333,333,500,570,250,333,250,278,500,500,500, 500,500,500,500,500,500,500,333,333,570,570,570,500,930,722,667,722,722,667, 611,778,778,389,500,778,667,944,722,778,611,778,722,556,667,722,722,1000,722, 722,667,333,278,333,581,500,333,500,556,444,556,444,333,500,556,278,333,556, 278,833,556,500,556,556,444,389,333,556,500,722,500,500,444,394,220,394,520,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,333,500,500, 167,500,500,500,500,278,500,500,333,333,556,556,0,500,500,500,250,0,540,350, 333,500,500,500,1000,1000,0,500,0,333,333,333,333,333,333,333,333,0,333,333, 0,333,333,333,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,0,300,0,0,0,0,667,778, 1000,330,0,0,0,0,0,722,0,0,0,278,0,0,278,500,722,556,0,0,0}; static short timesbolditalic_metric[] = {662, /* height */ 250,389,555,500,500,833,778,333,333,333,500,570,250,333,250,278,500,500,500, 500,500,500,500,500,500,500,333,333,570,570,570,500,832,667,667,667,722,667, 667,722,778,389,500,667,611,889,722,722,611,722,667,556,611,722,667,889,667, 611,611,333,278,333,570,500,333,500,500,444,500,444,333,500,556,278,278,500, 278,778,556,500,500,500,389,389,278,556,444,667,500,444,389,348,220,348,570, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,389,500, 500,167,500,500,500,500,278,500,500,333,333,556,556,0,500,500,500,250,0,500, 350,333,500,500,500,1000,1000,0,500,0,333,333,333,333,333,333,333,333,0,333, 333,0,333,333,333,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,944,0,266,0,0,0,0,611 ,722,944,300,0,0,0,0,0,722,0,0,0,278,0,0,278,500,722,500,0,0,0}; static const char *figfonts[] = {"Times-Roman","Times-Italic","Times-Bold","Times-BoldItalic", "AvantGarde-Book","AvantGarde-BookOblique","AvantGarde-Demi","AvantGarde-DemiOblique", "Bookman-Light","Bookman-LightItalic","Bookman-Demi","Bookman-DemiItalic", "Courier","Courier-Italic","Courier-Bold","Courier-BoldItalic", "Helvetica","Helvetica-Oblique","Helvetica-Bold","Helvetica-BoldOblique", "Helvetica-Narrow","Helvetica-Narrow-Oblique","Helvetica-Narrow-Bold","Helvetica-Narrow-BoldOblique", "NewCenturySchlbk-Roman","NewCenturySchlbk-Italic","NewCenturySchlbk-Bold","NewCenturySchlbk-BoldItalic", "Palatino-Roman","Palatino-Italic","Palatino-Bold","Palatino-BoldItalic", "Symbol","ZapfChancery-MediumItalic","ZapfDingbats"}; double oldx, oldy; boolean didloadmetric; long nmoves,oldpictint,pagecount; double labelline,linewidth,oldxhigh,oldxlow,oldyhigh,oldylow, vrmllinewidth, raylinewidth,treeline,oldxsize,oldysize,oldxunitspercm, oldyunitspercm,oldxcorner,oldycorner,oldxmargin,oldymargin, oldhpmargin,oldvpmargin,clipx0,clipx1,clipy0,clipy1,userxsize,userysize; long rootmatrix[51][51]; long HiMode,GraphDriver,GraphMode,LoMode,bytewrite; /* externals should move to .h file later. */ extern long strpbottom,strptop,strpwide,strpdeep,strpdiv,hpresolution; extern boolean dotmatrix,empty,preview,previewing,pictbold,pictitalic, pictshadow,pictoutline; extern double expand,xcorner,xnow,xsize,xscale,xunitspercm, ycorner,ynow,ysize,yscale,yunitspercm,labelrotation, labelheight,xmargin,ymargin,pagex,pagey,paperx,papery, hpmargin,vpmargin; extern long filesize; extern growth grows; extern enum {yes,no} penchange,oldpenchange; extern FILE *plotfile; extern plottertype plotter,oldplotter,previewer; extern striptype stripe; extern char resopts; pentype lastpen; extern char pltfilename[FNMLNGTH]; extern char progname[FNMLNGTH]; #define NO_PLANE 666 /* To make POVRay happy */ #ifndef OLDC /* function prototypes */ int pointinrect(double, double, double, double, double, double); int rectintersects(double, double, double, double, double, double, double, double); long upbyte(long); long zlobyte(long); void pictoutint(FILE *, long); Local long SFactor(void); long DigitsInt(long); Local boolean IsColumnEmpty(striparray *, long, long); void Skip(long Amount); Local long FirstBlack(striparray *, long, long); Local long FirstWhite(striparray *, long, long); Local boolean IsBlankStrip(striparray *mystripe, long deep); void striprint(long, long); long showvrmlparms(long vrmltreecolor, long vrmlnamecolor, long vrmlskycolornear, long vrmlskycolorfar, long vrmlgroundcolornear); void getvrmlparms(long *vrmltreecolor, long *vrmlnamecolor, long *vrmlskycolornear,long *vrmlskycolorfar, long *vrmlgroundcolornear,long *vrmlgroundcolorfar, long numtochange); #ifdef QUICKC void setupgraphics(void); #endif long showrayparms(long, long, long, long, long, long); void getrayparms(long *, long *, long *, long *, long *,long *, long); int readafmfile(char *, short *); void metricforfont(char *, short *); void plotchar(long *, struct LOC_plottext *); void swap_charptr(char **, char **); void plotpb(void); char *findXfont(char *, double, double *, int *); int macfontid(char 
*); int figfontid(char *fontname); void makebox(char *, double *, double *, double *, long); /* function prototypes */ #endif int pointinrect(double x,double y,double x0,double y0,double x1,double y1) { double tmp; if (x0 > x1) tmp = x0, x0 = x1, x1 = tmp; if (y0 > y1) tmp = y0, y0 = y1, y1 = tmp; return ((x >= x0 && x <= x1) && (y >= y0 && y <= y1)); } /* pointinrect */ int rectintersects(double xmin1,double ymin1,double xmax1,double ymax1, double xmin2,double ymin2,double xmax2,double ymax2) { double temp; /* check if any of the corners of either square are contained within the * * other one. This catches MOST cases, the last one (two) is two thin * * bands crossing each other (like a '+' ) */ if (xmin1 > xmax1){ temp = xmin1; xmin1 = xmax1; xmax1 = temp;} if (xmin2 > xmax2){ temp = xmin2; xmin2 = xmax2; xmax2 = temp;} if (ymin1 > ymax1){ temp = ymin1; ymin1 = ymax1; ymax1 = temp;} if (ymin2 > ymax2){ temp = ymin2; ymin2 = ymax2; ymax2 = temp;} return (pointinrect(xmin1,ymin1,xmin2,ymin2,xmax2,ymax2) || pointinrect(xmax1,ymin1,xmin2,ymin2,xmax2,ymax2) || pointinrect(xmin1,ymax1,xmin2,ymin2,xmax2,ymax2) || pointinrect(xmax1,ymax1,xmin2,ymin2,xmax2,ymax2) || pointinrect(xmin2,ymin2,xmin1,ymin1,xmax1,ymax1) || pointinrect(xmax2,ymin2,xmin1,ymin1,xmax1,ymax1) || pointinrect(xmin2,ymax2,xmin1,ymin1,xmax1,ymax1) || pointinrect(xmax2,ymax2,xmin1,ymin1,xmax1,ymax1) || (xmin1 >= xmin2 && xmax1 <= xmax2 && ymin2 >= ymin1 && ymax2 <= ymax1) || (xmin2 >= xmin1 && xmax2 <= xmax1 && ymin1 >= ymin2 && ymax1 <= ymax2)); } /* rectintersects */ void clearit() { long i; if (previewer == tek) printf("%c\f", escape); else if (ansi || ibmpc) #ifdef WIN32 phyClearScreen(); #else printf("\033[2J\033[H"); #endif else { for (i = 1; i <= 24; i++) putchar('\n'); } #ifdef WIN32 phyClearScreen(); #endif } /* clearit */ boolean isfigfont(char *fontname) { int i; if (strcmp(fontname,"Hershey") == 0) return 1; for (i=0;i<34;++i) if (strcmp(fontname,figfonts[i]) == 0) break; return (i < 34); } /* isfigfont */ int figfontid(char *fontname) { int i; for (i=0;i<34;++i) if (strcmp(fontname,figfonts[i]) == 0) return i; return -1; } /* figfontid */ const char *figfontname(int id) { return figfonts[id]; } /* figfontname */ void getpreview() { long loopcount; Char ch; clearit(); printf("\nWhich type of screen will it be previewed on?\n\n"); printf(" type: to choose one compatible with:\n\n"); printf(" N will not be previewed\n"); #ifdef DOS printf(" I MSDOS graphics screens\n"); #endif #ifdef MAC printf(" M Macintosh screens\n"); #endif #ifndef X_DISPLAY_MISSING printf(" X X Windows display\n"); #endif #ifdef WIN32 printf(" W MS Windows display\n"); #endif printf(" K TeKtronix 4010 graphics terminal\n"); printf(" D DEC ReGIS graphics (VT240 terminal)\n"); printf(" U other: one you have inserted code for\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); countup(&loopcount, 10); } #undef FOO #ifdef DOS #define FOO while (strchr("NIKDU",ch) == NULL); #endif #ifdef MAC #define FOO while (strchr("NMKDU",ch) == NULL); #endif #ifndef X_DISPLAY_MISSING #define FOO while (strchr("NXKDU",ch) == NULL); #endif #ifdef WIN32 #define FOO while (strchr("NWKDU",ch) == NULL); #endif #ifndef FOO while (strchr("NKDU",ch) == NULL); #endif preview = true; switch (ch) { case 'N': preview = false; previewer = other; /* Added by Dan F. 
*/ break; case 'I': previewer = ibm; break; case 'M': previewer = mac; break; case 'X': previewer = xpreview; break; case 'W': previewer = winpreview; break; case 'K': previewer = tek; break; case 'D': previewer = decregis; break; case 'U': previewer = other; break; } printf("\n\n\n"); } /* getpreview */ void pout(long n) { #ifdef MAC if (previewing) printf("%*ld", (int)((long)(0.434295 * log((double)n) + 0.0001)), n); else fprintf(plotfile, "%*ld", (int)((long)(0.434295 * log((double)n) + 0.0001)), n); #endif #ifndef MAC if (previewing) printf("%*ld", (int)((long)(0.434295 * log((double)n) + 0.0001)), n); else fprintf(plotfile, "%*ld", (int)((long)(0.434295 * log((double)n) + 0.0001)), n); #endif } /* pout */ long upbyte(long num) { /* get upper nibble of byte */ long Result = 0, i, j, bytenum, nibcount; boolean done; bytenum = 0; done = false; nibcount = 0; i = num / 16; i /= 16; j = 1; while (!done) { bytenum += (i & 15) * j; nibcount++; if (nibcount == 2) { Result = bytenum; done = true; } else { j *= 16; i /= 16; } } return Result; } /* upbyte */ long zlobyte(long num) { /* get low order nibble of byte */ long Result = 0, i, j, bytenum, nibcount; boolean done; bytenum = 0; done = false; nibcount = 0; i = num; j = 1; while (!done) { bytenum += (i & 15) * j; nibcount++; if (nibcount == 2) { Result = bytenum; done = true; } else { j *= 16; i /= 16; } } return Result; } /* zlobyte */ void pictoutint(FILE *file, long pictint) { char picthi, pictlo; picthi = (char)(pictint / 256); pictlo = (char)(pictint % 256); fprintf(file, "%c%c", picthi, pictlo); } void initplotter(long ntips, char *fontname) { long i,j, hres, vres; Char picthi, pictlo; long pictint; int padded_width, byte_width; #ifndef X_DISPLAY_MISSING unsigned int dummy1, dummy2; #endif treeline = 0.18 * labelheight * yscale * expand; labelline = 0.06 * labelheight * yscale * expand; linewidth = treeline; if (dotmatrix ) { for (i = 0; i <= 50; i++) { /* for fast circle calculations */ for (j = 0; j <= 50; j++){ rootmatrix[i][j] = (long)floor(sqrt((double)(i * i + j * j)) + 0.5);} } } switch (plotter) { case xpreview: #ifndef X_DISPLAY_MISSING XGetGeometry(display,mainwin, &DefaultRootWindow(display),&x,&y,&width,&height,&dummy1,&dummy2); XClearWindow(display,mainwin); #endif break; case tek: oldxhigh = -1.0; oldxlow = -1.0; oldyhigh = -1.0; oldylow = -1.0; nmoves = 0; /* DLS/JMH -- See function PLOT */ if (previewing) /* DLS/JMH */ printf("%c\f", escape); /* DLS/JMH */ else fprintf(plotfile, "%c\f", escape); break; case hp: fprintf(plotfile, "IN;SP1;VS10.0;\n"); break; case ray: treeline = 0.27 * labelheight * yscale * expand; linewidth = treeline; raylinewidth = treeline; if (grows == vertical) fprintf(plotfile, "plane backcolor 0 0 %2.4f 0 0 1\n", ymargin); else fprintf(plotfile, "plane backcolor 0 0 %2.4f 0 0 1\n", ymargin - ysize / (ntips - 1)); fprintf(plotfile, "\nname tree\n"); fprintf(plotfile, "grid 22 22 22\n"); break; case pov: treeline = 0.27 * labelheight * yscale * expand; linewidth = treeline; raylinewidth = treeline; fprintf(plotfile, "\n// First, the tree\n\n"); break; case vrml: vrmllinewidth = treeline; break; case pict: plotfile = freopen(pltfilename,"wb",plotfile); for (i=0;i<512;++i) putc('\000',plotfile); pictoutint(plotfile,1000); /* size...replaced later with seek */ pictoutint(plotfile,1); /* bbx0 */ pictoutint(plotfile,1); /* bby0 */ pictoutint(plotfile,612); /* bbx1 */ pictoutint(plotfile,792); /* bby1 */ fprintf(plotfile,"%c%c",0x11,0x01); /* version "1" (B&W) pict */ 
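/* PICT output layout (see also finishplotter): the 512 zero bytes above are the
   fixed PICT file header; the two-byte word written as 1000 is only a size
   placeholder, patched with the real byte count via fseek(plotfile,512L,SEEK_SET)
   once plotting finishes; then come the 1,1,612,792 bounding box and the
   0x11 0x01 "version 1" opcode.  The opcodes written below set the clip
   rectangle (opcode 0x01, 10-byte region) and the pen size used to draw the
   tree (opcode 0x07). */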
fprintf(plotfile,"%c%c%c",0xa0,0x00,0x82); fprintf(plotfile,"%c",1); /* clip rect */ pictoutint(plotfile,10); /* region size, bytes. */ pictoutint(plotfile,1); /* clip x0 */ pictoutint(plotfile,1); /* clip y0 */ pictoutint(plotfile,612); /* clip x1 */ pictoutint(plotfile,792); /* clip y1 */ bytewrite+=543; oldpictint = 0; pictint = (long)(linewidth + 0.5); if (pictint == 0) pictint = 1; picthi = (Char)(pictint / 256); pictlo = (Char)(pictint % 256); fprintf(plotfile, "\007%c%c%c%c", picthi, pictlo, picthi, pictlo); /* Set pen size for drawing tree. */ break; case bmp: plotfile = freopen(pltfilename,"wb",plotfile); write_bmp_header(plotfile, (int)(xsize*xunitspercm), (int)(ysize*yunitspercm)); byte_width = (int) ceil (xsize / 8.0); padded_width = ((byte_width + 3) / 4) * 4 ; full_pic = (byte *) Malloc ((padded_width *2) * (int) ysize) ; break ; case xbm: /* what a completely verbose data representation format! */ fprintf(plotfile, "#define drawgram_width %5ld\n", (long)(xunitspercm * xsize)); fprintf(plotfile, "#define drawgram_height %5ld\n", (long)(yunitspercm * ysize)); fprintf(plotfile, "static char drawgram_bits[] = {\n"); /*filesize := 53; */ break; case lw: /* write conforming postscript */ fprintf(plotfile,"%%!PS-Adobe-2.0\n"); fprintf(plotfile,"%%%%Title: Phylip Tree Output\n"); fprintf(plotfile,"%%%%DocumentFonts: (atend)\n"); fprintf(plotfile,"%%%%Pages: %d 1\n", ((int)((pagex-hpmargin-0.01)/(paperx-hpmargin))+1)* ((int)((pagey-vpmargin-0.01)/papery-vpmargin)+1)); fprintf(plotfile,"%%%%BoundingBox: 0 0 612 792\n"); fprintf(plotfile,"%%%%DocumentPaperSizes: Letter\n"); /* this may not be right */ fprintf(plotfile,"%%%%Orientation: Portrait\n"); fprintf(plotfile,"%%%%EndComments\n"); fprintf(plotfile,"/l {newpath moveto lineto stroke} def\n"); fprintf(plotfile,"%%%%EndProlog\n%%%%\n"); fprintf(plotfile,"%%%%Page: 1 1\n"); fprintf(plotfile,"%%%%PageBoundingBox: 0 0 %d %d\n", (int)(xunitspercm*paperx),(int)(yunitspercm*papery)); fprintf(plotfile,"%%%%PageFonts: (atend)\n%%%%BeginPageSetup\n"); fprintf(plotfile,"%%%%PaperSize: Letter\n"); fprintf(plotfile," 1 setlinecap \n 1 setlinejoin \n"); fprintf(plotfile, "%8.2f setlinewidth newpath \n", treeline); break; case idraw: fprintf(plotfile, "%%I Idraw 9 Grid 8 \n\n"); fprintf(plotfile,"%%%%Page: 1 1\n\n"); fprintf(plotfile,"Begin\n"); fprintf(plotfile,"%%I b u\n"); fprintf(plotfile,"%%I cfg u\n"); fprintf(plotfile,"%%I cbg u\n"); fprintf(plotfile,"%%I f u\n"); fprintf(plotfile,"%%I p u\n"); fprintf(plotfile,"%%I t\n"); fprintf(plotfile,"[ 0.679245 0 0 0.679245 0 0 ] concat\n"); fprintf(plotfile,"/originalCTM matrix currentmatrix def\n\n"); break; case ibm: #ifdef TURBOC initgraph(&GraphDriver,&HiMode,""); #endif #ifdef QUICKC setupgraphics(); #endif break; case mac: #ifdef MAC gfxmode(); pictint=(long)(linewidth + 0.5); #endif break; case houston: break; case decregis: oldx = (double) 300; oldy = (double) 1; nmoves = 0; if (previewing) printf("%c[2J%cPpW(I3);S(A[0,0][799,479]);S(I(W))S(E);S(C0);W(I(D))\n", escape,escape); else fprintf(plotfile, "%c[2J%cPpW(I3);S(A[0,0][799,479]);S(I(W))S(E);S(C0);W(I(D))\n", escape,escape); break; case epson: plotfile = freopen(pltfilename,"wb",plotfile); fprintf(plotfile, "\0333\030"); break; case oki: plotfile = freopen(pltfilename,"wb",plotfile); fprintf(plotfile, "\033%%9\020"); break; case citoh: plotfile = freopen(pltfilename,"wb",plotfile); fprintf(plotfile, "\033T16"); break; case toshiba: /* reopen in binary since we always need \n\r on the file */ /* and dos in text mode puts it, but unix 
does not */ plotfile = freopen(pltfilename,"wb",plotfile); fprintf(plotfile, "\033\032I\n\r\n\r"); fprintf(plotfile, "\033L06\n\r"); break; case pcl: plotfile = freopen(pltfilename,"wb",plotfile); if (hpresolution == 150 || hpresolution == 300) fprintf(plotfile, "\033*t%3ldR", hpresolution); else if (hpresolution == 75) fprintf(plotfile, "\033*t75R"); break; case pcx: plotfile = freopen(pltfilename,"wb",plotfile); fprintf(plotfile,"\012\003\001\001%c%c%c%c",0,0,0,0); /* Manufacturer version (1 byte) version (1 byte), encoding (1 byte), bits per pixel (1 byte), xmin (2 bytes) ymin (2 bytes), Version */ hres = strpwide; vres = (long)floor(yunitspercm * ysize + 0.5); fprintf(plotfile, "%c%c", (unsigned char)zlobyte(hres - 1), (unsigned char)upbyte(hres - 1)); /* Xmax */ fprintf(plotfile, "%c%c", (unsigned char)zlobyte(vres - 1), (unsigned char)upbyte(vres - 1)); /* Ymax */ fprintf(plotfile, "%c%c", (unsigned char)zlobyte(hres), (unsigned char)upbyte(hres)); /* Horizontal resolution */ fprintf(plotfile, "%c%c", (unsigned char)zlobyte(vres), (unsigned char)upbyte(vres)); /* Vertical resolution */ for (i = 1; i <= 48; i++) /* Color Map */ putc('\000', plotfile); putc('\000', plotfile); putc('\001', plotfile); /* Num Planes */ putc(hres / 8, plotfile); /* Bytes per line */ putc('\000',plotfile); for (i = 1; i <= 60; i++) /* Filler */ putc('\000',plotfile); break; case fig: fprintf(plotfile, "#FIG 2.0\n"); fprintf(plotfile, "80 2\n"); break; case gif: case other: break; default: /* case vrml not handled */ break; /* initialization code for a new plotter goes here */ } } /* initplotter */ void finishplotter() { int padded_width, byte_width; /* For bmp code */ switch (plotter) { case xpreview: #ifndef X_DISPLAY_MISSING plotter=oldplotter; redraw(NULL,NULL,NULL); XtAppMainLoop(appcontext); #endif break; case tek: if (previewing) { fflush(stdout); scanf("%*c%*[^\n]"); getchar(); printf("%c\f", escape); } else { putc('\n', plotfile); plot(penup, 1.0, 1.0); } break; case hp: plot(penup, 1.0, 1.0); fprintf(plotfile, "SP;\n"); break; case ray: fprintf(plotfile,"end\n\nobject treecolor tree\n"); fprintf(plotfile,"object namecolor species_names\n"); break; case pov: break; case pict: fprintf(plotfile,"%c%c%c%c%c",0xa0,0x00,0x82,0xff,0x00); bytewrite+=5; fseek(plotfile,512L,SEEK_SET); pictoutint(plotfile,bytewrite); break; case lw: fprintf(plotfile, "stroke showpage \n\n"); fprintf(plotfile,"%%%%PageTrailer\n"); fprintf(plotfile,"%%%%PageFonts: %s\n", (strcmp(fontname,"Hershey") == 0) ? "" : fontname); fprintf(plotfile,"%%%%Trailer\n"); fprintf(plotfile,"%%%%DocumentFonts: %s\n", (strcmp(fontname,"Hershey") == 0) ? "" : fontname); break; case idraw: fprintf(plotfile, "\nEnd %%I eop\n\n"); fprintf(plotfile, "showpage\n\n"); fprintf(plotfile, "%%%%Trailer\n\n"); fprintf(plotfile, "end\n"); break; case ibm: #ifdef TURBOC getchar(); restorecrtmode(); #endif #ifdef QUICKC getchar(); _clearscreen(_GCLEARSCREEN); _setvideomode(_DEFAULTMODE); #endif break; case mac: #ifdef MAC plotter=oldplotter; eventloop(); #endif break; case houston: break; case decregis: plot(penup, 1.0, 1.0); if (previewing) printf("%c\\", escape); else fprintf(plotfile, "%c\\", escape); if (previewing) { getchar(); printf("%c[2J",escape); } break; case epson: fprintf(plotfile, "\0333$"); break; case oki: /* blank case */ break; case citoh: fprintf(plotfile, "\033A"); break; case toshiba: fprintf(plotfile, "\033\032I\n\r"); break; case pcl: fprintf(plotfile, "\033*rB"); /* Exit graphics mode */ putc('\f', plotfile); /* just to make sure? 
*/ break; case pcx: /* blank case */ break; case bmp: byte_width = (int) ceil (xsize / 8.0); padded_width = ((byte_width + 3) / 4) * 4 ; turn_rows (full_pic, padded_width, (int) ysize); write_full_pic(full_pic, total_bytes); free (full_pic) ; break; case xbm: fprintf(plotfile, "}\n"); break; case fig: /* blank case */ break; case gif: case other: break; default: /* case vrml not handled */ break; /* termination code for a new plotter goes here */ } } /* finishplotter */ Local long SFactor() { /* the dot-skip is resolution-independent. */ /* this makes all the point-skip instructions skip the same # of dots. */ long Result = 0; if (hpresolution == 150) Result = 2; if (hpresolution == 300) Result = 1; if (hpresolution == 75) return 4; return Result; } /* SFactor */ long DigitsInt(long x) { if (x < 10) return 1; else if (x >= 10 && x < 100) return 2; else return 3; } /* DigistInt */ Local boolean IsColumnEmpty(striparray *mystripe, long pos, long deep) { long j; boolean ok; ok = true; j = 1; while (ok && j <= deep) { ok = (ok && mystripe[j - 1][pos - 1] == null); j++; } return ok; } /* IsColumnEmpty */ void Skip(long Amount) { /* assume we're not in gfx mode. */ fprintf(plotfile, "\033&f1S"); /* Pop the graphics cursor */ #ifdef MAC fprintf(plotfile, "\033*p+%*ldX", (int)DigitsInt(Amount * SFactor()), Amount * SFactor()); #endif #ifndef MAC fprintf(plotfile, "\033*p+%*ldX", (int)DigitsInt(Amount * SFactor()), Amount * SFactor()); #endif fprintf(plotfile, "\033&f0S"); /* Push the cursor to new location */ filesize += 15 + DigitsInt(Amount * SFactor()); } /* Skip */ Local long FirstBlack(striparray *mystripe, long startpos, long deep) { /* returns, given a strip and a position, next x with some y's nonzero */ long i; boolean columnempty; i = startpos; columnempty = true; while (columnempty && i < strpwide / 8) { columnempty = (columnempty && IsColumnEmpty(mystripe, i,deep)); if (columnempty) i++; } return i; } /* FirstBlack */ Local long FirstWhite(striparray *mystripe, long startpos, long deep) { /* returns, given a strip and a position, the next x with all y's zero */ long i; boolean columnempty; i = startpos; columnempty = false; while (!columnempty && i < strpwide / 8) { columnempty = IsColumnEmpty(mystripe, i,deep); if (!columnempty) i++; } return i; } /* FirstWhite */ Local boolean IsBlankStrip(striparray *mystripe, long deep) { long i, j; boolean ok; ok = true; i = 1; while (ok && i <= strpwide / 8) { for (j = 0; j < (deep); j++) ok = (ok && mystripe[j][i - 1] == '\0'); i++; } return ok; } /* IsBlankStrip */ void striprint(long div, long deep) { long i, j, t, x, theend, width; unsigned char counter; boolean done; done = false; width = strpwide; if (plotter != pcx && plotter != pcl && plotter != bmp && plotter != xbm) { while (!done) { for (i = 0; i < div; i++) done = done || (stripe[i] && (stripe[i][width - 1] != null)); if (!done) width--; done = (done || width == 0); } } switch (plotter) { case epson: if (!empty) { fprintf(plotfile, "\033L%c%c", (char) width & 255, (char) width / 256); for (i = 0; i < width; i++) putc(stripe[0][i], plotfile); filesize += width + 4; } putc('\n', plotfile); putc('\r', plotfile); break; case oki: if (!empty) { fprintf(plotfile, "\033%%1%c%c", (char) width / 128, (char) width & 127); for (i = 0; i < width; i++) putc(stripe[0][i], plotfile); filesize += width + 5; } putc('\n', plotfile); putc('\r', plotfile); break; case citoh: if (!empty) { fprintf(plotfile, "\033S%04ld",width); for (i = 0; i < width; i++) putc(stripe[0][i], plotfile); filesize += width + 
6; } putc('\n', plotfile); putc('\r', plotfile); break; case toshiba: if (!empty) { for (i = 0; i < width; i++) { for (j = 0; j <= 3; j++) stripe[j][i] += 64; } fprintf(plotfile, "\033;%04ld",width); for (i = 0; i < width; i++) fprintf(plotfile, "%c%c%c%c", stripe[0][i], stripe[1][i], stripe[2][i], stripe[3][i]); filesize += width * 4 + 6; } putc('\n', plotfile); putc('\r', plotfile); break; case pcx: width = strpwide / 8; for (j = 0; j < div; j++) { t = 1; while (1) { i = 0; /* i == RLE count ???? */ while ((stripe[j][t + i - 1]) == (stripe[j][t + i]) && t + i < width && i < 63) i++; if (i > 0) { counter = 192; counter += i; putc(counter, plotfile); putc(255 - stripe[j][t - 1], plotfile); t += i; filesize += 2; } else { if (255 - (stripe[j][t - 1] & 255) >= 192) { putc(193, plotfile); filesize++; } putc(255 - stripe[j][t - 1], plotfile); t++; filesize++; } if (t >width) break; } } break; case pcl: width = strpwide / 8; if (IsBlankStrip(stripe,deep)) { #ifdef MAC fprintf(plotfile, "\033&f1S\033*p0X\033*p+%*ldY\033&f0S", (int)DigitsInt(deep * SFactor()), deep * SFactor()); #endif #ifndef MAC fprintf(plotfile, "\033&f1S\033*p0X\033*p+%*dY\033&f0S", (int)DigitsInt(deep * SFactor()), (int) (deep * SFactor())); #endif filesize += DEFAULT_STRIPE_HEIGHT + DigitsInt(deep * SFactor()); } else { /* plotting the actual strip as bitmap data */ x = 1; theend = 1; while (x < width) { x = FirstBlack(stripe, x,deep); /* all-black strip is now */ Skip((x - theend - 1) * 8); /* x..theend */ theend = FirstWhite(stripe, x,deep) - 1;/* like lastblack */ fprintf(plotfile, "\033*r1A"); /* enter gfx mode */ for (j = 0; j < div; j++) { #ifdef MAC fprintf(plotfile, "\033*b%*ldW", (int)DigitsInt(theend - x + 1), theend - x + 1); #endif #ifndef MAC fprintf(plotfile, "\033*b%*dW", (int)DigitsInt(theend - x + 1), (int) (theend - x + 1)); #endif /* dump theend-x+1 bytes */ for (t = x - 1; t < theend; t++) putc(stripe[j][t], plotfile); filesize += theend - x + DigitsInt(theend - x + 1) + 5; } fprintf(plotfile, "\033*rB"); /* end gfx mode */ Skip((theend - x + 1) * 8); filesize += 9; x = theend + 1; } fprintf(plotfile, "\033&f1S"); /* Pop cursor */ #ifdef MAC fprintf(plotfile, "\033*p0X\033*p+%*ldY", (int)DigitsInt(deep * SFactor()), deep * SFactor()); #endif #ifndef MAC fprintf(plotfile, "\033*p0X\033*p+%*dY", (int)DigitsInt(deep * SFactor()), (int) (deep * SFactor())); #endif filesize += DEFAULT_STRIPE_HEIGHT + DigitsInt(deep * SFactor()); fprintf(plotfile, "\033&f0S"); /* Push cursor */ } break; /* case for hpcl code */ case bmp: width = ((strpwide -1) / 8) +1; translate_stripe_to_bmp (&stripe, full_pic, increment++, width, div, &total_bytes) ; break; /* case for bmp code */ case xbm: x = 0; /* count up # of bytes so we can put returns. 
*/ width = ((strpwide -1) / 8) +1; for (j = 0; j < div; j++) { for (i = 0; i < width; i++) { fprintf(plotfile, "0x%02x,",(unsigned char)stripe[j][i]); filesize += 5; x++; if ((x % 15) == 0) { putc('\n', plotfile); filesize++; } } } putc('\n',plotfile); break; case lw: case hp: case xpreview: case winpreview: case tek: case ibm: case mac: case houston: case decregis: case fig: case pict: case ray: case pov: case gif: case idraw: case other: break; default: /* case vrml not handled */ break; /* graphics print code for a new printer goes here */ } } /* striprint */ #ifdef QUICKC void setupgraphics() { _getvideoconfig(&myscreen); #ifndef WATCOM switch(myscreen.adapter){ case _CGA: case _OCGA: _setvideomode(_HRESBW); break; case _EGA: case _OEGA: _setvideomode(_ERESNOCOLOR); case _VGA: case _OVGA: case _MCGA: _setvideomode(_VRES2COLOR); break; case _HGC: _setvideomode(_HERCMONO); break; default: printf("Your display hardware is unsupported by this program.\n"); break; } #endif #ifdef WATCOM switch(myscreen.adapter){ case _VGA: case _SVGA: _setvideomode(_VRES16COLOR); break; case _MCGA: _setvideomode(_MRES256COLOR); break; case _EGA: _setvideomode(_ERESNOCOLOR); break; case _CGA: _setvideomode(_MRES4COLOR); break; case _HERCULES: _setvideomode(_HERCMONO); break; default: printf("Your display hardware is unsupported by this program.\n"); exxit(-1); break; } #endif _getvideoconfig(&myscreen); _setlinestyle(0xffff); xunitspercm=myscreen.numxpixels / 25; yunitspercm=myscreen.numypixels / 17.5; xsize = 25.0; ysize = 17.5; } /* setupgraphics */ #endif void loadfont(short *font, char *application) { long i, charstart = 0, dummy; Char ch = 'A'; AjPRegexp intexp = NULL; AjPStr rdline = NULL; AjPFile fontfile; AjPStr fontfilename; AjPStr installdir = NULL; ajint inum; AjPStr token = NULL; if (!intexp) intexp = ajRegCompC("[0-9-]+"); i=0; fontfilename = ajAcdGetString("fontfile"); ajStrAssignS(&installdir, ajNamValueInstalldir()); { ajStrAppendC(&installdir, "/share/PHYLIPNEW/data/"); ajFilenameReplacePathS(&fontfilename, installdir); } fontfile = ajFileNewInNameS(fontfilename); if(!fontfile) return; ajReadlineTrim(fontfile, &rdline); while (!((!ajStrGetLen(rdline) || ajFileIsEof(fontfile)) || ch == ' ')) { charstart = i + 1; ajDebug("Next line charstart:%d '%S'\n", charstart, rdline); ajFmtScanS(rdline, "%c%c%ld%hd%hd", &ch, &ch, &dummy, &font[charstart + 1], &font[charstart + 2]); font[charstart] = ch; ajDebug("read [%d] '%d' [%d] '%d' [%d '%d'\n", i, font[i], (i+1), font[i+1], (i+2), font[i+2]); i = charstart + 3; do { if ((i - charstart - 3) % 10 == 0) { ajReadlineTrim(fontfile, &rdline); ajDebug("More on next line at i: %d charstart: %d '%S'\n", i, charstart, rdline); } i++; if (!ajRegExec(intexp, rdline)) ajDie("bad format in fontfile %F at '%S'", fontfile, rdline); ajRegSubI(intexp, 0, &token); ajStrToInt(token, &inum); font[i - 1] = inum; ajRegPost(intexp, &token); ajStrAssignS(&rdline, token); ajDebug("next [%d] '%d'\n", (i-1), font[i-1]); } while (abs(font[i - 1]) < 10000); ajDebug("Done at [%d] '%d'\n", (i-1), font[i-1]); font[charstart - 1] = i + 1; ajDebug("last [%d] '%d'\n", (charstart-1), font[charstart-1]); ajReadlineTrim(fontfile, &rdline); ajDebug("Start next line at font[%d-1] %d ch:'%c' EOF: %B\n", i, font[i-1], ch, ajFileIsEof(fontfile)); } font[charstart - 1] = 0; ajFileClose(&fontfile); ajRegFree(&intexp); ajStrDel(&rdline); ajStrDel(&token); } /* loadfont */ long showrayparms(long treecolor, long namecolor, long backcolor, long bottomcolor, long rx, long ry) { long i, loopcount; 
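/* showrayparms prints the current Rayshade/POV-Ray settings (tree colour,
   species-name colour, background, and either the output resolution or the
   bottom plane) and returns -1 if the user accepts them with 'Y'; otherwise it
   returns the number (1-4) of the item to change, or 0 for 'N', in which case
   getrayparms prompts for the item itself.  plotrparms loops on this until it
   gets -1 back. */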
Char ch,input[32]; long numtochange; if (previewer == tek) printf("%c\f", escape); else { for (i = 1; i <= 24; i++) putchar('\n'); } if (plotter == ray) { printf("Settings for Rayshade file: \n\n"); printf(" (1) Tree color: %.10s\n",colors[treecolor-1].name); printf(" (2) Species names color: %.10s\n",colors[namecolor-1].name); printf(" (3) Background color: %.10s\n",colors[backcolor-1].name); printf(" (4) Resolution: %2ld X %2ld\n\n",rx,ry); } else if (plotter == pov) { printf("Settings for POVray file: \n\n"); printf(" (1) Tree color: %.10s\n",colors[treecolor-1].name); printf(" (2) Species names color: %.10s\n",colors[namecolor-1].name); printf(" (3) Background color: %.10s\n",colors[backcolor-1].name); printf(" (4) Bottom plane: %.10s\n", bottomcolor == NO_PLANE ? "(none)\0" : colors[bottomcolor-1].name); } printf(" Do you want to accept these? (Yes or No)\n"); loopcount = 0; for (;;) { printf(" Type Y or N or the number (1-4) of the one to change: \n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); numtochange=atoi(input); uppercase(&input[0]); ch=input[0]; if (ch == 'Y' || ch == 'N' || (numtochange >= 1 && numtochange <= 4)) break; countup(&loopcount, 10); } return (ch == 'Y') ? -1 : numtochange; } /* showrayparms */ void getrayparms(long *treecolor, long *namecolor, long *backcolor, long *bottomcolor, long *rx,long *ry, long numtochange) { Char ch; long i, loopcount; if (numtochange == 0) { loopcount = 0; do { printf(" Type the number of one that you want to change (1-4):\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%ld%*[^\n]", &numtochange); getchar(); countup(&loopcount, 10); } while (numtochange < 1 || numtochange > 10); } switch (numtochange) { case 1: printf("\nWhich of these colors will the tree be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*treecolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*treecolor) = i; return; } } countup(&loopcount, 10); } while ((*treecolor) == 0); break; case 2: printf("\nWhich of these colors will the species names be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*namecolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*namecolor) = i; return; } } countup(&loopcount, 10); } while ((*namecolor) == 0); break; case 3: printf("\nWhich of these colors will the background be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*backcolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*backcolor) = i; return; } } countup(&loopcount, 10); } while ((*backcolor) == 0); break; case 4: /* Dimensions for rayshade, bottom plane for povray */ if (plotter == pov) { printf("\nWhich of these colors will the bottom plane be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, Violet, or None (no plane)\n"); printf(" (W, R, O, 
Y, G, B, V, or N)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); /* If the user doesn't want a bottom plane. . . */ if (ch == 'N') { (*bottomcolor) = NO_PLANE; return; } else { (*bottomcolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*bottomcolor) = i; return; } } } countup(&loopcount, 10); } while ((*bottomcolor) == 0); } else if (plotter == ray) { printf("\nEnter the X resolution:\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%ld%*[^\n]", rx); getchar(); printf("Enter the Y resolution:\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%ld%*[^\n]",ry); getchar(); } break; } } /* getrayparms */ long showvrmlparms(long vrmltreecolor, long vrmlnamecolor, long vrmlskycolornear, long vrmlskycolorfar, long vrmlgroundcolornear) { long i, loopcount; Char ch,input[32]; long numtochange; if (previewer == tek) printf("%c\f", escape); else { for (i = 1; i <= 24; i++) putchar('\n'); } printf("Settings for VRML file: \n\n"); printf(" (1) Tree color: %.10s\n",colors[vrmltreecolor-1].name); printf(" (2) Species names color: %.10s\n",colors[vrmlnamecolor-1].name); printf(" (3) Horizon color: %.10s\n",colors[vrmlskycolorfar-1].name); printf(" (4) Zenith color: %.10s\n",colors[vrmlskycolornear-1].name); printf(" (5) Ground color: %.10s\n",colors[vrmlgroundcolornear-1].name); printf(" Do you want to accept these? (Yes or No)\n"); loopcount = 0; for (;;) { printf(" Type Y or N or the number (1-5) of the one to change: \n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); numtochange=atoi(input); uppercase(&input[0]); ch=input[0]; if (ch == 'Y' || ch == 'N' || (numtochange >= 1 && numtochange <= 5)) break; countup(&loopcount, 10); } return (ch == 'Y') ? 
-1 : numtochange; } /* showvrmlparms */ void getvrmlparms(long *vrmltreecolor, long *vrmlnamecolor, long *vrmlskycolornear, long *vrmlskycolorfar, long *vrmlgroundcolornear, long *vrmlgroundcolorfar, long numtochange) { Char ch; long i, loopcount; if (numtochange == 0) { loopcount = 0; do { printf(" Type the number of one that you want to change (1-4):\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%ld%*[^\n]", &numtochange); getchar(); countup(&loopcount, 10); } while (numtochange < 1 || numtochange > 10); } switch (numtochange) { case 1: printf("\nWhich of these colors will the tree be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*vrmltreecolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*vrmltreecolor) = i; return; } } countup(&loopcount, 10); } while ((*vrmltreecolor) == 0); break; case 2: printf("\nWhich of these colors will the species names be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*vrmlnamecolor) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*vrmlnamecolor) = i; return; } } countup(&loopcount, 10); } while ((*vrmlnamecolor) == 0); break; case 3: printf("\nWhich of these colors will the horizon be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*vrmlskycolorfar) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*vrmlskycolorfar) = i; return; } } countup(&loopcount, 10); } while ((*vrmlskycolorfar) == 0); break; case 4: printf("\nWhich of these colors will the zenith be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*vrmlskycolornear) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*vrmlskycolornear) = i; return; } } countup(&loopcount, 10); } while ((*vrmlskycolornear) == 0); break; case 5: printf("\nWhich of these colors will the ground be?:\n"); printf(" White, Red, Orange, Yellow, Green, Blue, or Violet\n"); printf(" (W, R, O, Y, G, B, or V)\n"); loopcount = 0; do { printf(" Choose one: \n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); (*vrmlgroundcolornear) = 0; for (i = 1; i <= 7; i++) { if (ch == colors[i - 1].name[0]) { (*vrmlgroundcolornear) = i; (*vrmlgroundcolorfar) = i; return; } } countup(&loopcount, 10); } while ((*vrmlgroundcolornear) == 0); break; } } /* gevrmlparms */ void plotrparms(long ntips) { /* set up initial characteristics of plotter or printer */ long treecolor, namecolor, backcolor, bottomcolor, rayresx, rayresy; double viewangle; long n, loopcount; double xsizehold, ysizehold; 
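/* plotrparms sets the per-device drawing parameters: xunitspercm/yunitspercm
   (device units per centimetre), xsize/ysize (drawing area in cm) and, for the
   raster and dot-matrix devices, the strip geometry strpwide/strpdeep/strpdiv.
   xsizehold/ysizehold keep the previous page size so that xmargin and ymargin
   can be rescaled in proportion at the end of the function. */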
xsizehold = xsize; ysizehold = ysize; penchange = no; xcorner = 0.0; ycorner = 0.0; if (dotmatrix && (!previewing)) strpdiv = 1; switch (plotter) { case ray: penchange = yes; xunitspercm = 1.0; yunitspercm = 1.0; xsize = 10.0; ysize = 10.0; rayresx = 512; rayresy = 512; treecolor = 6; namecolor = 4; backcolor = 1; /* MSVC gave warning that bottomcolor was uninitialized. Unsure what this should be */ bottomcolor = 1; loopcount = 0; do { n=showrayparms(treecolor,namecolor,backcolor,bottomcolor,rayresx,rayresy); if (n != -1) getrayparms(&treecolor,&namecolor,&backcolor,&bottomcolor,&rayresx,&rayresy,n); countup(&loopcount, 10); } while (n != -1); xsize = rayresx; ysize = rayresy; fprintf(plotfile, "report verbose\n"); fprintf(plotfile, "screen %ld %ld\n", rayresx, rayresy); if (ysize >= xsize) { viewangle = 2 * atan(ysize / (2 * 1.21 * xsize)) * 180 / pi; fprintf(plotfile, "fov 45 %3.1f\n", viewangle); fprintf(plotfile, "light 1 point 0 %6.2f %6.2f\n", -xsize * 1.8, xsize * 1.5); fprintf(plotfile, "eyep %6.2f %6.2f %6.2f\n", xsize * 0.5, -xsize * 1.21, ysize * 0.55); } else { viewangle = 2 * atan(xsize / (2 * 1.21 * ysize)) * 180 / pi; fprintf(plotfile, "fov %3.1f 45\n", viewangle); fprintf(plotfile, "light 1 point 0 %6.2f %6.2f\n", -ysize * 1.8, ysize * 1.5); fprintf(plotfile, "eyep %6.2f %6.2f %6.2f\n", xsize * 0.5, -ysize * 1.21, ysize * 0.55); } fprintf(plotfile, "lookp %6.2f 0 %6.2f\n", xsize * 0.5, ysize * 0.5); fprintf(plotfile, "/* %.10s */\n", colors[treecolor - 1].name); fprintf(plotfile, "surface treecolor diffuse %5.2f%5.2f%5.2f specular 1 1 1 specpow 30\n", colors[treecolor - 1].red, colors[treecolor - 1].green, colors[treecolor - 1].blue); fprintf(plotfile, "/* %.10s */\n", colors[namecolor - 1].name); fprintf(plotfile, "surface namecolor diffuse %5.2f%5.2f%5.2f specular 1 1 1 specpow 30\n", colors[namecolor - 1].red, colors[namecolor - 1].green, colors[namecolor - 1].blue); fprintf(plotfile, "/* %.10s */\n", colors[backcolor - 1].name); fprintf(plotfile, "surface backcolor diffuse %5.2f%5.2f%5.2f\n\n", colors[backcolor - 1].red, colors[backcolor - 1].green, colors[backcolor - 1].blue); break; case pov: penchange = yes; xunitspercm = 1.0; yunitspercm = 1.0; xsize = 10.0; ysize = 10.0; rayresx = 512; rayresy = 512; treecolor = 6; namecolor = 4; backcolor = 1; bottomcolor = 1; loopcount = 0; do { n=showrayparms(treecolor,namecolor,backcolor,bottomcolor,rayresx,rayresy); if (n != -1) getrayparms(&treecolor,&namecolor,&backcolor,&bottomcolor,&rayresx,&rayresy,n); countup(&loopcount, 10); } while (n != -1); xsize = rayresx; ysize = rayresy; fprintf(plotfile, "// Declare the colors\n\n"); fprintf(plotfile, "#declare C_Tree = color rgb<%6.2f, %6.2f, %6.2f>\n", colors[treecolor-1].red, colors[treecolor-1].green, colors[treecolor-1].blue); fprintf(plotfile, "#declare C_Name = color rgb<%6.2f, %6.2f, %6.2f>\n\n", colors[namecolor-1].red, colors[namecolor-1].green, colors[namecolor-1].blue); fprintf(plotfile, "// Declare the textures\n\n"); fprintf(plotfile, "#declare %s = texture { pigment { C_Tree }\n", TREE_TEXTURE); fprintf(plotfile, "\t\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#declare %s = texture { pigment { C_Name }\n", NAME_TEXTURE); fprintf(plotfile, "\t\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "\n#global_settings { assumed_gamma 2.2 }\n\n"); fprintf(plotfile, "light_source { <0, %6.2f, %6.2f> color <1,1,1> }\n\n", xsize * 1.8, xsize * 1.5); /* The camera location */ fprintf(plotfile, "camera {\n"); if (ysize >= xsize) { fprintf(plotfile, 
"\tlocation <%6.2f, %6.2f, %6.2f>\n", -xsize * 0.5, -xsize * 1.21, ysize * 0.55); } else { fprintf(plotfile, "\tlocation <%6.2f, %6.2f, %6.2f>\n", -xsize * 0.5, -ysize * 1.21, ysize * 0.55); } fprintf(plotfile, "\tlook_at <%6.2f, 0, %6.2f>\n", -xsize * 0.5, ysize * 0.5); /* Handily, we can rotate since the rayshade paradigm ain't exactly congruent to the povray paradigm */ fprintf(plotfile, "\trotate z*180\n"); fprintf(plotfile, "}\n\n"); fprintf(plotfile, "#background { color rgb <%6.2f, %6.2f, %6.2f> }\n\n", colors[backcolor-1].red, colors[backcolor-1].green, colors[backcolor-1].blue); if (bottomcolor != NO_PLANE) { /* The user wants a plane on the bottom... */ if (grows == vertical) fprintf(plotfile, "plane { z, %2.4f\n", 0.0 /*ymargin*/); else fprintf(plotfile, "plane { z, %2.4f\n", ymargin - ysize / (ntips - 1)); fprintf(plotfile, "\tpigment {color rgb <%6.2f, %6.2f, %6.2f> }}\n\n", colors[bottomcolor-1].red, colors[bottomcolor-1].green, colors[bottomcolor-1].blue); } break; case vrml: #ifndef MAC penchange = yes; xunitspercm = 1.0; yunitspercm = 1.0; xsize = 10.0; ysize = 10.0; vrmlplotcolor = treecolor; loopcount = 0; do { n=showvrmlparms(vrmltreecolor, vrmlnamecolor, vrmlskycolornear, vrmlskycolorfar, vrmlgroundcolornear); if (n != -1) getvrmlparms(&vrmltreecolor, &vrmlnamecolor, &vrmlskycolornear, &vrmlskycolorfar, &vrmlgroundcolornear, &vrmlgroundcolorfar, n); countup(&loopcount, 10); } while (n != -1); break; #endif case pict: strcpy(fontname,"Times"); penchange = yes; xunitspercm = 28.346456693; yunitspercm = 28.346456693; /*7.5 x 10 inch default PICT page size*/ xsize = 19.05; ysize = 25.40; break; case lw: penchange = yes; xunitspercm = 28.346456693; yunitspercm = 28.346456693; xsize = pagex; ysize = pagey; break; case idraw: penchange = yes; xunitspercm = 28.346456693; yunitspercm = 28.346456693; xsize = 21.59; ysize = 27.94; break; case hp: penchange = no; xunitspercm = 400.0; yunitspercm = 400.0; xsize = 24.0; ysize = 18.0; break; #ifndef X_DISPLAY_MISSING case xpreview: xunitspercm = 39.37; yunitspercm = 39.37; xsize = width * 0.0254; ysize = height * 0.0254; break; #endif #ifdef WIN32 case winpreview: penchange = yes; xunitspercm = 28.346456693; yunitspercm = 28.346456693; xsize = winwidth / xunitspercm; ysize = winheight / yunitspercm; break; #endif case tek: xunitspercm = 50.0; yunitspercm = 50.0; xsize = 20.46; ysize = 15.6; break; case ibm: #ifdef TURBOC GraphDriver = 0; detectgraph(&GraphDriver,&GraphMode); getmoderange(GraphDriver,&LoMode,&HiMode); initgraph(&GraphDriver,&HiMode,""); xunitspercm = getmaxx()/25; yunitspercm = getmaxy() / 17.5; restorecrtmode(); xsize = 25.0; ysize = 17.5; #endif #ifdef QUICKC setupgraphics(); #endif break; case mac: penchange = yes; penchange = yes; xunitspercm = 28.346456693; yunitspercm = 28.346456693; xsize = winwidth / xunitspercm; ysize = winheight / yunitspercm; break; case houston: penchange = yes; xunitspercm = 100.0; yunitspercm = 100.0; xsize = 24.5; ysize = 17.5; break; case decregis: xunitspercm = 30.0; yunitspercm = 30.0; xsize = 25.0; ysize = 15.0; break; case epson: penchange = yes; xunitspercm = 47.244; yunitspercm = 28.346; xsize = 18.70; ysize = 22.0; strpwide = 960; strpdeep = 8; strpdiv = 1; break; case oki: penchange = yes; xunitspercm = 56.692; yunitspercm = 28.346; xsize = 19.0; ysize = 22.0; strpwide = 1100; strpdeep = 8; strpdiv = 1; break; case citoh: penchange = yes; xunitspercm = 28.346; yunitspercm = 28.346; xsize = 22.3; ysize = 26.0; strpwide = 640; strpdeep = 8; strpdiv = 1; break; case toshiba: 
penchange = yes; xunitspercm = 70.866; yunitspercm = 70.866; xsize = 19.0; ysize = 25.0; strpwide = 1350; strpdeep = 24; strpdiv = 4; break; case pcl: penchange = yes; xsize = 21.59; ysize = 27.94; xunitspercm = 118.11023622; /* 300 DPI = 118.1 DPC */ yunitspercm = 118.11023622; strpwide = 2550; /* 8.5 * 300 DPI */ strpdeep = DEFAULT_STRIPE_HEIGHT; /* height of the strip */ strpdiv = DEFAULT_STRIPE_HEIGHT; /* in this case == strpdeep */ /* this is information for 300 DPI resolution */ switch (hpresolution) { case 75: strpwide /= 4; xunitspercm /= 4.0; yunitspercm /= 4.0; break; case 150: strpwide /= 2; xunitspercm /= 2.0; yunitspercm /= 2.0; break; case 300: break; } break; case bmp: /* since it's resolution dependent, make 1x1 pixels */ penchange = yes; /* per square cm for easier math. */ xunitspercm = 1.0; yunitspercm = 1.0; xsize = userxsize / xunitspercm; ysize = userysize / yunitspercm; strpdeep = DEFAULT_STRIPE_HEIGHT; strpdiv = DEFAULT_STRIPE_HEIGHT; strpwide = (long)(xsize * xunitspercm); break; case xbm: /* since it's resolution dependent, make 1x1 pixels */ penchange = yes; /* per square cm for easier math. */ xunitspercm = 1.0; yunitspercm = 1.0; xsize = userxsize / xunitspercm; ysize = userysize / yunitspercm; strpdeep = 10; strpdiv = 10; strpwide = (long)(xsize*xunitspercm); break; case pcx: penchange = yes; xsize = 21.16; ysize = 15.88; strpdeep = 10; strpdiv = 10; xunitspercm = strpwide / xsize; switch (resopts) { case 1: strpwide = 640; yunitspercm = 350 / ysize; break; case 2: strpwide = 800; yunitspercm = 600 / ysize; break; case 3: strpwide = 1024; yunitspercm = 768 / ysize; break; } break; case fig: penchange = yes; xunitspercm = 31.011; yunitspercm = 29.78; xsize = 25.4; ysize = 20.32; break; case gif: case other: break; default: break; /* initial parameter settings for a new plotter go here */ } if (xsizehold != 0.0 && ysizehold != 0.0) { xmargin = xmargin * xsize / xsizehold; ymargin = ymargin * ysize / ysizehold; } if (previewing) return; } /* plotrparms */ void getplotter(Char ch) { switch (ch) { case 'L': plotter = lw; strcpy(fontname, "Times-Roman"); break; case 'A': plotter = idraw; strcpy(fontname, "Times-Bold"); break; case 'M': plotter = pict; strcpy(fontname, "Times"); break; case 'R': plotter = ray; strcpy(fontname, "Hershey"); break; case 'V': plotter = pov; strcpy(fontname, "Hershey"); break; case 'Z': plotter = vrml; strcpy(fontname, "Hershey"); break; case 'J': case 'S': case 'Y': plotter = pcl; strcpy(fontname, "Hershey"); /* following pcl init code copied here from plotrparms */ xunitspercm = 118.11023622; /* 300 DPI = 118.1 DPC */ yunitspercm = 118.11023622; strpwide = 2550; /* 8.5 * 300 DPI */ strpdeep = DEFAULT_STRIPE_HEIGHT; /* height of the strip */ strpdiv = DEFAULT_STRIPE_HEIGHT; /* in this case == strpdeep */ /* this is information for 300 DPI resolution */ switch (ch) { case 'J': hpresolution = 75; strpwide /= 4; xunitspercm /= 4.0; yunitspercm /= 4.0; break; case 'S': hpresolution = 150; strpwide /= 2; xunitspercm /= 2.0; yunitspercm /= 2.0; break; case 'Y': hpresolution = 300; break; default: break; } break; case 'K': plotter = tek; strcpy(fontname, "Hershey"); break; case 'H': plotter = hp; strcpy(fontname, "Hershey"); break; case 'I': plotter = ibm; strcpy(fontname, "Hershey"); break; case 'D': plotter = decregis; strcpy(fontname, "Hershey"); break; case 'B': plotter = houston; strcpy(fontname, "Hershey"); break; case 'E': plotter = epson; strcpy(fontname, "Hershey"); break; case 'C': plotter = citoh; strcpy(fontname, "Hershey"); break; 
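/* [Editorial sketch, not part of the original PHYLIP source.]  The 'J', 'S'
   and 'Y' cases above replicate the pcl setup from plotrparms(): the base
   figures are for 300 dpi (2550 dots across an 8.5 inch page, about 118.11
   dots per cm), and lower resolutions simply divide the strip width and the
   units-per-cm by 4 (75 dpi) or 2 (150 dpi).  The code below is a
   hypothetical, self-contained illustration of that scaling; the names
   pcl_setup and pcl_scale_for_dpi are not part of the program. */
#if 0
struct pcl_setup { long strpwide; double xunitspercm, yunitspercm; };

static struct pcl_setup pcl_scale_for_dpi(long dpi)
{
  /* start from the 300 dpi base values used above */
  struct pcl_setup s = { 2550, 118.11023622, 118.11023622 };
  long divisor = 300 / dpi;            /* 1, 2 or 4 for 300, 150 or 75 dpi */

  s.strpwide    /= divisor;
  s.xunitspercm /= (double)divisor;
  s.yunitspercm /= (double)divisor;
  return s;   /* e.g. 75 dpi -> strpwide 637, about 29.53 dots per cm */
}
#endif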
case 'O': plotter = oki; strcpy(fontname, "Hershey"); break; case 'T': plotter = toshiba; strcpy(fontname, "Hershey"); break; case 'N': case 'P': case 'Q': plotter = pcx; strcpy(fontname, "Hershey"); switch (ch) { case 'N': strpwide = 640; yunitspercm = 350 / ysize; resopts = 1; break; case 'P': strpwide = 800; yunitspercm = 600 / ysize; resopts = 2; break; case 'Q': strpwide = 1024; yunitspercm = 768 / ysize; resopts = 3; break; } break; case 'W': plotter = bmp; strcpy(fontname, "Hershey"); /* printf("Please select the MS-Windows bitmap file resolution\n"); printf("X resolution?\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%lf%*[^\n]", &userxsize); getchar(); printf("Y resolution?\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%lf%*[^\n]", &userysize); getchar(); xunitspercm = 1.0; yunitspercm = 1.0; Assuming existing reasonable margin values, set the margins to be the same as those in the previous output mode/resolution. This corrects the problem of the tree being hard up against the border when large resolutions are entered. xmargin = userxsize / xsize * xmargin; ymargin = userysize / ysize * ymargin; xsize = userxsize; ysize = userysize; strpdeep = DEFAULT_STRIPE_HEIGHT; strpdiv = DEFAULT_STRIPE_HEIGHT; strpwide = (long)xsize; break; */ case 'X': plotter = xbm; strcpy(fontname, "Hershey"); /* printf("Please select the X-bitmap file resolution\n"); printf("X resolution?\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%lf%*[^\n]", &userxsize); getchar(); printf("Y resolution?\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%lf%*[^\n]", &userysize); getchar(); xunitspercm = 1.0; yunitspercm = 1.0;*/ /* Assuming existing reasonable margin values, set the margins to be the same as those in the previous output mode/resolution. This corrects the problem of the tree being hard up against the border when large resolutions are entered. 
*/ /* xmargin = userxsize / xsize * xmargin; ymargin = userysize / ysize * ymargin; xsize = userxsize; ysize = userysize; strpdeep = DEFAULT_STRIPE_HEIGHT; strpdiv = DEFAULT_STRIPE_HEIGHT; strpwide = (long)xsize; */ break; case 'F': plotter = fig; strcpy(fontname, "Times-Roman"); break; case 'U': plotter = other; break; } dotmatrix = (plotter == epson || plotter == oki || plotter == citoh || plotter == toshiba || plotter == pcx || plotter == pcl || plotter == xbm || plotter == bmp); } /* getplotter */ void changepen(pentype pen) { Char picthi, pictlo; long pictint; lastpen = pen; switch (pen) { case treepen: linewidth = treeline; if (plotter == hp) fprintf(plotfile, "SP1;\n"); if (plotter == lw) { fprintf(plotfile, "stroke %8.2f setlinewidth \n", treeline); fprintf(plotfile, " 1 setlinecap 1 setlinejoin \n"); } #ifdef WIN32 if (plotter == winpreview) SelectObject(hdc, hPenTree); #endif break; case labelpen: linewidth = labelline; if (plotter == hp) fprintf(plotfile, "SP2;\n"); if (plotter == lw) { fprintf(plotfile, " stroke%8.2f setlinewidth \n", labelline); fprintf(plotfile, "1 setlinecap 1 setlinejoin \n"); } #ifdef WIN32 if (plotter == winpreview) SelectObject(hdc, hPenLabel); #endif break; } #ifdef MAC if (plotter == mac){ pictint = ( long)(linewidth + 0.5); if (pictint ==0) pictint = 1; } #endif if (plotter != pict) return; pictint = ( long)(linewidth + 0.5); if (pictint == 0) pictint = 1; picthi = (Char)(pictint / 256); pictlo = (Char)(pictint & 255); fprintf(plotfile, "\007%c%c%c%c", picthi, pictlo, picthi, pictlo); bytewrite += 5; } /* changepen */ int readafmfile(char *filename, short *metric) { char line[256], word1[100], word2[100]; int scanned = 1, nmetrics=0, inmetrics, charnum, charlen, i, capheight=0; FILE *fp; fp = fopen(filename,"r"); if (!fp) return 0; inmetrics = 0; for (i=0;i<256;i++){ metric[i] = (short)0; } for (;;){ /*scan_eoln(fp);*/ /* pmr: this seems to never set line - use old call */ scanned = fscanf(fp,"%[^\n]\n",line); if (scanned != 1 ) break; scanned=sscanf(line,"%s %s",word1,word2); if (scanned == 2 && strcmp(word1,"CapHeight") == 0) capheight = atoi(word2); if (inmetrics){ sscanf(line,"%*s %s %*s %*s %s",word1,word2); charnum = atoi(word1); charlen = atoi(word2); nmetrics--; if (nmetrics == 0) break; if (charnum != -1 && charnum >= 32) metric[charnum-31] = charlen; } else if (scanned == 2 && strcmp(word1,"StartCharMetrics") == 0) nmetrics = atoi(word2), inmetrics = 1; if ((strcmp(word1,"EndCharMetrics") == 0) || (feof(fp))) break; } FClose(fp); metric[0] = capheight; return 1; } /* readafmfile */ void metricforfont(char *fontname, short *fontmetric) { int i; long loopcount; char afmfile[FNMLNGTH]; if ((strcmp(fontname,"Helvetica") == 0) || (strcmp(fontname,"Helvetica-Oblique") == 0)) for (i=31;i<256;++i) fontmetric[i-31] = helvetica_metric[i-31]; else if ((strcmp(fontname,"Helvetica-Bold") == 0) || (strcmp(fontname,"Helvetica-BoldOblique") == 0)) for (i=31;i<256;++i) fontmetric[i-31] = helveticabold_metric[i-31]; else if (strcmp(fontname,"Times-Roman") == 0) for (i=31;i<256;++i) fontmetric[i-31] = timesroman_metric[i-31]; else if (strcmp(fontname,"Times") == 0) for (i=31;i<256;++i) fontmetric[i-31] = timesroman_metric[i-31]; else if (strcmp(fontname,"Times-Italic") == 0) for (i=31;i<256;++i) fontmetric[i-31] = timesitalic_metric[i-31]; else if (strcmp(fontname,"Times-Bold") == 0) for (i=31;i<256;++i) fontmetric[i-31] = timesbold_metric[i-31]; else if (strcmp(fontname,"Times-BoldItalic") == 0) for (i=31;i<256;++i) fontmetric[i-31] = 
timesbolditalic_metric[i-31]; else if (strncmp(fontname,"Courier",7) == 0){ fontmetric[0] = 562; for (i=32;i<256;++i) fontmetric[i-31] = (short)600; } else { if (didloadmetric){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31];} else { didloadmetric = 1; sprintf(afmfile,"%s.afm",fontname); /* search current dir */ if (readafmfile(afmfile,unknown_metric)){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31]; return;} sprintf(afmfile,"%s%s.afm",AFMDIR,fontname); /* search afm dir */ if (readafmfile(afmfile,unknown_metric)){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31]; return;} #ifdef NeXT sprintf(afmfile,"%s/Library/Fonts/%s.font/%s.afm",getenv("HOME"), fontname,fontname); if (readafmfile(afmfile,unknown_metric)){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31]; return;} sprintf(afmfile,"/LocalLibrary/Fonts/%s.font/%s.afm",fontname,fontname); if (readafmfile(afmfile,unknown_metric)){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31]; return;} #endif loopcount = 0; for (;;){ printf("Enter the path of the %s.afm file, or \"none\" for best guess:", fontname); getstryng(afmfile); if (strcmp(afmfile,"none") == 0){ for (i=31;i<256;++i) fontmetric[i-31] = timesroman_metric[i-31], unknown_metric[i-31] = timesroman_metric[i-31], didloadmetric =1; return; } else { if (readafmfile(afmfile,unknown_metric)){ for (i=31;i<256;++i) fontmetric[i-31] = unknown_metric[i-31]; return;} else printf("Can't read that file. Please re-enter.\n"); } countup(&loopcount, 10); } } } } /* metricforfont */ double heighttext(fonttype font, char *fontname) { short afmetric[256]; #ifdef MAC FontInfo info; #endif if (strcmp(fontname,"Hershey") == 0) return (double)font[2]; #ifdef MAC else if (((plotter == pict || plotter == mac) && (((grows == vertical && labelrotation == 0.0) || (grows == horizontal && labelrotation == 90.0))))){ TextFont(macfontid(fontname)); TextSize((int)(1000)); TextFace((int)((pictbold ? 1: 0) | (pictitalic ? 2 : 0)| (pictoutline ? 8 : 0)|(pictshadow ? 16 : 0))); GetFontInfo(&info); TextFont(macfontid("courier")); TextSize(10); TextFace(0); return (double)info.ascent; } #endif else if (strcmp(fontname,"Hershey") == 0) return (double)font[2]; else{ metricforfont(fontname,afmetric); return (double)afmetric[0];} } /* heighttext */ double lengthtext(char *pstring, long nchars, char *fontname, fonttype font) { /* lengthext */ long i, j, code; static double sumlength; long sumbigunits; short afmetric[256]; sumlength = 0.0; if (strcmp(fontname,"Hershey") == 0) { for (i = 0; i < nchars; i++) { code = pstring[i]; j = 1; while (font[j] != code && font[j - 1] != 0) j = font[j - 1]; if (font[j] == code) sumlength += font[j + 2]; } return sumlength; } #ifdef MAC else if (((plotter == pict || plotter == mac) && (((grows == vertical && labelrotation == 0.0) || (grows == horizontal && labelrotation == 90.0))))){ TextFont(macfontid(fontname)); TextSize((int)(1000)); TextFace((int)((pictbold ? 1: 0) | (pictitalic ? 2 : 0)| (pictoutline ? 8 : 0)|(pictshadow ? 
16 : 0))); sumbigunits = 0; for (i = 0; i < nchars; i++) sumbigunits += (long)CharWidth(pstring[i]); TextFace(0); TextSize(10); TextFont(macfontid("courier")); return (double)sumbigunits; } #endif else { metricforfont(fontname,afmetric); sumbigunits = 0; for (i = 0; i < nchars; i++) sumbigunits += afmetric[(int)(1+(unsigned char)pstring[i] - 32)]; sumlength = (double)sumbigunits; } return sumlength; } /* lengthtext */ void plotchar(long *place, struct LOC_plottext *text) { text->heightfont = text->font[*place + 1]; text->yfactor = text->height / text->heightfont; text->xfactor = text->yfactor; *place += 3; do { (*place)++; text->coord = text->font[*place - 1]; if (text->coord > 0) text->penstatus = pendown; else text->penstatus = penup; text->coord = abs(text->coord); text->coord %= 10000; text->xfont = (text->coord / 100 - xstart) * text->xfactor; text->yfont = (text->coord % 100 - ystart) * text->yfactor; text->xplot = text->xx + (text->xfont * text->cosslope + text->yfont * text->sinslope) * text->compress; text->yplot = text->yy - text->xfont * text->sinslope + text->yfont * text->cosslope; plot(text->penstatus, text->xplot, text->yplot); } while (abs(text->font[*place - 1]) < 10000); text->xx = text->xplot; text->yy = text->yplot; } /* plotchar */ void swap_charptr(char **one, char **two) { char *tmp = (*one); (*one)= (*two); (*two) = tmp; } /* swap */ void plotpb() { pagecount++; fprintf(plotfile,"\n showpage \n%%%%PageTrailer\n"); fprintf(plotfile,"%%%%DocumentFonts: %s\n", (strcmp(fontname,"Hershey") == 0) ? "" : fontname); fprintf(plotfile,"%%%%\n%%%%Page: %ld %ld\n",pagecount,pagecount); fprintf(plotfile,"%%%%PageBoundingBox: 0 0 %d %d\n", (int)(xunitspercm*paperx),(int)(yunitspercm*papery)); fprintf(plotfile,"%%%%PageFonts: (atend)\n%%%%BeginPageSetup\n%%%%PaperSize: Letter\n"); fprintf(plotfile,"0 0 moveto\n"); /* hack to make changepen work w/o errors */ changepen(lastpen); } /* plotpb */ void drawit(char *fontname, double *xoffset, double *yoffset, long numlines, node *root) { long i, j, line, xpag, ypag; long test_long ; /* To get a division out of a loop */ (*xoffset) = 0.0; (*yoffset) = 0.0; xpag = (int)((pagex-hpmargin-0.01)/(paperx - hpmargin))+1; ypag = (int)((pagey-vpmargin-0.01)/(papery - vpmargin))+1; if (dotmatrix){ strptop = (long)(ysize * yunitspercm); strpbottom = numlines*strpdeep + 1; } else { pagecount = 1; for (j=0; j DEFAULT_STRIPE_HEIGHT){ /* large stripe, do in DEFAULT_STRIPE_HEIGHT (20)-line */ for (i=0;i b) ? a : b) #ifdef min #undef min #endif #define min(a,b) ((a > b) ? 
b : a) #define max4(a,b,c,d) (max(max(a,b),max(c,d))) #define min4(a,b,c,d) (min(min(a,b),min(c,d))) struct LOC_plottext text; long i, j, code; double pointsize; int epointsize; /* effective pointsize before scale in idraw matrix */ double iscale; double textlen; double px0,py0,px1,py1; /* square bounding box of text */ text.heightfont = font_[2]; pointsize = (((height_ / xunitspercm) / 2.54) * 72.0); if (strcmp(fontname,"Hershey") !=0) pointsize *= ((double)1000.0 / heighttext(font_,fontname)); text.height = height_; text.compress = cmpress2; text.font = font_; text.xx = x; text.yy = y; text.sinslope = sin(pi * slope / 180.0); text.cosslope = cos(pi * slope / 180.0); if ((strcmp(fontname,"Hershey") == 0)|| (previewing && (!(((plotter == pict) || (plotter == mac)) && (((grows == vertical) && (labelrotation == 0.0)) || ((grows == horizontal) && (labelrotation == 90.0)) ))))){ for (i = 0; i < nchars; i++) { code = pstring[i]; j = 1; while (text.font[j] != code && text.font[j - 1] != 0) j = text.font[j - 1]; plotchar(&j, &text); } } /* print native font. idraw, PS, pict, and fig. */ else if (plotter == fig) { fprintf(plotfile,"4 0 %d %d 0 -1 0 %1.5f 4 19 163 %d %d %s\001\n", figfontid(fontname), /* font ID */ (int)pointsize, /* font size */ (double)0.0, /* font rotation */ (int)x, /* x position */ (int)(606.0 - y), /* y position */ pstring); } else if (plotter == lw) { /* If there's NO possibility that the line intersects the square bounding * box of the font, leave it out. Otherwise, let postscript clip to region. * Compute text boundary, be REAL generous. */ textlen = (lengthtext(pstring,nchars,fontname,font_)/1000)*pointsize; px0 = min4(x + (text.cosslope * pointsize), x - (text.cosslope * pointsize), x + (text.cosslope * pointsize) + (text.sinslope * textlen), x - (text.cosslope * pointsize) + (text.sinslope * textlen)) /28.346; px1 = max4(x + (text.cosslope * pointsize), x - (text.cosslope * pointsize), x + (text.cosslope * pointsize) + (text.sinslope * textlen), x - (text.cosslope * pointsize) + (text.sinslope * textlen)) /28.346; py0 = min4(y + (text.sinslope * pointsize), y - (text.sinslope * pointsize), y + (text.sinslope * pointsize) + (text.cosslope * textlen), y - (text.sinslope * pointsize) + (text.cosslope * textlen)) /28.346; py1 = max4(y + (text.sinslope * pointsize), y - (text.sinslope * pointsize), y + (text.sinslope * pointsize) + (text.cosslope * textlen), y - (text.sinslope * pointsize) + (text.cosslope * textlen)) /28.346; /* if rectangles intersect, print it. 
*/ if (rectintersects(px0,py0,px1,py1,clipx0,clipy0,clipx1,clipy1)) { fprintf(plotfile,"gsave\n"); fprintf(plotfile,"/%s findfont %f scalefont setfont\n",fontname, pointsize); fprintf(plotfile,"%f %f translate %f rotate\n", x-(clipx0*xunitspercm),y-(clipy0*xunitspercm),-slope); fprintf(plotfile,"0 0 moveto\n"); fprintf(plotfile,"(%s) show\n",pstring); fprintf(plotfile,"grestore\n"); } } else if (plotter == idraw) { iscale = pointsize / 12.0; y += text.height * text.cosslope; x += text.height * text.sinslope; fprintf(plotfile, "Begin %%I Text\n"); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I f %s\n", findXfont(fontname,pointsize,&iscale,&epointsize)); fprintf(plotfile,"%s %d SetF\n",fontname,epointsize); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ %f %f %f %f %f %f ] concat\n", text.cosslope*iscale, -text.sinslope*iscale, text.sinslope*iscale, text.cosslope*iscale, x+216.0 ,y+285.0); fprintf(plotfile, "%%I\n"); fprintf(plotfile, "[\n(%s)\n] Text\nEnd\n\n",pstring); } else if (plotter == pict || plotter == mac) { if (previewing){ #ifdef MAC TextFont(macfontid(fontname)); TextSize((int)(pointsize+0.5)); TextFace((int)((pictbold ? 1: 0) | (pictitalic ? 2 : 0)| (pictoutline ? 8 : 0)|(pictshadow ? 16 : 0))); MoveTo((int)floor((double)x + 0.5), winheight - (long)floor((double)y + 0.5)+MAC_OFFSET); putstring(pstring); TextFont(macfontid("courier")); TextSize(10); TextFace(0); #endif } else { /* txfont: */ fprintf(plotfile,"%c",(unsigned char)3); pictoutint(plotfile,macfontid(fontname)); /* txsize: */ fprintf(plotfile,"%c",13); pictoutint(plotfile,(int)(pointsize+0.5)); /* txface: */ fprintf(plotfile,"%c%c",4, (int)((pictbold ? 1: 0) | (pictitalic ? 2 : 0)| (pictoutline ? 8 : 0)|(pictshadow ? 16 : 0))); /* txfloc: */ fprintf(plotfile,"%c",40); pictoutint(plotfile,(int)floor(ysize * yunitspercm - y + 0.5)); pictoutint(plotfile,(int)(x+0.5)); fprintf(plotfile,"%c%s",(char)strlen(pstring),pstring); bytewrite+=(14+strlen(pstring)); } } } /* plottext */ void makebox(char *fn,double *xo,double *yo,double *scale,long ntips) /* fn--fontname| xo,yo--x and y offsets */ { /* draw the box on screen which represents plotting area. */ char ch; long xpag,ypag,i,j; double xpagecorrection, ypagecorrection; if (previewer != winpreview && previewer != mac && previewer != xpreview && previewer != other) { printf("\nWe now will preview the tree. The box that will be\n"); printf("plotted on the screen represents the boundary of the\n"); printf("final plotting surface. 
To see the preview, press on\n"); printf("the ENTER or RETURN key (you may need to do it twice).\n"); printf("When finished viewing it, press on that key again.\n"); } oldpenchange = penchange; oldxsize = xsize; oldysize = ysize; oldxunitspercm = xunitspercm; oldyunitspercm = yunitspercm; oldxcorner = xcorner; oldycorner = ycorner; oldxmargin = xmargin; oldymargin = ymargin; oldhpmargin = hpmargin; oldvpmargin = vpmargin; oldplotter = plotter; plotter = previewer; if (previewer != winpreview && previewer != mac && previewer != xpreview && previewer != other) { #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); (void)getchar(); if (ch == '\n') ch = ' '; } plotrparms(ntips); initplotter(ntips,fn); xcorner += 0.05 * xsize; ycorner += 0.05 * ysize; xsize *= 0.9; ysize *= 0.9; (*scale) = ysize / oldysize; if (xsize / oldxsize < (*scale)) (*scale) = xsize / oldxsize; xpagecorrection = oldxsize / pagex; ypagecorrection = oldysize / pagey; (*xo) = (xcorner + (xsize - oldxsize * (*scale)) / 2.0) / (*scale); (*yo) = (ycorner + (ysize - oldysize * (*scale)) / 2.0) / (*scale); xscale = (*scale) * xunitspercm; yscale = (*scale) * yunitspercm; xmargin *= (*scale); ymargin *= (*scale); hpmargin *= (*scale); vpmargin *= (*scale); xpag = (int)((pagex-hpmargin-0.01)/(paperx - hpmargin))+1; ypag = (int)((pagey-vpmargin-0.01)/(papery - vpmargin))+1; /* draw the outer borders */ plot(penup, xscale * (*xo), yscale * (*yo)); plot(pendown, xscale * (*xo), yscale * ((*yo) + pagey * ypagecorrection)); plot(pendown, xscale * ((*xo) + pagex * xpagecorrection), yscale * ((*yo) + pagey * ypagecorrection)); plot(pendown, xscale * ((*xo) + pagex * xpagecorrection), yscale * (*yo)); plot(pendown, xscale * (*xo), yscale * (*yo)); /* we've done the extent, now draw the dividing lines: */ for (i=0; itype != ClientMessage || event->xclient.data.l[0] != wm_delete_window) return; winaction=changeparms; close_x(); } void close_x() { shell=NULL; XtAppSetExitFlag(appcontext); XtUnrealizeWidget(toplevel); XtDestroyWidget(toplevel); XtCloseDisplay(display); } void dismiss_dialog() { XtDestroyWidget(shell); shell=NULL; } void do_dialog() { if (shell != NULL) return; shell=XtCreatePopupShell("About",transientShellWidgetClass, toplevel,NULL,0); dialog=XtCreateManagedWidget("dialog",dialogWidgetClass,shell,NULL,0); XawDialogAddButton(dialog,"Dismiss",(XtCallbackProc)dismiss_dialog ,NULL); XtRealizeWidget(shell); wm_delete_window2 = XInternAtom(XtDisplay(shell), "WM_DELETE_WINDOW",0); XSetWMProtocols(XtDisplay(shell),XtWindow(shell), &wm_delete_window2,1); XtMapWidget(shell); } static XtActionsRec draw_actions[] = { { "quit", (XtActionProc)delete_callback }, }; void init_x() { Widget paned; Widget menubar; Widget menuButton; Widget menu; Widget entry; Widget drawing_area; XSetWindowAttributes winAttr; Arg wargs[7]; unsigned int dummy1,dummy2; Window dummy3; XGCValues values; toplevel=XtAppInitialize(&appcontext,"phylip",NULL,0,&nargc,nargv,res, NULL,0); /* make the top level window*/ /* this is for closing the window*/ XtAppAddActions(appcontext,draw_actions,1); XtOverrideTranslations(toplevel, XtParseTranslationTable ("WM_PROTOCOLS: quit()")); /* create a form add it to toplevel */ paned = XtCreateManagedWidget("paned",formWidgetClass,toplevel,NULL,0); /* create a menubar add it to the form*/ menubar = XtCreateManagedWidget("menubar",boxWidgetClass,paned,NULL,0); /* create an area to draw in with a size relative to the size of the screen*/ 
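/* [Editorial sketch, not part of the original source.]  The XGetGeometry call
   below asks X for the root-window geometry and then sizes the preview
   drawing area relative to the screen: 70% of the screen height, with the
   width set to three quarters of that height, giving a portrait-shaped
   canvas.  A minimal standalone illustration, with hypothetical names
   (preview_canvas_size, screen_h, canvas_w, canvas_h): */
#if 0
static void preview_canvas_size(unsigned int screen_h,
                                unsigned int *canvas_w, unsigned int *canvas_h)
{
  *canvas_h = (unsigned int)(screen_h * 0.7);    /* 70% of screen height  */
  *canvas_w = (unsigned int)(*canvas_h * 0.75);  /* width = 3/4 of height */
  /* e.g. a 1080-pixel-tall screen gives a 567 x 756 drawing area */
}
#endif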
XGetGeometry(XtDisplay(toplevel),XDefaultRootWindow(XtDisplay(toplevel)), &dummy3,&x,&y,&width,&height,&dummy1,&dummy2); height *= 0.7; width = 0.75 * height; XtSetArg(wargs[0],XtNwidth,width); XtSetArg(wargs[1],XtNheight,height); drawing_area = XtCreateManagedWidget("drawing_area",coreWidgetClass, paned,wargs,2); /* create a menubuton add it to the menubar*/ menuButton = XtCreateManagedWidget ("File",menuButtonWidgetClass, menubar,NULL,0); /* create a menu add it to the menubutton */ menu = XtCreatePopupShell("menu",simpleMenuWidgetClass,menuButton,NULL,0); entry=XtCreateManagedWidget("Plot",smeBSBObjectClass,menu,NULL,0); XtAddCallback(entry,XtNcallback,plot_callback,NULL); entry=XtCreateManagedWidget("Change Parameters",smeBSBObjectClass, menu,NULL,0); XtAddCallback(entry,XtNcallback,change_callback,NULL); entry=XtCreateManagedWidget("Quit",smeBSBObjectClass,menu,NULL,0); XtAddCallback(entry,XtNcallback,quit_callback,NULL); menuButton = XtCreateManagedWidget("Help",menuButtonWidgetClass, menubar,NULL,0); menu = XtCreatePopupShell("menu",simpleMenuWidgetClass,menuButton,NULL,0); entry=XtCreateManagedWidget("About",smeBSBObjectClass,menu,NULL,0); XtAddCallback(entry,XtNcallback,about_callback,NULL); /* realize the widgets */ XtRealizeWidget(toplevel); wm_delete_window = XInternAtom(XtDisplay(toplevel), "WM_DELETE_WINDOW",0); XSetWMProtocols(XtDisplay(toplevel),XtWindow(toplevel), &wm_delete_window,1); values.foreground=BlackPixel(XtDisplay(toplevel),0); gc1=XCreateGC (XtDisplay (toplevel), XtWindow (drawing_area), GCForeground,&values); mainwin=XtWindow(drawing_area); XtAddEventHandler(drawing_area,ExposureMask ,FALSE, (XtEventHandler)redraw,NULL); XtAddEventHandler(toplevel,StructureNotifyMask,FALSE, (XtEventHandler)redraw,NULL); display=XtDisplay(toplevel); winAttr.backing_store = Always; winAttr.save_under=1; XChangeWindowAttributes(display,mainwin,CWBackingStore|CWSaveUnder,&winAttr); XGetGeometry(display,mainwin,&DefaultRootWindow(display), &x,&y,&width,&height,&dummy1,&dummy2); } #endif PHYLIPNEW-3.69.650/src/restdist.c0000664000175000017500000003142711616234204013070 00000000000000 #include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1994-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define initialv 0.1 /* starting value of branch length */ #define iterationsr 20 /* how many Newton iterations per distance */ extern sequence y; AjPPhyloState* phylostates = NULL; #ifndef OLDC /* function prototypes */ void restdist_inputnumbers(AjPPhyloState); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(AjPPhyloState); void restdist_inputdata(AjPPhyloState); void restdist_sitesort(void); void restdist_sitecombine(void); void makeweights(void); void makev(long, long, double *); void makedists(void); void writedists(void); void getinput(void); void reallocsites(void); /* function prototypes */ #endif Char infilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; long sites, weightsum, datasets, ith; boolean restsites, neili, gama, weights, lower, progress, mulsets, firstset; double ttratio, fracchange, cvi, sitelength, xi, xv; double **d; steptr aliasweight; char *progname; Char ch; void restdist_inputnumbers(AjPPhyloState state) { /* read and print out numbers of species and sites */ spp = state->Size; sites = state->Len; } /* restdist_inputnumbers */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs; sitelength = 6.0; neili = false; gama = false; cvi = 0.0; weights = false; lower = false; printdata = false; progress = true; restsites = true; ttratio = 2.0; mulsets = false; datasets = 1; printf("\nRestriction site or fragment distances, "); printf("version %s\n\n",VERSION); embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("data"); numseqs = 0; while (phylostates[numseqs]) numseqs++; if (numseqs > 1) { mulsets = true; datasets = numseqs; } restsites = ajAcdGetBoolean("restsites"); neili = ajAcdGetBoolean("neili"); if(!neili) gama = ajAcdGetBoolean("gammatype"); if(gama) { cvi = ajAcdGetFloat("gammacoefficient"); cvi = 1.0 / (cvi * cvi); } ttratio = ajAcdGetFloat("ttratio"); sitelength = ajAcdGetInt("sitelength"); lower = ajAcdGetBoolean("lower"); printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); xi = (ttratio - 0.5)/(ttratio + 0.5); xv = 1.0 - xi; fracchange = xi*0.5 + xv*0.75; embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void reallocsites() { long i; for (i = 0; i < spp; i++){ free(y[i]); y[i] = (Char *)Malloc(sites*sizeof(Char)); } free(weight); free(alias); free(aliasweight); weight = (steptr)Malloc((sites+1)*sizeof(long)); alias = (steptr)Malloc((sites+1)*sizeof(long)); aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); makeweights(); } void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc((sites+1)*sizeof(Char)); nayme = (naym *)Malloc(spp*sizeof(naym)); weight = (steptr)Malloc((sites+1)*sizeof(long)); alias = (steptr)Malloc((sites+1)*sizeof(long)); aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); d = (double **)Malloc(spp*sizeof(double *)); for (i = 0; i < spp; i++) d[i] = (double*)Malloc(spp*sizeof(double)); } /* allocrest */ void doinit() { /* initializes variables */ restdist_inputnumbers(phylostates[0]); if (printdata) fprintf(outfile, "\n %4ld Species, %4ld Sites\n", spp, sites); allocrest(); } /* doinit */ void inputoptions(AjPPhyloState state) { /* read the options information */ long i, /*extranum,*/ cursp, curst; if (!firstset) { cursp = state->Size; curst = state->Len; if (cursp != spp) { printf("\nERROR: INCONSISTENT 
NUMBER OF SPECIES IN DATA SET %4ld\n", ith); embExitBad(); } sites = curst; reallocsites(); } for (i = 1; i <= sites; i++) weight[i] = 1; weightsum = sites; /*extranum = 0;*/ /* fscanf(infile, "%*[ 0-9]"); readoptions(&extranum, "W"); for (i = 1; i <= extranum; i++) { matchoptions(&ch, "W"); inputweights2(1, sites+1, &weightsum, weight, &weights, "RESTDIST"); } */ } /* inputoptions */ void restdist_inputdata(AjPPhyloState state) { /* read the species and sites data */ long i, j, k, l /*, sitesread = 0 */; Char ch; boolean allread, done; AjPStr str; if (printdata) putc('\n', outfile); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 39) j = 39; if (printdata) { fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Sites\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "-----\n\n"); } /*sitesread = 0;*/ allread = false; while (!(allread)) { i = 1; while (i <= spp ) { initnamestate(state, i - 1); str = state->Str[i-1]; j = 0; done = false; while (!done) { while (j < sites) { ch = ajStrGetCharPos(str, j); if (ch != '1' && ch != '0' && ch != '+' && ch != '-' && ch != '?') { printf(" ERROR -- Bad symbol %c",ch); printf(" at position %ld of species %ld\n", j+1, i); embExitBad(); } if (ch == '1') ch = '+'; if (ch == '0') ch = '-'; j++; y[i - 1][j - 1] = ch; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (printdata) { for (i = 1; i <= ((sites - 1) / 60 + 1); i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; for (k = (i - 1) * 60 + 1; k <= l; k++) { putc(y[j][k - 1], outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } } /* restdist_inputdata */ void restdist_sitesort() { /* Shell sort keeping alias, aliasweight in same order */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied; gap = sites / 2; while (gap > 0) { for (i = gap + 1; i <= sites; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = alias[j]; jg = alias[j + gap]; flip = false; tied = true; k = 1; while (k <= spp && tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (tied) { aliasweight[j] += aliasweight[j + gap]; aliasweight[j + gap] = 0; } if (!flip) break; itemp = alias[j]; alias[j] = alias[j + gap]; alias[j + gap] = itemp; itemp = aliasweight[j]; aliasweight[j] = aliasweight[j + gap]; aliasweight[j + gap] = itemp; j -= gap; } } gap /= 2; } } /* restdist_sitesort */ void restdist_sitecombine() { /* combine sites that have identical patterns */ long i, j, k; boolean tied; i = 1; while (i < sites) { j = i + 1; tied = true; while (j <= sites && tied) { k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i] - 1] == y[k - 1][alias[j] - 1]); k++; } if (tied && aliasweight[j] > 0) { aliasweight[i] += aliasweight[j]; aliasweight[j] = 0; alias[j] = alias[i]; } j++; } i = j - 1; } } /* restdist_sitecombine */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i] = i; aliasweight[i] = weight[i]; } restdist_sitesort(); restdist_sitecombine(); sitescrunch2(sites + 1, 2, 3, aliasweight); for (i = 1; i <= sites; i++) { weight[i] = aliasweight[i]; if (weight[i] > 0) endsite = i; } weight[0] = 1; } /* makeweights */ void makev(long m, long n, double *v) { /* 
compute one distance */ long i, ii, it, numerator, denominator; double f, g=0, h, p1, p2, p3, q1, pp, tt, delta, vv; numerator = 0; denominator = 0; for (i = 0; i < endsite; i++) { ii = alias[i + 1]; if ((y[m-1][ii-1] == '+') || (y[n-1][ii-1] == '+')) { denominator += weight[i+1]; if ((y[m-1][ii-1] == '+') && (y[n-1][ii-1] == '+')) { numerator += weight[i + 1]; } } } f = 2*numerator/(double)(denominator+numerator); if (restsites) { if (exp(-sitelength*1.38629436) > f) { printf("\nERROR: Infinite distance between "); printf(" species %3ld and %3ld\n", m, n); embExitBad(); } } if (!restsites) { if (!neili) { f = (sqrt(f*(f+8.0))-f)/2.0; } else { g = initialv; delta = g; it = 0; while (fabs(delta) > 0.00002 && it < iterationsr) { it++; h = g; g = exp(0.25*log(f * (3-2*g))); delta = g - h; } } } if ((!restsites) && neili) vv = - (2.0/sitelength) * log(g); else { if(neili && restsites){ pp = exp(log(f)/(2*sitelength)); vv = -(3.0/2.0)*log((4.0/3.0)*pp - (1.0/3.0)); } else { pp = exp(log(f)/sitelength); delta = initialv; tt = delta; it = 0; while (fabs(delta) > 0.00002 && it < iterationsr) { it++; if (gama) { p1 = exp(-cvi * log(1 + tt / cvi)); p2 = exp(-cvi * log(1 + xv * tt / cvi)) - exp(-cvi * log(1 + tt / cvi)); p3 = 1.0 - exp(-cvi * log(1 + xv * tt / cvi)); } else { p1 = exp(-tt); p2 = exp(-xv * tt) - exp(-tt); p3 = 1.0 - exp(-xv * tt); } q1 = p1 + p2 / 2.0 + p3 / 4.0; g = q1 - pp; if (g < 0.0) delta = fabs(delta) / -2.0; else delta = fabs(delta); tt += delta; } vv = fracchange * tt; } } *v = vv; } /* makev */ void makedists() { /* compute distance matrix */ long i, j; double v; if (progress) printf("Distances calculated for species\n"); for (i = 0; i < spp; i++) d[i][i] = 0.0; for (i = 1; i < spp; i++) { if (progress) { printf(" "); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); printf(" "); } for (j = i + 1; j <= spp; j++) { makev(i, j, &v); d[i - 1][j - 1] = v; d[j - 1][i - 1] = v; if (progress) putchar('.'); } if (progress) putchar('\n'); } if (progress) { printf(" "); for (j = 0; j < nmlngth; j++) putchar(nayme[spp - 1][j]); putchar('\n'); } } /* makedists */ void writedists() { /* write out distances */ long i, j, k; if (!printdata) fprintf(outfile, "%5ld\n", spp); for (i = 0; i < spp; i++) { for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); if (lower) k = i; else k = spp; for (j = 1; j <= k; j++) { if (d[i][j-1] < 100.0) fprintf(outfile, "%10.6f", d[i][j-1]); else if (d[i][j-1] < 1000.0) fprintf(outfile, " %10.6f", d[i][j-1]); else fprintf(outfile, " %11.6f", d[i][j-1]); if ((j + 1) % 7 == 0 && j < k) putc('\n', outfile); } putc('\n', outfile); } if (progress) printf("\nDistances written to file \"%s\"\n\n", outfilename); } /* writedists */ void getinput() { /* reads the input data */ inputoptions(phylostates[ith-1]); restdist_inputdata(phylostates[ith-1]); makeweights(); } /* getinput */ int main(int argc, Char *argv[]) { /* distances from restriction sites or fragments */ #ifdef MAC argc = 1; /* macsetup("Restdist",""); */ argv[0] = "Restdist"; #endif init(argc,argv); emboss_getoptions("frestdist", argc, argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= datasets; ith++) { getinput(); if (ith == 1) firstset = false; if (datasets > 1 && progress) printf("\nData set # %ld:\n\n",ith); makedists(); writedists(); } FClose(infile); FClose(outfile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* distances from restriction 
sites or fragments */ PHYLIPNEW-3.69.650/src/config.h.in0000664000175000017500000001213112171071676013106 00000000000000/* src/config.h.in. Generated from configure.in by autoheader. */ /* Define if building universal (internal helper macro) */ #undef AC_APPLE_UNIVERSAL_BUILD /* Define to 1 to compile all deprecated functions */ #undef AJ_COMPILE_DEPRECATED /* Define to 1 to compile deprecated functions used in book texts for 6.2.0 */ #undef AJ_COMPILE_DEPRECATED_BOOK /* Define to 1 to collect AJAX library usage statistics. */ #undef AJ_SAVESTATS /* Define to 1 if the `getpgrp' function requires zero arguments. */ #undef GETPGRP_VOID /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_DIRENT_H /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H /* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ #undef HAVE_DOPRNT /* Define to 1 if you have the `erand48' function. */ #undef HAVE_ERAND48 /* Define to 1 if you have the `fork' function. */ #undef HAVE_FORK /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if the Java Native Interface (JNI) is available. */ #undef HAVE_JAVA /* Define to 1 if you have the `m' library (-lm). */ #undef HAVE_LIBM /* Define to 1 if you have the `mcheck' function. */ #undef HAVE_MCHECK /* Define to 1 if you have the `memmove' function. */ #undef HAVE_MEMMOVE /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H /* Define to 1 if MySQL libraries are available. */ #undef HAVE_MYSQL /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_NDIR_H /* Define to 1 if PostgreSQL libraries are available. */ #undef HAVE_POSTGRESQL /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the `strchr' function. */ #undef HAVE_STRCHR /* Define to 1 if you have the `strdup' function. */ #undef HAVE_STRDUP /* Define to 1 if you have the `strftime' function. */ #undef HAVE_STRFTIME /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the `strstr' function. */ #undef HAVE_STRSTR /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_SYS_DIR_H /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_SYS_NDIR_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_TARGETCONFIG_H /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H /* Define to 1 if you have the `vfork' function. */ #undef HAVE_VFORK /* Define to 1 if you have the header file. */ #undef HAVE_VFORK_H /* Define to 1 if you have the `vprintf' function. */ #undef HAVE_VPRINTF /* Define to 1 if `fork' works. */ #undef HAVE_WORKING_FORK /* Define to 1 if `vfork' works. */ #undef HAVE_WORKING_VFORK /* Set to 1 if HPUX 64bit ptrs on 32 bit m/c */ #undef HPUX64PTRS /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #undef LT_OBJDIR /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. 
*/ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if PDF support is available */ #undef PLD_pdf /* Define to 1 is PNG support is available */ #undef PLD_png /* Define to 1 if X11 support is available */ #undef PLD_xwin /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Define to 1 if your declares `struct tm'. */ #undef TM_IN_SYS_TIME /* Version number of package */ #undef VERSION /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel). */ #if defined AC_APPLE_UNIVERSAL_BUILD # if defined __BIG_ENDIAN__ # define WORDS_BIGENDIAN 1 # endif #else # ifndef WORDS_BIGENDIAN # undef WORDS_BIGENDIAN # endif #endif /* Define to 1 if the X Window System is missing or not being used. */ #undef X_DISPLAY_MISSING /* Set to 2 for open args */ #undef _FORTIFY_SOURCE /* Define to empty if `const' does not conform to ANSI C. */ #undef const /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus #undef inline #endif /* Define to `int' if does not define. */ #undef pid_t /* Define to `unsigned int' if does not define. */ #undef size_t /* Define as `fork' if `vfork' does not work. */ #undef vfork PHYLIPNEW-3.69.650/src/treedist.c0000664000175000017500000010575511605067345013070 00000000000000/* version 3.6. (c) Copyright 1993-2005 by the University of Washington. Written by Dan Fineman, Joseph Felsenstein, Mike Palczewski, Hisashi Horino, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #include "phylip.h" #include "cons.h" typedef enum { PHYLIPSYMMETRIC, PHYLIPBSD } distance_type; /* The following extern's refer to things declared in cons.c */ extern int tree_pairing; extern Char intreename[FNMLNGTH], intree2name[FNMLNGTH], outtreename[FNMLNGTH]; extern node *root; const char* outfilename; AjPFile embossoutfile; long trees_in_1, trees_in_2; extern long numopts, outgrno, col; extern long maxgrp; /* max. no. 
of groups in all trees found */ extern boolean trout, firsttree, noroot, outgropt, didreroot, prntsets, progress, treeprint, goteof; extern pointarray treenode, nodep; extern group_type **grouping, **grping2, **group2;/* to store groups found */ extern long **order, **order2, lasti; extern group_type *fullset; extern node *grbg; extern long tipy; extern double **timesseen, **tmseen2, **times2; extern double trweight, ntrees; static distance_type dtype; static long output_scheme; AjPPhyloTree* phylotrees = NULL; #ifndef OLDC /* function prototpes */ void assign_tree(group_type **, pattern_elm ***, long, long *); boolean group_is_null(group_type **, long); void compute_distances(pattern_elm ***, long, long); void free_patterns(pattern_elm ***, long); void produce_square_matrix(long, long *); void produce_full_matrix(long, long, long *); void output_submenu(void); void pairing_submenu(void); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void assign_lengths(double **lengths, pattern_elm ***pattern_array, long tree_index); void print_header(long trees_in_1, long trees_in_2); void output_distances(long trees_in_1, long trees_in_2); void output_long_distance(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_matrix_long(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_matrix_double(double diffl, long tree1, long tree2, long trees_in_1, long trees_in_2); void output_double_distance(double diffd, long tree1, long tree2, long trees_in_1, long trees_in_2); long symetric_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, long patternsz1, long patternsz2); double bsd_tree_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, double* lengths1, double *lengths2, long patternsz1, long patternsz2); void tree_diff(group_type **tree1, group_type **tree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2, long ntree1, long ntree2, long trees_in_1, long trees_in_2); void print_line_heading(long tree); int get_num_columns(void); void print_matrix_heading(long tree, long maxtree); /* function prototpes */ #endif void assign_lengths(double **lengths, pattern_elm ***pattern_array, long tree_index) { *lengths = pattern_array[0][tree_index]->length; } void assign_tree(group_type **treeN, pattern_elm ***pattern_array, long tree_index, long *pattern_size) { /* set treeN to be the tree_index-th tree in pattern_elm */ long i; for ( i = 0 ; i < setsz ; i++ ) { treeN[i] = pattern_array[i][tree_index]->apattern; } *pattern_size = *pattern_array[0][tree_index]->patternsize; } /* assign_tree */ boolean group_is_null(group_type **treeN, long index) { /* Check to see if a given index to a tree array points to an empty group */ long i; for ( i = 0 ; i < setsz ; i++ ) if (treeN[i][index] != (group_type) 0) return false; /* If we've gotten this far, then the index is to an empty group in the tree. */ return true; } /* group_is_null */ double bsd_tree_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2) { /* Compute the difference between 2 given trees. Return that value as a double. 
*/ long index1, index2; double return_value = 0; boolean match_found; long i; if ( group_is_null(tree1, 0) || group_is_null(tree2, 0) ) { printf ("Error computing tree difference between tree %ld and tree %ld\n", ntree1, ntree2); embExitBad(); } for ( index1 = 0; index1 < patternsz1; index1++ ) { if ( !group_is_null(tree1, index1) ) { if ( lengths1[index1] == -1 ) { printf( "Error: tree %ld is missing a length from at least one branch\n", ntree1); embExitBad(); } } } for ( index2 = 0; index2 < patternsz2; index2++ ) { if ( !group_is_null(tree2, index2) ) { if ( lengths2[index2] == -1 ) { printf( "Error: tree %ld is missing a length from at least one branch\n", ntree2); embExitBad(); } } } for ( index1 = 0 ; index1 < patternsz1; index1++ ) { /* For every element in the first tree, see if there's a match to it in the second tree. */ match_found = false; if ( group_is_null(tree1, index1) ) { /* When we've gone over all the elements in tree1, greater number of elements in tree2 will constitute that much more of a difference... */ while ( !group_is_null(tree2, index1) ) { return_value += pow(lengths1[index1], 2); index1++; } break; } for ( index2 = 0 ; index2 < patternsz2 ; index2++ ) { /* For every element in the second tree, see if any match the current element in the first tree. */ if ( group_is_null(tree2, index2) ) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for ( i = 0 ; i < setsz ; i++ ) { /* See if we've got a match, */ if ( tree1[i][index1] != tree2[i][index2] ) match_found = false; } if ( match_found == true ) { break; } } } if ( match_found == false ) { return_value += pow(lengths1[index1], 2); } } for ( index2 = 0 ; index2 < patternsz2 ; index2++ ) { /* For every element in the second tree, see if there's a match to it in the first tree. */ match_found = false; if ( group_is_null(tree2, index2) ) { /* When we've gone over all the elements in tree2, greater number of elements in tree1 will constitute that much more of a difference... */ while ( !group_is_null(tree1, index2) ) { return_value += pow(lengths2[index2], 2); index2++; } break; } for ( index1 = 0 ; index1 < patternsz1 ; index1++ ) { /* For every element in the first tree, see if any match the current element in the second tree. */ if ( group_is_null(tree1, index1) ) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for ( i = 0 ; i < setsz ; i++ ) { /* See if we've got a match, */ if ( tree1[i][index1] != tree2[i][index2] ) match_found = false; } if ( match_found == true ) { return_value += pow(lengths1[index1] - lengths2[index2], 2); break; } } } if ( match_found == false ) { return_value += pow(lengths2[index2], 2); } } if (return_value > 0.0) return_value = sqrt(return_value); else return_value = 0.0; return return_value; } long symetric_diff(group_type **tree1, group_type **tree2, long ntree1, long ntree2, long patternsz1, long patternsz2) { /* Compute the symmetric difference between 2 given trees. Return that value as a long. 
*/ long index1, index2, return_value = 0; boolean match_found; long i; if (group_is_null (tree1, 0) || group_is_null (tree2, 0)) { printf ("Error computing tree difference.\n"); return 0; } for (index1 = 0 ; index1 < patternsz1 ; index1++) { /* For every element in the first tree, see if there's a match to it in the second tree. */ match_found = false; if (group_is_null (tree1, index1)) { /* When we've gone over all the elements in tree1, greater number of elements in tree2 will constitute that much more of a difference... */ while (! group_is_null (tree2, index1)) { return_value++; index1++; } break; } for (index2 = 0 ; index2 < patternsz2 ; index2++) { /* For every element in the second tree, see if any match the current element in the first tree. */ if (group_is_null (tree2, index2)) { /* When we've gone over all the elements in tree2 */ match_found = false; break; } else { /* Tentatively set match_found; will be changed later if neccessary. . . */ match_found = true; for (i = 0 ; i < setsz ; i++) { /* See if we've got a match, */ if (tree1[i][index1] != tree2[i][index2]) match_found = false; } if (match_found == true) { /* If the previous loop ran from 0 to setsz without setting match_found to false, */ break; } } } if (match_found == false) { return_value++; } } return return_value; } /* symetric_diff */ void output_double_distance(double diffd, long tree1, long tree2, long trees_in_1, long trees_in_2) { switch (tree_pairing) { case ADJACENT_PAIRS: if (output_scheme == VERBOSE ) { fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd); } break; case ALL_IN_FIRST: if (output_scheme == VERBOSE) { fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd ); } else if (output_scheme == FULL_MATRIX) { output_matrix_double(diffd, tree1, tree2, trees_in_1, trees_in_2); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf (outfile, "Tree pair %ld: %e\n", tree1, diffd); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %e\n", tree1, diffd); } break; case ALL_IN_1_AND_2: if (output_scheme == VERBOSE ) fprintf (outfile, "Trees %ld and %ld: %e\n", tree1, tree2, diffd); else if (output_scheme == SPARSE) fprintf (outfile, "%ld %ld %e\n", tree1, tree2, diffd); else if (output_scheme == FULL_MATRIX) { output_matrix_double(diffd, tree1, tree2, trees_in_1, trees_in_2); } break; } } /* output_double_distance */ void print_matrix_heading(long tree, long maxtree) { long i; if ( tree_pairing == ALL_IN_1_AND_2 ) { fprintf(outfile, "\n\nFirst\\ Second tree file:\n"); fprintf(outfile, "tree \\\n"); fprintf(outfile, "file: \\"); } else fprintf(outfile, "\n\n "); for ( i = tree ; i <= maxtree ; i++ ) { if ( dtype == PHYLIPSYMMETRIC ) fprintf(outfile, "%5ld ", i); else fprintf(outfile, " %7ld ", i); } fprintf(outfile, "\n"); if ( tree_pairing == ALL_IN_1_AND_2 ) fprintf(outfile, " \\"); else fprintf(outfile, " \\"); for ( i = tree ; i <= maxtree ; i++ ) { if ( dtype == PHYLIPSYMMETRIC ) fprintf(outfile, "------"); else fprintf(outfile, "------------"); } } void print_line_heading(long tree) { if ( tree_pairing == ALL_IN_1_AND_2 ) fprintf(outfile, "\n%4ld |", tree); else fprintf(outfile, "\n%5ld |", tree); } void output_matrix_double(double diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { if ( tree1 == 1 && ((tree2 - 1) % get_num_columns() == 0 || tree2 == 1 )) { 
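/* [Editorial sketch, not part of the original source.]  In full-matrix mode
   the distances are written in blocks of get_num_columns() columns -- 10 for
   the symmetric difference, 7 for the wider branch score entries -- with a
   heading printed at the top of each block and a row label at the start of
   each line.  The standalone sketch below illustrates that column-blocking
   layout under those assumptions; print_blocked_matrix, dist, ntrees and
   ncols are hypothetical names, not part of this program. */
#if 0
#include <stdio.h>

static void print_blocked_matrix(FILE *fp, double **dist, long ntrees, long ncols)
{
  long start, stop, row, col;

  for (start = 1; start <= ntrees; start += ncols) {   /* one block of columns */
    stop = (start + ncols - 1 < ntrees) ? start + ncols - 1 : ntrees;
    fprintf(fp, "\n      ");
    for (col = start; col <= stop; col++)              /* block heading */
      fprintf(fp, " %7ld ", col);
    fprintf(fp, "\n");
    for (row = 1; row <= ntrees; row++) {              /* one line per tree */
      fprintf(fp, "%5ld |", row);                      /* row label */
      for (col = start; col <= stop; col++)
        fprintf(fp, " %9g ", dist[row - 1][col - 1]);
      fprintf(fp, "\n");
    }
  }
}
#endif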
if ( (tree_pairing == ALL_IN_FIRST && tree2 + get_num_columns() - 1 < trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree2 + get_num_columns() - 1 < trees_in_2)) { print_matrix_heading(tree2, tree2 + get_num_columns() - 1); } else { if ( tree_pairing == ALL_IN_FIRST) print_matrix_heading(tree2, trees_in_1); else print_matrix_heading(tree2, trees_in_2); } } if ( (tree2 - 1) % get_num_columns() == 0 || tree2 == 1) { print_line_heading(tree1); } fprintf(outfile, " %9g ", diffl); if ((tree_pairing == ALL_IN_FIRST && tree1 == trees_in_1 && tree2 == trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree1 == trees_in_1 && tree2 == trees_in_2)) fprintf(outfile, "\n\n\n"); } /* output_matrix_double */ void output_matrix_long(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { if ( tree1 == 1 && ((tree2 - 1) % get_num_columns() == 0 || tree2 == 1 )) { if ( (tree_pairing == ALL_IN_FIRST && tree2 + get_num_columns() - 1 < trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree2 + get_num_columns() - 1 < trees_in_2)) { print_matrix_heading(tree2, tree2 + get_num_columns() - 1); } else { if ( tree_pairing == ALL_IN_FIRST) print_matrix_heading(tree2, trees_in_1); else print_matrix_heading(tree2, trees_in_2); } } if ( (tree2 - 1) % get_num_columns() == 0 || tree2 == 1) { print_line_heading(tree1); } fprintf(outfile, "%4ld ", diffl); if ((tree_pairing == ALL_IN_FIRST && tree1 == trees_in_1 && tree2 == trees_in_1) || (tree_pairing == ALL_IN_1_AND_2 && tree1 == trees_in_1 && tree2 == trees_in_2)) fprintf(outfile, "\n\n\n"); } /* output_matrix_long */ void output_long_distance(long diffl, long tree1, long tree2, long trees_in_1, long trees_in_2) { switch (tree_pairing) { case ADJACENT_PAIRS: if (output_scheme == VERBOSE ) { fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl); } break; case ALL_IN_FIRST: if (output_scheme == VERBOSE) { fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl ); } else if (output_scheme == FULL_MATRIX) { output_matrix_long(diffl, tree1, tree2, trees_in_1, trees_in_2); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf (outfile, "Tree pair %ld: %ld\n", tree1, diffl); } else if (output_scheme == SPARSE) { fprintf (outfile, "%ld %ld\n", tree1, diffl); } break; case ALL_IN_1_AND_2: if (output_scheme == VERBOSE) fprintf (outfile, "Trees %ld and %ld: %ld\n", tree1, tree2, diffl); else if (output_scheme == SPARSE) fprintf (outfile, "%ld %ld %ld\n", tree1, tree2, diffl); else if (output_scheme == FULL_MATRIX ) { output_matrix_long(diffl, tree1, tree2, trees_in_1, trees_in_2); } break; } } void tree_diff(group_type **tree1, group_type **tree2, double *lengths1, double* lengths2, long patternsz1, long patternsz2, long ntree1, long ntree2, long trees_in_1, long trees_in_2) { long diffl; double diffd; switch (dtype) { case PHYLIPSYMMETRIC: diffl = symetric_diff (tree1, tree2, ntree1, ntree2, patternsz1, patternsz2); diffl += symetric_diff (tree2, tree1, ntree1, ntree2, patternsz2, patternsz1); output_long_distance(diffl, ntree1, ntree2, trees_in_1, trees_in_2); break; case PHYLIPBSD: diffd = bsd_tree_diff(tree1, tree2, ntree1, ntree2, lengths1, lengths2, patternsz1, patternsz2); output_double_distance(diffd, ntree1, ntree2, trees_in_1, trees_in_2); break; } } /* tree_diff */ int get_num_columns(void) { if ( dtype == PHYLIPSYMMETRIC ) 
return 10; else return 7; } /* get_num_columns */ void compute_distances(pattern_elm ***pattern_array, long trees_in_1, long trees_in_2) { /* Compute symmetric distances between arrays of trees */ long tree_index, end_tree, index1, index2, index3; group_type **treeA, **treeB; long patternsz1, patternsz2; double *length1 = NULL, *length2 = NULL; int num_columns = get_num_columns(); index1 = 0; /* Put together space for treeA and treeB */ treeA = (group_type **) Malloc (setsz * sizeof (group_type *)); treeB = (group_type **) Malloc (setsz * sizeof (group_type *)); print_header(trees_in_1, trees_in_2); switch (tree_pairing) { case ADJACENT_PAIRS: /* For every tree, compute the distance between it and the tree at the next location; do this in both directions */ end_tree = trees_in_1 - 1; for (tree_index = 0 ; tree_index < end_tree ; tree_index += 2) { assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_tree (treeB, pattern_array, tree_index + 1, &patternsz2); assign_lengths(&length1, pattern_array, tree_index); assign_lengths(&length2, pattern_array, tree_index + 1); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index+1, tree_index+2, trees_in_1, trees_in_2); if (tree_index + 2 == end_tree) printf("\nWARNING: extra tree at the end of input tree file.\n"); } break; case ALL_IN_FIRST: /* For every tree, compute the distance between it and every other tree in that file. */ end_tree = trees_in_1; if ( output_scheme != FULL_MATRIX ) { /* verbose or sparse output */ for (index1 = 0 ; index1 < end_tree ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for (index2 = 0 ; index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 + 1, trees_in_1, trees_in_2); } } } else { /* full matrix output */ for ( index3 = 0 ; index3 < trees_in_1 ; index3 += num_columns) { for ( index1 = 0 ; index1 < trees_in_1 ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for ( index2 = index3 ; index2 < index3 + num_columns && index2 < trees_in_1 ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 + 1, trees_in_1, trees_in_2); } } } } break; case CORR_IN_1_AND_2: if (trees_in_1 != trees_in_2) { /* Set end tree to the smaller of the two totals. */ end_tree = trees_in_1 > trees_in_2 ? trees_in_2 : trees_in_1; /* Print something out to the outfile and to the terminal. */ fprintf(outfile, "\n\n" "*** Warning: differing number of trees in first and second\n" "*** tree files. Only computing %ld pairs.\n" "\n", end_tree ); printf( "\n" " *** Warning: differing number of trees in first and second\n" " *** tree files. Only computing %ld pairs.\n" "\n", end_tree ); } else end_tree = trees_in_1; for (tree_index = 0 ; tree_index < end_tree ; tree_index++) { /* For every tree, compute the distance between it and the tree at the parallel location in the other file; do this in both directions */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); /* (tree_index + trees_in_1) will be the corresponding tree in the second file. 
*/ assign_tree (treeB, pattern_array, tree_index + trees_in_1, &patternsz2); assign_lengths(&length2, pattern_array, tree_index + trees_in_1); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index + 1, 0, trees_in_1, trees_in_2); } break; case ALL_IN_1_AND_2: end_tree = trees_in_1 + trees_in_2; if ( output_scheme != FULL_MATRIX ) { for (tree_index = 0 ; tree_index < trees_in_1 ; tree_index++) { /* For every tree in the first file, compute the distance between it and every tree in the second file. */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); for (index2 = trees_in_1 ; index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff(treeA, treeB, length1, length2, patternsz1, patternsz2, tree_index + 1 , index2 + 1, trees_in_1, trees_in_2); } } for ( ; tree_index < end_tree ; tree_index++) { /* For every tree in the second file, compute the distance between it and every tree in the first file. */ assign_tree (treeA, pattern_array, tree_index, &patternsz1); assign_lengths(&length1, pattern_array, tree_index); for (index2 = 0 ; index2 < trees_in_1 ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2 , patternsz1, patternsz2, tree_index + 1, index2 + 1, trees_in_1, trees_in_2); } } } else { for ( index3 = trees_in_1 ; index3 < end_tree ; index3 += num_columns) { for ( index1 = 0 ; index1 < trees_in_1 ; index1++) { assign_tree (treeA, pattern_array, index1, &patternsz1); assign_lengths(&length1, pattern_array, index1); for ( index2 = index3 ; index2 < index3 + num_columns && index2 < end_tree ; index2++) { assign_tree (treeB, pattern_array, index2, &patternsz2); assign_lengths(&length2, pattern_array, index2); tree_diff (treeA, treeB, length1, length2, patternsz1, patternsz2, index1 + 1, index2 - trees_in_1 + 1, trees_in_1, trees_in_2); } } } } break; } /* Free up treeA and treeB */ free (treeA); free (treeB); } /* compute_distances */ void free_patterns(pattern_elm ***pattern_array, long total_trees) { long i, j; /* Free each pattern array, */ for (i=0 ; i < setsz ; i++) { for (j = 0 ; j < total_trees ; j++) { free (pattern_array[i][j]->apattern); free (pattern_array[i][j]->patternsize); free (pattern_array[i][j]->length); free (pattern_array[i][j]); } free (pattern_array[i]); } free (pattern_array); } /* free_patterns */ void print_header(long trees_in_1, long trees_in_2) { /*long end_tree;*/ switch (tree_pairing) { case ADJACENT_PAIRS: /*end_tree = trees_in_1 - 1;*/ if (output_scheme == VERBOSE) { fprintf(outfile, "\n" "Tree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between adjacent pairs of trees:\n" "\n"); else fprintf (outfile, "Symmetric differences between adjacent pairs of trees:\n\n"); } else if ( output_scheme != SPARSE) printf ("Error -- cannot output adjacent pairs into a full matrix.\n"); break; case ALL_IN_FIRST: /*end_tree = trees_in_1;*/ if (output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between all pairs of trees in tree file\n\n"); else fprintf (outfile, "Symmetric differences between all pairs of trees in tree file:\n\n"); } else if (output_scheme == FULL_MATRIX) { fprintf(outfile, "\nTree distance program, 
version %s\n\n", VERSION); if (dtype == PHYLIPBSD) fprintf (outfile, "Branch score distances between all pairs of trees in tree file:\n\n"); else fprintf (outfile, "Symmetric differences between all pairs of trees in tree file:\n\n"); } break; case CORR_IN_1_AND_2: if (output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) { fprintf (outfile, "Branch score distances between corresponding pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } else { fprintf (outfile, "Symmetric differences between corresponding pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } } else if (output_scheme != SPARSE) printf ( "Error -- cannot output corresponding pairs into a full matrix.\n"); break; case (ALL_IN_1_AND_2) : if ( output_scheme == VERBOSE) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); if (dtype == PHYLIPBSD) { fprintf (outfile, "Branch score distances between all pairs of trees\n"); fprintf (outfile, " from first and second tree files:\n\n"); } else { fprintf(outfile,"Symmetric differences between all pairs of trees\n"); fprintf(outfile," from first and second tree files:\n\n"); } } else if ( output_scheme == FULL_MATRIX) { fprintf(outfile, "\nTree distance program, version %s\n\n", VERSION); } break; } } /* print_header */ void output_submenu() { /* this allows the user to select a different output of distances scheme. */ long loopcount; boolean done = false; Char ch; if (tree_pairing == NO_PAIRING) return; loopcount = 0; while (!done) { printf ("\nDistances output options:\n"); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) printf (" F Full matrix.\n"); printf (" V One pair per line, verbose.\n"); printf (" S One pair per line, sparse.\n"); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) printf ("\n Choose one: (F,V,S)\n"); else printf ("\n Choose one: (V,S)\n"); fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); if (strchr("FVS", ch) != NULL) { switch (ch) { case 'F': if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == ALL_IN_FIRST)) output_scheme = FULL_MATRIX; else /* If this can't be a full matrix... */ continue; break; case 'V': output_scheme = VERBOSE; break; case 'S': output_scheme = SPARSE; break; } done = true; } countup(&loopcount, 10); } } /* output_submenu */ void pairing_submenu() { /* this allows the user to select a different tree pairing scheme. 
*/ long loopcount; boolean done = false; Char ch; loopcount = 0; while (!done) { cleerhome(); printf ("Tree Pairing Submenu:\n"); printf (" A Distances between adjacent pairs in tree file.\n"); printf (" P Distances between all possible pairs in tree file.\n"); printf (" C Distances between corresponding pairs in one tree file and another.\n"); printf (" L Distances between all pairs in one tree file and another.\n"); printf ("\n Choose one: (A,P,C,L)\n"); fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); uppercase(&ch); if (strchr("APCL", ch) != NULL) { switch (ch) { case 'A': tree_pairing = ADJACENT_PAIRS; break; case 'P': tree_pairing = ALL_IN_FIRST; break; case 'C': tree_pairing = CORR_IN_1_AND_2; break; case 'L': tree_pairing = ALL_IN_1_AND_2; break; } output_submenu(); done = true; } countup(&loopcount, 10); } } /* pairing_submenu */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr disttype = NULL; AjPStr tree_p = NULL; AjPStr style = NULL; dtype = PHYLIPBSD; tree_pairing = ADJACENT_PAIRS; output_scheme = VERBOSE; ibmpc = IBMCRT; ansi = ANSICRT; didreroot = false; spp = 0; grbg = NULL; col = 0; noroot = true; numopts = 0; outgrno = 1; outgropt = false; progress = true; /* The following are not used by treedist, but may be used in functions in cons.c, so we set them here. */ treeprint = false; trout = false; prntsets = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylotrees = ajAcdGetTree("intreefile"); trees_in_1 = 0; while (phylotrees[trees_in_1]) trees_in_1++; progress = ajAcdGetBoolean("progress"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; disttype = ajAcdGetListSingle("dtype"); if(ajStrMatchC(disttype, "s")) dtype = PHYLIPSYMMETRIC; else dtype = PHYLIPBSD; noroot = !ajAcdGetBoolean("noroot"); tree_p = ajAcdGetListSingle("pairing"); if(ajStrMatchC(tree_p, "a")) tree_pairing = ADJACENT_PAIRS; else if(ajStrMatchC(tree_p, "p")) tree_pairing = ALL_IN_FIRST; style = ajAcdGetListSingle("style"); if(ajStrMatchC(style, "f")) output_scheme = FULL_MATRIX; else if(ajStrMatchC(style, "s")) output_scheme = SPARSE; else if(ajStrMatchC(style, "v")) output_scheme = VERBOSE; embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* embosss_getoptions */ int main(int argc, Char *argv[]) { pattern_elm ***pattern_array; long tip_count = 0; double ln_maxgrp; double ln_maxgrp1; double ln_maxgrp2; node * p; #ifdef MAC argc = 1; /* macsetup("Treedist", ""); */ argv[0] = "Treedist"; #endif init(argc, argv); emboss_getoptions("ftreedist",argc,argv); /* Initialize option-based variables, then ask for changes regarding their values. */ ntrees = 0.0; lasti = -1; /* read files to determine size of structures we'll be working with */ countcomma(ajStrGetuniquePtr(&phylotrees[0]->Tree),&tip_count); tip_count++; /* countcomma does a raw comma count, tips is one greater */ /* * EWFIX.BUG.756 -- this section may be killed if a good solution * to bug 756 is found * * inside cons.c there are several arrays which are allocated * to size "maxgrp", the maximum number of groups (sets of * tips more closely connected than the rest of the tree) we * can see as the code executes. * * We have two measures we use to determine how much space to * allot: * (1) based on the tip count of the trees in the infile * (2) based on total number of trees in infile, and * * (1) -- Tip Count Method * Since each group is a subset of the set of tips we must * represent at most pow(2,tips) different groups. 
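 * (With 20 tips, for example, that is 2^20, roughly one million, possible
 * tip subsets.)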
(Technically * two fewer since we don't store the empty or complete subsets, * but let's keep this simple. * * (2) -- Total Tree Size Method * Each tree we read results in * singleton groups for each tip, plus * a group for each interior node except the root * Since the singleton tips are identical for each tree, this gives * a bound of #tips + ( #trees * (# tips - 2 ) ) * * * Ignoring small terms where expedient, either of the following should * result in an adequate allocation: * pow(2,#tips) * (#trees + 1) * #tips * * Since "maxgrp" is a limit on the number of items we'll need to put * in a hash, we double it to make space for quick hashing * * BUT -- all of this has the possibility for overflow, so -- let's * make the initial calculations with doubles and then convert * */ /* limit chosen to make hash arithmetic work */ maxgrp = LONG_MAX / 2; ln_maxgrp = log((double)maxgrp); /* 2 * (#trees + 1) * #tips */ ln_maxgrp1 = log(2.0 * (double)tip_count * ((double)trees_in_1 + (double)trees_in_2)); /* ln only for 2 * pow(2,#tips) */ ln_maxgrp2 = (double)(1 + tip_count) * log(2.0); /* now -- find the smallest of the three */ if(ln_maxgrp1 < ln_maxgrp) { maxgrp = 2 * (trees_in_1 + trees_in_2 + 1) * tip_count; ln_maxgrp = ln_maxgrp1; } if(ln_maxgrp2 < ln_maxgrp) { maxgrp = pow(2,tip_count+1); } /* Read the (first) tree file and put together grouping, order, and timesseen */ read_groups (&pattern_array, trees_in_1 + trees_in_2, tip_count, phylotrees); if ((tree_pairing == ADJACENT_PAIRS) || (tree_pairing == ALL_IN_FIRST)) { /* Here deal with the adjacent or all-in-first pairing difference computation */ compute_distances (pattern_array, trees_in_1, 0); } else if (tree_pairing == NO_PAIRING) { /* Compute the consensus tree. */ putc('\n', outfile); /* consensus(); Reserved for future development */ } if (progress) printf("\nOutput written to file \"%s\"\n\n", outfilename); FClose(outtree); FClose(intree); FClose(outfile); if ((tree_pairing == ALL_IN_1_AND_2) || (tree_pairing == CORR_IN_1_AND_2)) FClose(intree2); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif free_patterns (pattern_array, trees_in_1 + trees_in_2); clean_up_final(); /* clean up grbg */ p = grbg; while (p != NULL) { node * r = p; p = p->next; free(r->nodeset); free(r->view); free(r); } printf("Done.\n\n"); embExit(); return 0; } /* main */ PHYLIPNEW-3.69.650/src/draw2.c0000664000175000017500000015536711605067345012270 00000000000000 #include /* Metrowerks for windows defines WIN32 here */ #define swap_m(x,y) temp = y,y=x,x=temp; extern long winheight; extern long winwidth; #ifdef WIN32 #include HDC hdc; /******* Menu Defines *******/ #define IDM_ABOUT 1000 #define IDM_PLOT 1001 #define IDM_CHANGE 1002 #define IDM_QUIT 1003 #define XWINPERCENT 0.66 #define YWINPERCENT 0.66 #endif #ifdef QUICKC extern struct videoconfig myscreen; #endif #ifdef OSX_CARBON #include #endif #include "draw.h" #include "phylip.h" static long eb[]={ 0 , 1 ,2 ,3 ,55,45,46,47,22,5,37,11,12,13,14,15,16,17,18,19,60,61,50,38, 24, 25,63,39,28,29,30,31,64,90,127,123,91,108,80,125,77,93,92,78,107,96, 75,97,240,241,242,243,244,245,246,247,248,249,122,94,76,126,110,111, 124, 193,194,195,196,197,198,199,200,201,209,210,211, 212,213,214,215,216,217, 226,227,228,229,230,231,232,233,173,224,189, 95,109,121,129,130,131,132, 133,134,135,136,137,145,146,147,148,149,150, 151, 152,153,162,163,164,165, 166,167,168,169,192,79,208,161,7}; double oldxreal, oldyreal; boolean didenter, didexit, curvetrue; extern long vrmlplotcolor; extern double oldx, oldy ; extern 
node *root; extern long nmoves, oldpictint ; extern long rootmatrix[51][51]; extern long strpbottom,strptop,strpwide,strpdeep; extern boolean dotmatrix, empty, previewing; extern double ynow, ysize, xsize, yunitspercm; extern FILE *plotfile; extern plottertype plotter; extern striptype stripe; extern long vrmltreecolor, vrmlnamecolor, vrmlskycolorfar, vrmlskycolornear, vrmlgroundcolorfar, vrmlgroundcolornear; extern colortype colors[7]; extern vrmllighttype vrmllights[3]; extern double pie; double pie = 3.141592654; /* Added by Dan F. for the new previewing paradigm */ extern double labelline,linewidth,oldxhigh,oldxlow,oldyhigh,oldylow, vrmllinewidth, raylinewidth,treeline,oldxsize,oldysize,oldxunitspercm, oldyunitspercm,oldxcorner,oldycorner,clipx0,clipx1,clipy0,clipy1; /* func. protocol added for vrml - danieyek 981111 */ extern long strpdiv,hpresolution; extern boolean preview,pictbold,pictitalic, pictshadow,pictoutline; extern double expand,xcorner,xnow,xscale,xunitspercm, ycorner,yscale,labelrotation, labelheight,ymargin,pagex,pagey,paperx,papery,hpmargin,vpmargin; extern long filesize; extern growth grows; extern enum {yes,no} penchange,oldpenchange; extern plottertype oldplotter,previewer; extern char resopts; extern winactiontype winaction; #ifndef OLDC /* function prototypes */ void plotdot(long, long); void circlepoints(int, int, int, int); void drawpen(long, long, long); void drawfatline(long, long, long, long, long); void idellipse(double, double); void splyne(double,double,double,double,boolean,long,boolean,boolean); static void putshort(FILE *, int); static void putint(FILE *, int); void reverse_bits (byte *, int); void makebox_no_interaction(char *, double *, double *, double *, long); void void_func(void); /* function prototypes */ #endif void plotdot(long ix, long iy) { /* plot one dot at ix, iy */ long ix0, iy0, iy1 = 0, iy2 = 0; iy0 = strptop - iy; if ((unsigned)iy0 > strpdeep || ix <= 0 || ix > strpwide) return; empty = false; ix0 = ix; switch (plotter) { case citoh: iy1 = 1; iy2 = iy0; break; case epson: iy1 = 1; iy2 = 7 - iy0; break; case oki: iy1 = 1; iy2 = 7 - iy0; break; case toshiba: iy1 = iy0 / 6 + 1; iy2 = 5 - iy0 % 6; break; case pcx: iy1 = iy0 + 1; ix0 = (ix - 1) / 8 + 1; iy2 = 7 - ((ix - 1) & 7); break; case pcl: iy1 = iy0 + 1; ix0 = (ix - 1) / 8 + 1; iy2 = 7 - ((ix - 1) & 7); break; case bmp: iy1 = iy0 + 1; ix0 = (ix - 1) / 8 + 1; iy2 = 7 - ((ix - 1) & 7); case xbm: case gif: iy1 = iy0 + 1; ix0 = (ix - 1) / 8 + 1; iy2 = (ix - 1) & 7; break; case lw: case hp: case tek: case mac: case houston: case decregis: case fig: case pict: case ray: case pov: case idraw: case ibm: case other: break; default: /* cases xpreview and vrml not handled */ break; /* code for making dot array for a new printer goes here */ } stripe[iy1 - 1][ix0 - 1] |= (unsigned char)1< x){ if (d < 0) { d = d + deltaE; deltaE += 2; deltaSE += 2; x++; } else { d+=deltaSE; deltaE += 2; deltaSE += 4; x++; y--; } circlepoints(x,y,x0,y0); } } /* drawpen */ void drawfatline(long ixabs, long iyabs, long ixnow, long iynow, long penwide) { long temp, xdiff, ydiff, err, x1, y1; didenter = false; didexit = false; if (ixabs < ixnow) { temp = ixnow; ixnow = ixabs; ixabs = temp; temp = iynow; iynow = iyabs; iyabs = temp; } xdiff = ixabs - ixnow; ydiff = iyabs - iynow; if (ydiff >= 0) { if (xdiff >= ydiff) { err = -(xdiff / 2); x1 = ixnow; while (x1 <= ixabs && !(didenter && didexit)) { drawpen(x1, iynow, penwide); err += ydiff; if (err > 0) { iynow++; err -= xdiff; } x1++; } return; } err = -(ydiff / 2); y1 = 
iynow; while (y1 < iyabs && !(didenter && didexit)) { drawpen(ixnow, y1, penwide); err += xdiff; if (err > 0) { ixnow++; err -= ydiff; } y1++; } return; } if (xdiff < -ydiff) { err = ydiff / 2; y1 = iynow; while (y1 >= iyabs && !(didenter && didexit)) { drawpen(ixnow, y1, penwide); err += xdiff; if (err > 0) { ixnow++; err += ydiff; } y1--; } return; } err = -(xdiff / 2); x1 = ixnow; while (x1 <= ixabs && !(didenter && didexit)) { drawpen(x1, iynow, penwide); err -= ydiff; if (err > 0) { iynow--; err -= xdiff; } x1++; } } /* drawfatline */ void plot(pensttstype pen, double xabs, double yabs) { long xhigh, yhigh, xlow, ylow, ixnow, iynow, ixabs, iyabs, cdx, /*cdy,*/ temp, i; long pictint; double newx, newy, /*dx, dy,*/ lscale, dxreal, dyreal; Char picthi, pictlo; /* added to give every line a name in vrml! - danieyek 981110 */ static long lineCount = 0; /* Record the first node as the coordinate for viewpoint! */ static int firstNodeP=1; double distance, angle; double episilon = 1.0e-10; /* For povray, added by Dan F. */ char texture_string[7]; /* remember to respect & translate for clipping region, clip{x,y}{0,1} */ if (!dotmatrix || previewing) { switch (plotter) { case xpreview: if (pen == pendown) { #ifndef X_DISPLAY_MISSING XDrawLine(display,mainwin,gc1,(long)oldx,(long)(height-oldy), (long)xabs,(long)(height-yabs)); #endif } oldx = xabs; oldy = yabs; break; case winpreview: #ifdef WIN32 if (pen == pendown) { LineTo(hdc, (int) xabs, (int)(winheight-yabs)); } else { MoveToEx(hdc, (int) xabs, (int) (winheight-yabs), (LPPOINT) NULL); } #endif break; case tek: if (pen == penup) { if (previewing) putchar('\035'); else putc('\035', plotfile); } ixnow = (long)floor(xabs + 0.5); iynow = (long)floor(yabs + 0.5); xhigh = ixnow / 32; yhigh = iynow / 32; xlow = ixnow & 31; ylow = iynow & 31; if (!ebcdic) { if (yhigh != oldyhigh) { if (previewing) putchar(yhigh + 32); else putc(yhigh + 32, plotfile); } if (ylow != oldylow || xhigh != oldxhigh) { if (previewing) putchar(ylow + 96); else putc(ylow + 96, plotfile); } if (xhigh != oldxhigh) { if (previewing) putchar(xhigh + 32); else putc(xhigh + 32, plotfile); } if (previewing) putchar(xlow + 64); else putc(xlow + 64, plotfile); } else { /* DLS/JMH -- for systems that use EBCDIC coding */ if (yhigh != oldyhigh) { if (previewing) putchar(eb[yhigh + 32]); else putc(eb[yhigh + 32], plotfile); } if (ylow != oldylow || xhigh != oldxhigh) { if (previewing) putchar(eb[ylow + 96]); else putc(eb[ylow + 96], plotfile); } if (xhigh != oldxhigh) { if (previewing) putchar(eb[xhigh + 32]); else putc(eb[xhigh + 32], plotfile); } if (previewing) putchar(eb[xlow + 64]); else putc(eb[xlow + 64], plotfile); } oldxhigh = xhigh; oldxlow = xlow; oldyhigh = yhigh; oldylow = ylow; break; case hp: if (pen == pendown) fprintf(plotfile, "PD"); else fprintf(plotfile, "PU"); pout((long)floor(xabs + 0.5)); putc(',', plotfile); pout((long)floor(yabs + 0.5)); fprintf(plotfile, ";\n"); break; case pict: newx = floor(xabs + 0.5); newy = floor(ysize * yunitspercm - yabs + 0.5); if (pen == pendown) { if (linewidth > 5) { dxreal = xabs - oldxreal; dyreal = yabs - oldyreal; lscale = sqrt(dxreal * dxreal + dyreal * dyreal) / (fabs(dxreal) + fabs(dyreal)); pictint = (long)(lscale * linewidth + 0.5); if (pictint == 0) pictint = 1; if (pictint != oldpictint) { picthi = (Char)(pictint / 256); pictlo = (Char)(pictint & 255); fprintf(plotfile, "\007%c%c%c%c", picthi, pictlo, picthi, pictlo); } oldpictint = pictint; } fprintf(plotfile, " %c%c%c%c", (Char)((long) oldy / 256), (Char)((long) oldy & 
255), (Char)((long) oldx / 256), (Char)((long) oldx & 255)); fprintf(plotfile, "%c%c%c%c", (Char)((long)newy / 256), (Char)((long)newy & 255), (Char)((long)newx / 256), (Char)((long)newx & 255)); } oldxreal = xabs; oldyreal = yabs; oldx = newx; oldy = newy; break; case ray: if (pen == pendown) { if (linewidth != treeline) { if (raylinewidth > labelline) { raylinewidth = labelline; fprintf(plotfile, "end\n\n"); fprintf(plotfile, "name species_names\n"); fprintf(plotfile, "grid 22 22 22\n"); } } if (oldxreal != xabs || oldyreal != yabs) { raylinewidth *= 0.99999; fprintf(plotfile, "cylinder %8.7f %6.3f 0 %6.3f %6.3f 0 %6.3f\n", raylinewidth, oldxreal, oldyreal, xabs, yabs); fprintf(plotfile, "sphere %8.7f %6.3f 0 %6.3f\n", raylinewidth, xabs, yabs); } } oldxreal = xabs; oldyreal = yabs; break; case pov: /* Default to writing out tree texture... */ strcpy (texture_string, TREE_TEXTURE); if (pen == pendown) { if (linewidth != treeline) { /* Change the texture to name texture */ strcpy (texture_string, NAME_TEXTURE); if (raylinewidth > labelline) { raylinewidth = labelline; fprintf(plotfile, "\n// Now, the species names:\n\n"); } } if (oldxreal != xabs || oldyreal != yabs) { raylinewidth *= 0.99999; fprintf(plotfile, "cylinder { <%6.3f, 0, %6.3f,>, <%6.3f, 0, %6.3f>, %8.7f \n", oldxreal, oldyreal, xabs, yabs, raylinewidth); fprintf(plotfile, "\ttexture { %s } }\n", texture_string); fprintf(plotfile, "sphere { <%6.3f, 0, %6.3f>, %8.7f \n", xabs, yabs, raylinewidth); fprintf(plotfile, "\ttexture { %s } }\n", texture_string); } } oldxreal = xabs; oldyreal = yabs; break; case lw: if (pen == pendown){ /* If there's NO possibility that the line interesects the page, * leave it out. Otherwise, let postscript clip it to the page. */ if (!((xabs > clipx1*xunitspercm && oldx > clipx1*xunitspercm) || (xabs < clipx0*xunitspercm && oldx < clipx0*xunitspercm) || (yabs > clipy1*yunitspercm && oldy > clipy1*yunitspercm) || (yabs < clipy0*yunitspercm && oldy < clipy0*yunitspercm))) fprintf(plotfile, "%8.2f %8.2f %8.2f %8.2f l\n", oldx-(clipx0*xunitspercm), oldy-(clipy0*yunitspercm), xabs-(clipx0*xunitspercm), yabs-(clipy0*yunitspercm)); } oldx = xabs, oldy = yabs; break; case idraw: if (pen == pendown) { fprintf(plotfile, "Begin %%I Line\n"); fprintf(plotfile, "%%I b 65535\n"); fprintf(plotfile, "%d 0 0 [] 0 SetB\n", ((linewidth>=1.0) ? 
(int)linewidth : 1)); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I cbg White\n"); fprintf(plotfile, "1 1 1 SetCBg\n"); fprintf(plotfile, "%%I p\n"); fprintf(plotfile, "0 SetP\n"); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ 0.01 0 0 0.01 216 285 ] concat\n"); fprintf(plotfile, "%%I\n"); fprintf(plotfile, "%ld %ld %ld %ld Line\n", (long)(100.0 * (oldxreal+0.5)), (long)(100.0 * (oldyreal+0.5)), (long)(100.0 * (xabs+0.5)), (long)(100.0 * (yabs+0.5))); fprintf(plotfile, "End\n\n"); if (linewidth >= 4.0) { fprintf(plotfile, "Begin %%I Elli\n"); fprintf(plotfile, "%%I b 65535\n"); fprintf(plotfile, "1 0 0 [] 0 SetB\n"); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I cbg White\n"); fprintf(plotfile, "1 1 1 SetCBg\n"); fprintf(plotfile, "%%I p\n"); fprintf(plotfile, "0 SetP\n"); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ 0.01 0 0 0.01 216 285 ] concat\n"); fprintf(plotfile, "%%I\n"); fprintf(plotfile, "%ld %ld %ld %ld Elli\n", (long)(100.0 * (oldxreal+0.5)), (long)(100.0 * (oldyreal+0.5)), (long)(100.0 * (linewidth/2)) - 100, (long)(100.0 * (linewidth/2)) - 100); fprintf(plotfile, "End\n"); fprintf(plotfile, "Begin %%I Elli\n"); fprintf(plotfile, "%%I b 65535\n"); fprintf(plotfile, "1 0 0 [] 0 SetB\n"); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I cbg White\n"); fprintf(plotfile, "1 1 1 SetCBg\n"); fprintf(plotfile, "%%I p\n"); fprintf(plotfile, "0 SetP\n"); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ 0.01 0 0 0.01 216 285 ] concat\n"); fprintf(plotfile, "%%I\n"); fprintf(plotfile, "%ld %ld %ld %ld Elli\n", (long)(100.0 * (xabs+0.5)), (long)(100.0 * (yabs+0.5)), (long)(100.0 * (linewidth/2)) - 100, (long)(100.0 * (linewidth/2)) - 100); fprintf(plotfile, "End\n"); } } oldxreal = xabs; oldyreal = yabs; break; case ibm: #ifdef TURBOC newx = floor(xabs + 0.5); newy = fabs(floor(yabs) - getmaxy()); if (pen == pendown) line((long)oldx,(long)oldy,(long)newx,(long)newy); oldx = newx; oldy = newy; #endif #ifdef QUICKC newx = floor(xabs + 0.5); newy = fabs(floor(yabs) - myscreen.numypixels); if (pen == pendown) _lineto((long)newx,(long)newy); else _moveto((long)newx,(long)newy); oldx = newx; oldy = newy; #endif break; case mac: #ifdef MAC if (pen == pendown){ LineTo((int)floor((double)xabs + 0.5), winheight - (long)floor((double)yabs + 0.5)+MAC_OFFSET);} else{ MoveTo((int)floor((double)xabs + 0.5), winheight - (long)floor((double)yabs + 0.5)+MAC_OFFSET);} #endif break; case houston: if (pen == pendown) fprintf(plotfile, "D "); else fprintf(plotfile, "U "); pout((long)((long)floor(xabs + 0.5))); putc(',', plotfile); pout((long)((long)floor(yabs + 0.5))); putc('\n', plotfile); break; case decregis: newx = floor(xabs + 0.5); newy = fabs(floor(yabs + 0.5) - 479); if (pen == pendown) { if (previewing) { printf("P["); pout((long)oldx); putchar(','); pout((long)oldy); printf("]V["); pout((long)newx); putchar(','); pout((long)newy); putchar(']'); } else { fprintf(plotfile, "P["); pout((long)oldx); putc(',', plotfile); pout((long)oldy); fprintf(plotfile, "]V["); pout((long)newx); putc(',', plotfile); pout((long)newy); putc(']', plotfile); } nmoves++; if (nmoves == 3) { nmoves = 0; if (previewing) putchar('\n'); else putc('\n', plotfile); } } oldx = newx; oldy = newy; break; case fig: newx = floor(xabs + 0.5); newy = floor(yabs + 0.5); if (pen == pendown) { fprintf(plotfile, "2 1 0 %5ld 0 0 0 0 0.000 0 0\n", (long)floor(linewidth + 0.5) + 
1); fprintf(plotfile, "%5ld%5ld%5ld%5ld 9999 9999\n", (long)oldx, 606 - (long) oldy, (long)newx, 606 - (long)newy); fprintf(plotfile, "1 3 0 1 0 0 0 21 0.00 1 0.0 %5ld%5ld%5ld %5ld %5ld%5ld%5ld 349\n", (long)oldx, 606 - (long) oldy, (long)floor(linewidth / 2 + 0.5), (long)floor(linewidth / 2 + 0.5), (long)oldx, 606 - (long)oldy, 606 - (long)oldy); fprintf(plotfile, "1 3 0 1 0 0 0 21 0.00 1 0.0 %5ld%5ld%5ld %5ld %5ld%5ld%5ld 349\n", (long)newx, 606 - (long)newy, (long)floor(linewidth / 2 + 0.5), (long)floor(linewidth / 2 + 0.5), (long)newx, 606 - (long)newy, 606 - (long)newy); } oldx = newx; oldy = newy; break; case vrml: newx = xabs; newy = yabs; /* if this is the root node, use the coordinates to define the view point */ if (firstNodeP-- == 1) { fprintf(plotfile, "#VRML V2.0 utf8\n"); fprintf(plotfile, " NavigationInfo {\n"); fprintf(plotfile, " headlight FALSE\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " Viewpoint\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " position %f %f %f\n", xsize/2, ysize/2, ysize*1.2); fprintf(plotfile, " description \"Entry View\"\n"); fprintf(plotfile, " }\n"); for (i=0; i<3; i++) { fprintf(plotfile, " PointLight {\n"); fprintf(plotfile, " on TRUE\n"); fprintf(plotfile, " intensity %f\n", vrmllights[i].intensity); fprintf(plotfile, " ambientIntensity 0.0\n"); fprintf(plotfile, " color 1.0 1.0 1.0\n"); fprintf(plotfile, " location %f %f %f\n", vrmllights[i].x, vrmllights[i].y, vrmllights[i].z); fprintf(plotfile, " attenuation 0.0 0.0 0.0\n"); fprintf(plotfile, " radius 200.0\n"); fprintf(plotfile, " }\n"); } fprintf(plotfile, " Background\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " skyAngle [1.75]\n"); fprintf(plotfile, " skyColor [%f %f %f, %f %f %f]\n", colors[vrmlskycolornear-1].red, colors[vrmlskycolornear-1].green, colors[vrmlskycolornear-1].blue, colors[vrmlskycolorfar-1].red, colors[vrmlskycolorfar-1].green, colors[vrmlskycolorfar-1].blue); fprintf(plotfile, " groundAngle[0 1.57 3.14]\n"); fprintf(plotfile, " groundColor [0.9 0.9 0.9, 0.7 0.7 0.7, %f %f %f]\n", colors[vrmlgroundcolorfar-1].red, colors[vrmlgroundcolorfar-1].green, colors[vrmlgroundcolorfar-1].blue); fprintf(plotfile, " }\n"); } if (pen == penup) {/* pen down = beginning of a new path */ } else if (pen == pendown) {/* pen up = continue, line may not end yet. 
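      Each segment drawn with the pen down is written out below as a sphere at the old
      endpoint, a cylinder of the computed length and angle, and a sphere at the new
      endpoint, so a continuing line simply emits another such group.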
*/ if (linewidth != treeline) { if (vrmllinewidth > labelline) { vrmllinewidth = labelline; vrmlplotcolor = vrmlnamecolor; } } distance = sqrt((newy - oldy)*(newy - oldy) + (newx - oldx)*(newx - oldx)); angle = computeAngle(oldx, oldy, newx, newy); if (distance >= episilon) { fprintf(plotfile, " DEF Line%ld Transform\n", lineCount++); fprintf(plotfile, " {\n"); fprintf(plotfile, " rotation 0 0 1 %f\n", angle); fprintf(plotfile, " translation %f %f 0\n", oldx, oldy); fprintf(plotfile, " children\n"); fprintf(plotfile, " [\n"); fprintf(plotfile, " Shape\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " appearance Appearance\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " material Material { diffuseColor %f %f %f}\n", colors[vrmlplotcolor-1].red, colors[vrmlplotcolor-1].green, colors[vrmlplotcolor-1].blue); fprintf(plotfile, " }\n"); fprintf(plotfile, " geometry Sphere\n"); fprintf(plotfile, " {\n"); /* vrmllinewidth *= 0.99999; */ fprintf(plotfile, " radius %f\n", vrmllinewidth); fprintf(plotfile, " }\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " Transform\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " rotation 0 0 1 -1.570796327\n" ); fprintf(plotfile, " translation %f 0 0\n", distance/2); fprintf(plotfile, " children\n"); fprintf(plotfile, " [\n"); fprintf(plotfile, " Shape\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " appearance Appearance\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " material Material { diffuseColor %f %f %f}\n", colors[vrmlplotcolor-1].red, colors[vrmlplotcolor-1].green, colors[vrmlplotcolor-1].blue ); fprintf(plotfile, " }\n"); fprintf(plotfile, " geometry Cylinder\n"); fprintf(plotfile, " {\n"); /* line radius affects end sphere's size */ /* vrmllinewidth *= 0.99999; */ fprintf(plotfile, " radius %f\n", vrmllinewidth); fprintf(plotfile, " height %f\n", distance); fprintf(plotfile, " }\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " ]\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " Transform\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " translation %f 0 0\n", distance); fprintf(plotfile, " children\n"); fprintf(plotfile, " [\n"); fprintf(plotfile, " Shape\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " appearance Appearance\n"); fprintf(plotfile, " {\n"); fprintf(plotfile, " material Material { diffuseColor %f %f %f}\n", colors[vrmlplotcolor-1].red, colors[vrmlplotcolor-1].green, colors[vrmlplotcolor-1].blue ); fprintf(plotfile, " }\n"); fprintf(plotfile, " geometry Sphere\n"); fprintf(plotfile, " {\n"); /* radius affects line size */ /* vrmllinewidth *= 0.99999; */ fprintf(plotfile, " radius %f\n", vrmllinewidth); fprintf(plotfile, " }\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " ]\n"); fprintf(plotfile, " }\n"); fprintf(plotfile, " ]\n"); fprintf(plotfile, " }\n"); } } else { fprintf(stderr, "ERROR: Programming error in plot()."); } oldx = newx; oldy = newy; break; case epson: case oki: case citoh: case toshiba: case pcx: case pcl: case bmp: case xbm: case gif: case other: break; /* code for a pen move on a new plotter goes here */ } return; } if (pen == pendown) { ixabs = (long)floor(xabs + 0.5); iyabs = (long)floor(yabs + 0.5); ixnow = (long)floor(xnow + 0.5); iynow = (long)floor(ynow + 0.5); if (ixnow > ixabs) { temp = ixnow; ixnow = ixabs; ixabs = temp; temp = iynow; iynow = iyabs; iyabs = temp; } /*dx = ixabs - ixnow; dy = iyabs - iynow;*/ /* if (dx + fabs(dy) <= 0.0) c = 0.0; else c = 0.5 * linewidth / sqrt(dx * dx + dy * dy); */ cdx = (long)floor(linewidth + 0.5); /*cdy = (long)floor(linewidth + 0.5);*/ if 
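/* draw the thickened line only when its vertical extent can overlap the current raster stripe */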
((iyabs + cdx >= strpbottom || iynow + cdx >= strpbottom) && (iyabs - cdx <= strptop || iynow - cdx <= strptop)) { drawfatline(ixnow,iynow,ixabs,iyabs,(long)floor(linewidth+0.5)); } } xnow = xabs; ynow = yabs; /* Bitmap Code to plot (xnow,ynow) to (xabs,yabs) */ } /* plot */ void idellipse(double x, double y) { fprintf(plotfile, "Begin %%I Elli\n"); fprintf(plotfile, "%%I b 65535\n"); fprintf(plotfile, "1 0 0 [] 0 SetB\n"); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I cbg White\n"); fprintf(plotfile, "1 1 1 SetCBg\n"); fprintf(plotfile, "%%I p\n"); fprintf(plotfile, "0 SetP\n"); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ 0.01 0 0 0.01 216 285 ] concat\n"); fprintf(plotfile, "%%I\n"); fprintf(plotfile, "%ld %ld %ld %ld Elli\n", (long)(100.0 * (x+0.5)),(long)(100.0 * (y+0.5)), (long)(100.0 * (linewidth/2)) - 100, (long)(100.0 * (linewidth/2)) - 100); fprintf(plotfile, "End\n"); } /* idellipse */ void splyne(double x1, double y1, double x2, double y2, boolean sense, long segs, boolean head, boolean tail) { /* sense is true if line departing from x1,y1 is tangential to x, false if tangential to y */ long i,fromx,fromy,tox,toy; double f, g, h, x3, y3; long ptop, pleft, pbottom, pright, startangle, arcangle; double dtheta; double sintheta,costheta,sindtheta,cosdtheta,newsintheta,newcostheta; double rx,ry; /* axes of ellipse */ double ox,oy; /* center of ellipse */ double prevx,prevy; /*long pictint;*/ x1 = x1 - (clipx0 * xunitspercm); x2 = x2 - (clipx0 * xunitspercm); y1 = y1 - (clipy0 * yunitspercm); y2 = y2 - (clipy0 * yunitspercm); /* adjust by clipping region */ switch (plotter) { case lw: fprintf(plotfile,"stroke %8.2f %8.2f moveto\n",x1,y1); if (sense) fprintf(plotfile,"%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f curveto\n", (x1+(0.55*(x2-x1))), y1, x2, (y1+(0.45*(y2-y1))), x2, y2); else fprintf(plotfile,"%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f curveto\n", x1, (y1+(0.55*(y2-y1))), (x1+(0.45*(x2-x1))), y2, x2, y2); break; case pict: { double dtop, dleft, dbottom, dright,temp; if (x1 == x2 || y1 == y2) { plot(penup, x1, y1); plot(pendown, x2, y2); } else { if (x2 > x1 && y2 < y1){ swap_m(x2,x1); swap_m(y2,y1); sense = !sense; } y1 = (ysize * yunitspercm) - y1; y2 = (ysize * yunitspercm) - y2; if (sense) { if (x2 > x1) { dtop = y2 - y1 + y2; dleft = x1 - x2 + x1; dbottom = y1; dright = x2; startangle = 90; } else { dtop = y2 - y1 + y2; dleft = x2; dbottom = y1; dright = x1 + (x1 - x2); startangle = 180; } } else { if (x2 > x1) { dtop = y1 + (y1 - y2); dleft = x1; dbottom = y2; dright = x2 + (x2 - x1); startangle = 270; } else { dtop = y2; dleft = x1; dbottom = y1 + y1 - y2;; dright = x2 + (x2 - x1); startangle = 0; } } arcangle = 90; if (dbottom < dtop) {swap_m(dbottom,dtop);} if (dleft> dright) {swap_m(dleft,dright);} ptop = (long)floor((dtop - 0) + 0.5); pleft = (long)floor(dleft + 0.5); pbottom = (long)floor(dbottom + 0.5) + (long)floor(linewidth + 0.5); pright = (long)floor(dright + 0.5) + (long)floor(linewidth + 0.5); if (!sense) pbottom++; else if (x2 < x1) pright++; else pleft--; /*pictint = 1; */ fprintf(plotfile,"\140%c%c%c%c%c%c%c%c%c%c%c%c", (Char)(ptop / 256), (Char)(ptop % 256), (Char)(pleft / 256), (Char)(pleft % 256), (Char)(pbottom / 256), (Char)(pbottom % 256), (Char)(pright / 256), (Char)(pright % 256), (Char)(startangle / 256), (Char)(startangle % 256), (Char)(arcangle / 256), (Char)(arcangle % 256)); } } break; case fig: fromx = (long)floor(x1 + 0.5); fromy = (long)floor(y1 + 0.5); tox = (long)floor(x2 + 0.5); toy = 
(long)floor(y2 + 0.5); fprintf(plotfile, "3 0 0 %5ld 0 0 0 0 0.000 0 0\n", (long)floor(linewidth + 0.5) + 1); if (sense) fprintf(plotfile, "%5ld%5ld%5ld%5ld%5ld%5ld%5ld%5ld 9999 9999\n", fromx, 606 - fromy, (long)floor((x1+(0.55*(x2-x1))) + 0.5), 606 - fromy, tox, 606 - (long)floor((y1+(0.45*(y2-y1))) + 0.5), tox, 606 - toy); else fprintf(plotfile, "%5ld%5ld%5ld%5ld%5ld%5ld%5ld%5ld 9999 9999\n", fromx, 606 - fromy, fromx, 606 - (long)floor((y1+(0.55*(y2-y1))) + 0.5), (long)floor((x1+(0.45*(x2-x1))) + 0.5), 606 - toy, tox, 606 - toy); fprintf(plotfile, "1 3 0 1 0 0 0 21 0.00 1 0.0 "); fprintf(plotfile, "%5ld%5ld%5ld %5ld %5ld%5ld%5ld 349\n", fromx, 606 - fromy, (long)floor(linewidth / 2 + 0.5), (long)floor(linewidth / 2 + 0.5), fromx, 606 - fromy, 606 - fromy); fprintf(plotfile, "1 3 0 1 0 0 0 21 0.00 1 0.0 "); fprintf(plotfile, "%5ld%5ld%5ld %5ld %5ld%5ld%5ld 349\n", tox, 606 - toy, (long)floor(linewidth / 2 + 0.5), (long)floor(linewidth / 2 + 0.5), tox, 606 - toy, 606 - toy); break; case idraw: if (head){ fprintf(plotfile,"Begin %%I Pict\n%%I b u\n%%I cfg u\n%%I cbg u\n"); fprintf(plotfile,"%%I f u\n%%I p u \n%%I t u\n\n"); idellipse(x1,y1); fprintf(plotfile, "Begin %%I BSpl\n"); fprintf(plotfile, "%%I b 65535\n"); fprintf(plotfile, "%ld 0 0 [] 0 SetB\n", ((linewidth>=1.0) ? (long)linewidth : 1)); fprintf(plotfile, "%%I cfg Black\n"); fprintf(plotfile, "0 0 0 SetCFg\n"); fprintf(plotfile, "%%I cbg White\n"); fprintf(plotfile, "1 1 1 SetCBg\n"); fprintf(plotfile, "none SetP %%I p n\n"); fprintf(plotfile, "%%I t\n"); fprintf(plotfile, "[ 0.01 0 0 0.01 216 285 ] concat\n"); if (tail) fprintf(plotfile,"%%I %ld\n",segs+1); else fprintf(plotfile,"%%I %ld\n",(segs*2)+1); fprintf(plotfile, "%ld %ld\n", (long)(100.0 * (x1+0.5)), (long)(100.0 * (y1+0.5))); } rx = (fabs(x2 - x1)); ry = (fabs(y2 - y1)); if (!sense){ if (x2 < x1) sintheta = 0.0, costheta = 1.0, dtheta = 90.0 / ((double)segs), ox = x2, oy = y1; else sintheta = 0.0, costheta = -1.0, dtheta = -90.0 / ((double)segs), ox = x2, oy = y1; } else{ if (x2 < x1) sintheta = -1.0, costheta = 0.0, dtheta = -90.0 / ((double)segs), ox = x1, oy = y2; else sintheta = -1.0, costheta = 0.0, dtheta = 90.0 / ((double)segs), ox = x1, oy = y2; } x3 = x1; y3 = y1; sindtheta = sin(dtheta * (3.1415926535897932384626433 / 180.0)); cosdtheta = cos(dtheta * (3.1415926535897932384626433 / 180.0)); for (i = 1; i <= segs; i++) { prevx = x3; prevy = y3; newsintheta = (sintheta * cosdtheta) + (costheta * sindtheta); newcostheta = (costheta * cosdtheta) - (sintheta * sindtheta); sintheta = newsintheta; costheta = newcostheta; x3 = ox + (costheta * rx); y3 = oy + (sintheta * ry); /* adjust spline for better aesthetics: */ if (i == 1){ if (sense) y3 = (y3 + prevy) / 2.0; else x3 = (x3 + prevx) / 2.0;} else if (i == segs - 1){ if (sense) x3 = (x3 + x2) / 2.0; else y3 = (y2 + y3) / 2.0; } fprintf(plotfile, "%ld %ld\n", (long)(100.0 * (x3+0.5)), (long)(100.0 * (y3+0.5))); } if (head && tail) fprintf(plotfile," BSpl\nEnd\n\n"); /* changed for gcc */ /*fprintf(plotfile,"%ld BSpl\nEnd\n\n"); This is the original */ else if (tail) fprintf(plotfile," BSpl \nEnd\n\n"); /* changed for gcc */ /*fprintf(plotfile,"%ld BSpl\nEnd\n\n"); This is the original */ if (tail) idellipse(x2,y2), fprintf(plotfile,"\nEnd %%I eop\n\n"); break; case hp: plot(penup,x1,y1); if (sense){ if (x2 > x1) fprintf(plotfile,"PD;AA%ld,%ld,90,1;\n",(long)x1,(long)y2); else fprintf(plotfile,"PD;AA%ld,%ld,-90,1;\n",(long)x1,(long)y2); } else { if (x2 > x1) 
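/* pen down, then an HP-GL arc absolute of -90 degrees about the corner point (x2,y1) */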
fprintf(plotfile,"PD;AA%ld,%ld,-90,1;\n",(long)x2,(long)y1); else fprintf(plotfile,"PD;AA%ld,%ld,90,1;\n",(long)x2,(long)y1); } plot(penup,x2,y2); fprintf(plotfile,"PD;PU;"); /* else fprintf(plotfile,"PD;AA%ld,%ld,90,1;\n",(long)x2,(int)y1); */ plot(penup,x2,y2); break; default: for (i = 1; i <= 2*segs; i++) { f = (double)i / (2*segs); g = (double)i / (2*segs); h = 1.0 - sqrt(1.0 - g * g); if (sense) { x3 = x1 * (1.0 - f) + x2 * f; y3 = y1 + (y2 - y1) * h; } else { x3 = x1 + (x2 - x1) * h; y3 = y1 * (1.0 - f) + y2 * f; } plot(pendown, x3, y3); } break; } } /* splyne */ void swoopspline(double x1, double y1, double x2, double y2, double x3, double y3, boolean sense, long segs) { splyne(x1,y1,x2,y2,sense,segs/4,true,false); splyne(x2,y2,x3,y3,(boolean)(!sense),segs/4,false,true); } /* swoopspline */ void curvespline(double x1, double y1, double x2, double y2, boolean sense, long segs) { splyne(x1,y1,x2,y2,sense,segs/2,true,true); } /* curvespline */ /*******************************************/ static void putshort(FILE *fp, int i) { int c, c1; c = ((unsigned int ) i) & 0xff; c1 = (((unsigned int) i)>>8) & 0xff; putc(c, fp); putc(c1,fp); } /* putshort */ /*******************************************/ static void putint(FILE *fp, int i) { int c, c1, c2, c3; c = ((unsigned int ) i) & 0xff; c1 = (((unsigned int) i)>>8) & 0xff; c2 = (((unsigned int) i)>>16) & 0xff; c3 = (((unsigned int) i)>>24) & 0xff; putc(c, fp); putc(c1,fp); putc(c2,fp); putc(c3,fp); } /* ptint */ void write_bmp_header (FILE *plotfile,int width,int height) { /* * write a 1-bit image header * */ byte r1[2],g1[2],b1[2] ; int i, bperlin; r1[0] = (long) 255; /* Black */ g1[0] = (long) 255; b1[0] = (long) 255; r1[1] = 0; g1[1] = 0; b1[1] = 0; bperlin = ((width + 31) / 32) * 4; /* # bytes written per line */ putc('B', plotfile); putc('M', plotfile); /* BMP file magic number */ /* compute filesize and write it */ i = 14 + /* size of bitmap file header */ 40 + /* size of bitmap info header */ 8 + /* size of colormap */ bperlin * height; /* size of image data */ putint(plotfile, i); putshort(plotfile, 0); /* reserved1 */ putshort(plotfile, 0); /* reserved2 */ putint(plotfile, 14 + 40 + 8); /* offset from BOfile to BObitmap */ putint(plotfile, 40); /* biSize: size of bitmap info header */ putint(plotfile, width); /* Width */ putint(plotfile, height); /* Height */ putshort(plotfile, 1); /* Planes: must be '1' */ putshort(plotfile, 1); /* BitCount: 1 */ putint(plotfile, 0); /* Compression: BI_RGB = 0 */ putint(plotfile, bperlin*height);/* SizeImage: size of raw image data */ putint(plotfile, 75 * 39); /* XPelsPerMeter: (75dpi * 39 in. per meter) */ putint(plotfile, 75 * 39); /* YPelsPerMeter: (75dpi * 39 in. 
per meter) */ putint(plotfile, 2); /* ClrUsed: # of colors used in cmap */ putint(plotfile, 2); /* ClrImportant: same as above */ /* write out the colormap */ for (i = 0 ; i < 2 ; i++) { putc(b1[i],plotfile); putc(g1[i],plotfile); putc(r1[i],plotfile); putc(0, plotfile); } } /* write_bmp_header */ void reverse_bits (byte *full_pic, int location) { /* Reverse all the bits at location */ int i, loop_end ; byte orig, reversed; /* initialize...*/ orig = full_pic[location] ; reversed = (byte) '\0'; loop_end = sizeof (byte) * 8 ; if (orig == (byte) '\0') { /* No need to do anything for 0 bytes, */ return ; } else { for (i = 0 ; i < loop_end ; i++) { reversed = (reversed << 1) | (orig & 1) ; orig >>= 1 ; } full_pic[location] = reversed ; } } /* reverse_bits */ void turn_rows (byte *full_pic, int padded_width, int height) { int i, j; int midpoint = padded_width / 2 ; byte temp ; /* For the swap call */ for (j = 0 ; j < height ; j++) { for (i = 0 ; i < midpoint ; i++) { reverse_bits (full_pic, (j * padded_width) + i); reverse_bits (full_pic, (j * padded_width) + (padded_width - i)); swap_m (full_pic[(j * padded_width) + i], full_pic[(j * padded_width) + (padded_width - i)]) ; } /* Then do the midpoint */ reverse_bits (full_pic, (j * padded_width) + midpoint); } } /* turn_rows */ void translate_stripe_to_bmp(striptype *stripe, byte *full_pic, int increment, int width, int div, int *total_bytes) { int padded_width, i, j, offset, pad_size, total_stripes, last_stripe_offset, truncated_stripe_height ; if (div == 0) /* For some reason this is called once without valid data */ return ; else if (div == DEFAULT_STRIPE_HEIGHT) { /* For a non-last-stripe, figure out if the last stripe is going to be shorter than the others, to know how far from the bottom things should be offset. */ truncated_stripe_height = (int) ysize % DEFAULT_STRIPE_HEIGHT; if (truncated_stripe_height != 0) /* The last stripe isn't default height */ last_stripe_offset = DEFAULT_STRIPE_HEIGHT - ((int) ysize % DEFAULT_STRIPE_HEIGHT) ; else /* Stripes are all default height */ last_stripe_offset = 0 ; } else { /* For the last stripe, */ last_stripe_offset = 0 ; } total_stripes = (int) ceil (ysize / (double) DEFAULT_STRIPE_HEIGHT); /* width, padded to be a multiple of 32 bits, or 4 bytes */ padded_width = ((width + 3)/4) * 4; pad_size = padded_width - width; /* Include pad_size here, as it'll be turned horizontally later */ offset = ((total_stripes - increment) * (padded_width * DEFAULT_STRIPE_HEIGHT)) - (padded_width * last_stripe_offset) + pad_size ; for (j = div; j >= 0; j--) { for (i = 0; i < width; i++) { full_pic[offset + (((div-j) * padded_width) + (width-i))] = (byte) (*stripe)[j][i]; (*total_bytes)++ ; } /* Take into account the padding */ (*total_bytes) += pad_size ; } } /* translate_stripe_to_bmp */ void write_full_pic(byte *full_pic, int total_bytes) { int i ; for (i = 0; i < total_bytes; i++) { putc (full_pic[i], plotfile); } } /* write_full_pic */ void makebox_no_interaction(char *fn, double *xo, double *yo, double *scale, long ntips) /* fn--fontname xo,yo--x and y offsets */ { /* draw the box on screen which represents plotting area. 
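      The area is inset by a 5% margin on each side, the tree extent is rescaled to fit
      inside it, and dividing lines are then drawn to show how the plot splits across
      multiple pages.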
*/ long xpag,ypag,i,j; oldpenchange = penchange; oldxsize = xsize; oldysize = ysize; oldxunitspercm = xunitspercm; oldyunitspercm = yunitspercm; oldxcorner = xcorner; oldycorner = ycorner; oldplotter = plotter; plotrparms(ntips); xcorner += 0.05 * xsize; ycorner += 0.05 * ysize; xsize *= 0.9; ysize *= 0.9; (*scale) = ysize / oldysize; if (xsize / oldxsize < (*scale)) (*scale) = xsize / oldxsize; (*xo) = (xcorner + (xsize - oldxsize * (*scale)) / 2.0) / (*scale); (*yo) = (ycorner + (ysize - oldysize * (*scale)) / 2.0) / (*scale); xscale = (*scale) * xunitspercm; yscale = (*scale) * yunitspercm; initplotter(ntips,fn); plot(penup, xscale * (*xo), yscale * (*yo)); plot(pendown, xscale * (*xo), yscale * ((*yo) + oldysize)); plot(pendown, xscale * ((*xo) + oldxsize), yscale * ((*yo) + oldysize)); plot(pendown, xscale * ((*xo) + oldxsize), yscale * (*yo)); plot(pendown, xscale * (*xo), yscale * (*yo)); /* we've done the extent, now draw the dividing lines: */ xpag = (int)((pagex-hpmargin-0.01)/(paperx - hpmargin))+1; ypag = (int)((pagey-vpmargin-0.01)/(papery - vpmargin))+1; for (i=0;i\n"); fprintf(plotfile, "#declare C_White_trans = color rgbt<1, 1, 1, 0.7>\n"); fprintf(plotfile, "#declare C_Red = color rgb<1, 0, 0>\n"); fprintf(plotfile, "#declare C_Yellow = color rgb<1, 1, 0>\n"); fprintf(plotfile, "#declare C_Green = color rgb<0, 1, 0>\n"); fprintf(plotfile, "#declare C_Black = color rgb<0, 0, 0>\n"); fprintf(plotfile, "#declare C_Blue = color rgb<0, 0, 1>\n"); fprintf(plotfile, "\n// Declare the textures\n\n"); fprintf(plotfile, "#declare T_White = texture { pigment { C_White }}\n"); fprintf(plotfile, "#declare T_White_trans = texture { pigment { C_White_trans }}\n"); fprintf(plotfile, "#declare T_Red = texture { pigment { C_Red }\n"); fprintf(plotfile, "\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#declare T_Red_trans = texture { pigment { C_Red filter 0.7 }\n"); fprintf(plotfile, "\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#declare T_Green = texture { pigment { C_Green }\n"); fprintf(plotfile, "\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#declare T_Green_trans = texture { \n"); fprintf(plotfile, "\tpigment { C_Green filter 0.7 }\n"); fprintf(plotfile, "\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#declare T_Blue = texture { pigment { C_Blue }\n"); fprintf(plotfile, "\tfinish { phong 1 phong_size 100 }}\n"); fprintf(plotfile, "#background { color rgb<1, 1, 1> }\n"); } /* void_func */ /* added for vrml - danieyek 981111 */ /* Returned angle in radian */ /* A related function is "double angleBetVectors(Xu, Yu, Xv, Yv)" in drawtree.c */ double computeAngle(double oldx, double oldy, double newx, double newy) { double angle; if ((newx-oldx) == 0 ) { /* pi/2 or -pi/2! */ if (newy > oldy) angle = pie/2; else if (newy < oldy) angle = -pie/2; else { /* added - danieyek 990130 */ /* newx = oldx; newy = oldy; one point on top of the other! If new and old correspond to 2 points, changes are that the 2 coordinates are not identical under double precision value. */ fprintf(stderr, "ERROR: Angle can't be computed, 2 points on top of each other in computeAngle()!\n"); angle = 0; } } else { angle = atan( (newy-oldy)/(newx-oldx) ); if (newy >= oldy && newx >= oldx) { /* First quardrant - no adjustment */ } else if (newx <= oldx) { /* Second (angle = negative) and third (angle = positive) quardrant */ angle = pie + angle; } else if (newy <= oldy && newx >= oldx) { /* Fourth quardrant; "angle" is negative! 
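      atan() gives a value in (-pie/2, 0) here, so adding 2*pie puts the result in the
      (3*pie/2, 2*pie) range.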
*/ angle = 2*pie + angle; } else { /* Should never get here. */ fprintf(stderr, "ERROR: Programming error in computeAngle()!\n"); } } return angle; } /* computeAngle */ #ifdef WIN32 #include /********************* Prototypes ***********************/ LRESULT WINAPI MainWndProc( HWND, UINT, WPARAM, LPARAM ); LRESULT WINAPI AboutDlgProc( HWND, UINT, WPARAM, LPARAM ); /******************* Global Variables ********************/ extern void winplotpreviewcore(); HANDLE ghInstance; HPEN hPenTree, hPenLabel, hPenBackground, hPenOld; /********************************************************************\ * Comments: Register window class, create and display the main * * window, and enter message loop. * \********************************************************************/ winplotpreview() { WNDCLASS wc; MSG msg; HWND hWnd; int screenXres, screenYres, winXres, winYres; winaction = quitnow; wc.lpszClassName = "GenericAppClass"; wc.lpfnWndProc = MainWndProc; wc.style = CS_OWNDC | CS_VREDRAW | CS_HREDRAW; wc.hInstance = NULL; wc.hIcon = LoadIcon( NULL, IDI_APPLICATION ); wc.hCursor = LoadCursor( NULL, IDC_ARROW ); wc.hbrBackground = (HBRUSH)( COLOR_WINDOW+1 ); wc.lpszMenuName = "GenericAppMenu"; wc.cbClsExtra = 0; wc.cbWndExtra = 0; RegisterClass( &wc ); ghInstance = NULL; screenXres = GetSystemMetrics(SM_CXSCREEN); winXres = (int)((float)(screenXres)*XWINPERCENT); screenYres = GetSystemMetrics(SM_CYSCREEN); winYres = (int)((float)(screenYres)*YWINPERCENT); hWnd = CreateWindow( "GenericAppClass", "Tree Preview", WS_OVERLAPPEDWINDOW, 0, 0, winXres, winYres, NULL, NULL, NULL, NULL ); ShowWindow( hWnd, SW_SHOWNORMAL ); while( GetMessage( &msg, NULL, 0, 0 ) ) { TranslateMessage( &msg ); DispatchMessage( &msg ); } return msg.wParam; } /********************* * * * * Comments: The following messages are processed * * * * WM_PAINT * * WM_COMMAND * * WM_DESTROY * * * * * \********************************************************************/ LRESULT CALLBACK MainWndProc( HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam ) { PAINTSTRUCT ps; LOGBRUSH lb; HBRUSH bgbrush; RECT lpRect; int windowwidth, windowheight; switch( msg ) { /**************************************************************\ * WM_ACTIVATE: * \**************************************************************/ case WM_ACTIVATE: if (wParam != WA_INACTIVE) BringWindowToTop(hWnd); break; /**************************************************************\ * WM_PAINT: * \**************************************************************/ case WM_PAINT: hdc = BeginPaint( hWnd, &ps ); /* Initialize the pen's brush. */ lb.lbStyle = BS_SOLID; lb.lbColor = RGB(0,0,0); lb.lbHatch = 0; /* 2 pixel pen for the tree */ hPenTree = ExtCreatePen(PS_GEOMETRIC | PS_SOLID | PS_ENDCAP_ROUND, (DWORD)2, &lb, 0, NULL); /* 1 pixel pen for labels */ hPenLabel = ExtCreatePen(PS_GEOMETRIC | PS_SOLID | PS_ENDCAP_ROUND, (DWORD)1, &lb, 0, NULL); /* light blue pen for outline of background rectangle */ lb.lbColor = RGB(204,255,255); hPenBackground = ExtCreatePen(PS_GEOMETRIC | PS_SOLID, (DWORD)1, &lb, 0, NULL); /* light blue brush for interior of background rectangle */ bgbrush = CreateSolidBrush(RGB(204,255,255)); /* GetClientRect returns the size of that part of the window that is actually ours to draw in. 
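      The client rectangle excludes the title bar, menu and window borders, so
      lpRect.right and lpRect.bottom give the usable width and height in pixels.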
*/ GetClientRect(hWnd, &lpRect); windowwidth = lpRect.right; windowheight = lpRect.bottom; /* select background pen and brush */ SelectObject(hdc, hPenBackground); SelectObject(hdc, bgbrush); /* fill background */ Rectangle(hdc, 0, 0, windowwidth, windowheight); /* select tree pen */ hPenOld = SelectObject(hdc, hPenTree); /* winplotpreviewcore calls makebox, plottree, plotlabels and finishplotter */ winplotpreviewcore(windowwidth, windowheight); /* delete pens to recover memory */ DeleteObject(hPenTree); DeleteObject(hPenLabel); DeleteObject(hPenBackground); DeleteObject(bgbrush); EndPaint( hWnd, &ps ); break; /**************************************************************\ * WM_COMMAND: * \**************************************************************/ case WM_COMMAND: switch( wParam ) { case IDM_ABOUT: DialogBox( ghInstance, "AboutDlg", hWnd, (DLGPROC) AboutDlgProc ); break; case IDM_PLOT: // "Plot" menu item winaction = plotnow; DestroyWindow(hWnd); break; case IDM_CHANGE: // "Change Parameters" menu item winaction = changeparms; DestroyWindow(hWnd); break; case IDM_QUIT: // "Quit" menu item winaction = quitnow; DestroyWindow(hWnd); break; } break; /**************************************************************\ * WM_DESTROY: PostQuitMessage() is called * \**************************************************************/ case WM_DESTROY: PostQuitMessage( 0 ); break; /**************************************************************\ * Let the default window proc handle all other messages * \**************************************************************/ default: return( DefWindowProc( hWnd, msg, wParam, lParam )); } return 0; } /********************************************************************\ * Function: LRESULT CALLBACK AboutDlgProc(HWND, UINT, WPARAM, LPARAM)* * * * Purpose: Processes "About" Dialog Box Messages * * * * Comments: The About dialog box is displayed when the user clicks * * About from the Help menu. * * * \********************************************************************/ LRESULT CALLBACK AboutDlgProc( HWND hDlg, UINT uMsg, WPARAM wParam, LPARAM lParam ) { switch( uMsg ) { case WM_INITDIALOG: return TRUE; case WM_COMMAND: switch( wParam ) { case IDOK: EndDialog( hDlg, TRUE ); return TRUE; } break; } return FALSE; } #endif PHYLIPNEW-3.69.650/src/disc.c0000664000175000017500000006002311253743724012154 00000000000000#include "phylip.h" #include "disc.h" AjPPhyloState* phylostates; /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ long chars, nonodes, nextree, which; /* nonodes = number of nodes in tree * * chars = number of binary characters * * words = number of words needed to represent characters of one organism */ steptr weight, extras; boolean printdata; void disc_inputdata(AjPPhyloState state, pointptr treenode,boolean dollo,boolean printdata, FILE *outfile) { /* input the names and character state data for species */ /* used in Dollop, Dolpenny, Dolmove, & Move */ long i, j, l; char k; Char charstate; /* possible states are '0', '1', 'P', 'B', and '?' 
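      '1' and '0' set the corresponding state bit; 'P' and 'B' are treated as ambiguous,
      adding their weight to extras[] under the Dollo option and otherwise setting both
      bits; '?' leaves both bits clear.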
*/ if (printdata) headings(chars, "Characters", "----------"); for (i = 0; i < (chars); i++) extras[i] = 0; for (i = 1; i <= spp; i++) { initnamestate(state, i-1); if (printdata) { for (j = 0; j < nmlngth; j++) putc(nayme[i - 1][j], outfile); fprintf(outfile, " "); } for (j = 0; j < (words); j++) { treenode[i - 1]->stateone[j] = 0; treenode[i - 1]->statezero[j] = 0; } for (j = 1; j <= (chars); j++) { k = (j - 1) % bits + 1; l = (j - 1) / bits + 1; charstate = ajStrGetCharPos(state->Str[i-1], j-1); if (charstate == 'b') charstate = 'B'; if (charstate == 'p') charstate = 'P'; if (charstate != '0' && charstate != '1' && charstate != '?' && charstate != 'P' && charstate != 'B') { printf("\n\nERROR: Bad character state: %c ",charstate); printf("at character %ld of species %ld\n\n", j, i); exxit(-1); } if (printdata) { newline(outfile, j, 55, nmlngth + 3); putc(charstate, outfile); if (j % 5 == 0) putc(' ', outfile); } if (charstate == '1') treenode[i - 1]->stateone[l - 1] = ((long)treenode[i - 1]->stateone[l - 1]) | (1L << k); if (charstate == '0') treenode[i - 1]->statezero[l - 1] = ((long)treenode[i - 1]->statezero[l - 1]) | (1L << k); if (charstate == 'P' || charstate == 'B') { if (dollo) extras[j - 1] += weight[j - 1]; else { treenode[i - 1]->stateone[l - 1] = ((long)treenode[i - 1]->stateone[l - 1]) | (1L << k); treenode[i - 1]->statezero[l - 1] = ((long)treenode[i - 1]->statezero[l - 1]) | (1L << k); } } } if (printdata) putc('\n', outfile); } if (printdata) fprintf(outfile, "\n\n"); } /* inputdata */ void disc_inputdata2(AjPPhyloState state, pointptr2 treenode) { /* input the names and character state data for species */ /* used in Mix & Penny */ long i, j, l; char k; Char charstate; AjPStr str; /* possible states are '0', '1', 'P', 'B', and '?' */ if (printdata) headings(chars, "Characters", "----------"); for (i = 0; i < (chars); i++) extras[i] = 0; for (i = 1; i <= spp; i++) { str = state->Str[i-1]; initnamestate(state,i-1); if (printdata) { for (j = 0; j < nmlngth; j++) putc(nayme[i - 1][j], outfile); } fprintf(outfile, " "); for (j = 0; j < (words); j++) { treenode[i - 1]->fulstte1[j] = 0; treenode[i - 1]->fulstte0[j] = 0; treenode[i - 1]->empstte1[j] = 0; treenode[i - 1]->empstte0[j] = 0; } for (j = 1; j <= (chars); j++) { k = (j - 1) % bits + 1; l = (j - 1) / bits + 1; charstate = ajStrGetCharPos(str, j-1); if (charstate == 'b') charstate = 'B'; if (charstate == 'p') charstate = 'P'; if (charstate != '0' && charstate != '1' && charstate != '?' 
&& charstate != 'P' && charstate != 'B') { printf("\n\nERROR: Bad character state: %c ",charstate); printf("at character %ld of species %ld\n\n", j, i); exxit(-1); } if (printdata) { newline(outfile, j, 55, nmlngth + 3); putc(charstate, outfile); if (j % 5 == 0) putc(' ', outfile); } if (charstate == '1') { treenode[i-1]->fulstte1[l-1] = ((long)treenode[i-1]->fulstte1[l-1]) | (1L << k); treenode[i-1]->empstte1[l-1] = treenode[i-1]->fulstte1[l-1]; } if (charstate == '0') { treenode[i-1]->fulstte0[l-1] = ((long)treenode[i-1]->fulstte0[l-1]) | (1L << k); treenode[i-1]->empstte0[l-1] = treenode[i-1]->fulstte0[l-1]; } if (charstate == 'P' || charstate == 'B') extras[j-1] += weight[j-1]; } if (printdata) putc('\n', outfile); } fprintf(outfile, "\n\n"); } /* inputdata2 */ void alloctree(pointptr *treenode) { /* allocate tree nodes dynamically */ /* used in dollop, dolmove, dolpenny, & move */ long i, j; node *p, *q; (*treenode) = (pointptr)Malloc(nonodes*sizeof(node *)); for (i = 0; i < (spp); i++) { (*treenode)[i] = (node *)Malloc(sizeof(node)); (*treenode)[i]->stateone = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->statezero = (bitptr)Malloc(words*sizeof(long)); } for (i = spp; i < (nonodes); i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->stateone = (bitptr)Malloc(words*sizeof(long)); p->statezero = (bitptr)Malloc(words*sizeof(long)); p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree */ void alloctree2(pointptr2 *treenode) { /* allocate tree nodes dynamically */ /* used in mix & penny */ long i, j; node2 *p, *q; (*treenode) = (pointptr2)Malloc(nonodes*sizeof(node2 *)); for (i = 0; i < (spp); i++) { (*treenode)[i] = (node2 *)Malloc(sizeof(node2)); (*treenode)[i]->fulstte1 = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->fulstte0 = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->empstte1 = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->empstte0 = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->fulsteps = (bitptr)Malloc(words*sizeof(long)); (*treenode)[i]->empsteps = (bitptr)Malloc(words*sizeof(long)); } for (i = spp; i < (nonodes); i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node2 *)Malloc(sizeof(node2)); p->fulstte1 = (bitptr)Malloc(words*sizeof(long)); p->fulstte0 = (bitptr)Malloc(words*sizeof(long)); p->empstte1 = (bitptr)Malloc(words*sizeof(long)); p->empstte0 = (bitptr)Malloc(words*sizeof(long)); p->fulsteps = (bitptr)Malloc(words*sizeof(long)); p->empsteps = (bitptr)Malloc(words*sizeof(long)); p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree2 */ void setuptree(pointptr treenode) { /* initialize tree nodes */ /* used in dollop, dolmove, dolpenny, & move */ long i; node *p; for (i = 1; i <= (nonodes); i++) { treenode[i-1]->back = NULL; treenode[i-1]->tip = (i <= spp); treenode[i-1]->index = i; if (i > spp) { p = treenode[i-1]->next; while (p != treenode[i-1]) { p->back = NULL; p->tip = false; p->index = i; p = p->next; } } } } /* setuptree */ void setuptree2(pointptr2 treenode) { /* initialize tree nodes */ /* used in mix & penny */ long i; node2 *p; for (i = 1; i <= (nonodes); i++) { treenode[i-1]->back = NULL; treenode[i-1]->tip = (i <= spp); treenode[i-1]->index = i; if (i > spp) { p = treenode[i-1]->next; while (p != treenode[i-1]) { p->back = NULL; p->tip = false; p->index = i; p = p->next; } } } } /* setuptree2 */ void inputancestorsstr(AjPStr propstr, boolean *anczero0, boolean *ancone0) { /* reads the ancestral states for each character */ /* used in 
dollop, dolmove, dolpenny, mix, move, & penny */ long i; Char ch; for (i = 0; i < (chars); i++) { anczero0[i] = true; ancone0[i] = true; ch = ajStrGetCharPos(propstr, i); if (ch == 'p') ch = 'P'; if (ch == 'b') ch = 'B'; if (strchr("10PB?",ch) != NULL){ anczero0[i] = (ch == '1') ? false : anczero0[i]; ancone0[i] = (ch == '0') ? false : ancone0[i]; } else { ajErr("bad ancestor state: %c at character %4ld\n", ch, i + 1); exxit(-1); } } } /* inputancestorsprop */ void printancestors(FILE *filename, boolean *anczero, boolean *ancone) { /* print out list of ancestral states */ /* used in dollop, dolmove, dolpenny, mix, move, & penny */ long i; fprintf(filename, " Ancestral states:\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', filename); for (i = 1; i <= (chars); i++) { newline(filename, i, 55, nmlngth + 3); if (ancone[i-1] && anczero[i-1]) putc('?', filename); else if (ancone[i-1]) putc('1', filename); else putc('0', filename); if (i % 5 == 0) putc(' ', filename); } fprintf(filename, "\n\n"); } /* printancestor */ void add(node *below, node *newtip, node *newfork, node **root, pointptr treenode) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant. The global variable root is also updated */ /* used in dollop & dolpenny */ if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (*root == below) *root = newfork; } /* add */ void add2(node *below, node *newtip, node *newfork, node **root, boolean restoring, boolean wasleft, pointptr treenode) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ /* used in move & dolmove */ boolean putleft; node *leftdesc, *rtdesc; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; putleft = true; if (restoring) putleft = wasleft; if (putleft) { leftdesc = newtip; rtdesc = below; } else { leftdesc = below; rtdesc = newtip; } rtdesc->back = newfork->next->next; newfork->next->next->back = rtdesc; newfork->next->back = leftdesc; leftdesc->back = newfork->next; if (*root == below) *root = newfork; (*root)->back = NULL; } /* add2 */ void add3(node2 *below, node2 *newtip, node2 *newfork, node2 **root, pointptr2 treenode) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant. The global variable root is also updated */ /* used in mix & penny */ node2 *p; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (*root == below) *root = newfork; (*root)->back = NULL; p = newfork; do { p->visited = false; p = p->back; if (p != NULL) p = treenode[p->index - 1]; } while (p != NULL); } /* add3 */ void re_move(node **item, node **fork, node **root, pointptr treenode) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). Also returns pointers to the deleted nodes, item and fork. 
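/* A standalone sketch (not from the PHYLIP sources) of the fork-insertion
   step that add(), add2() and add3() above all perform: a new interior
   fork -- a ring of three node records -- is spliced between `below` and
   its former parent purely by rewiring `back` pointers.  The node type
   and function names here are illustrative only, and the sketch ignores
   the treenode[] canonicalisation and visited/numdesc bookkeeping of the
   real routines.  Compile it on its own. */

#include <stdio.h>
#include <stddef.h>

struct snode { struct snode *next, *back; };

static void splice_in(struct snode *below, struct snode *newtip,
                      struct snode *newfork, struct snode **root)
{
    if (below->back != NULL)              /* old parent edge -> newfork */
        below->back->back = newfork;
    newfork->back = below->back;
    below->back = newfork->next->next;    /* below: right descendant    */
    newfork->next->next->back = below;
    newfork->next->back = newtip;         /* newtip: left descendant    */
    newtip->back = newfork->next;
    if (*root == below)                   /* inserted above the root    */
        *root = newfork;
}

int main(void)
{
    struct snode tipA = { NULL, NULL }, tipB = { NULL, NULL };
    struct snode f[3];                    /* three-member fork ring     */
    struct snode *root = &tipA;

    f[0].next = &f[1];  f[1].next = &f[2];  f[2].next = &f[0];
    f[0].back = f[1].back = f[2].back = NULL;

    splice_in(&tipA, &tipB, &f[0], &root);
    printf("root moved to fork: %s\n", root == &f[0] ? "yes" : "no");
    printf("tips attached: %s\n",
           (f[1].back == &tipB && f[2].back == &tipA) ? "yes" : "no");
    return 0;
}
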
The global variable root is also updated */ /* used in dollop & dolpenny */ node *p, *q; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = treenode[(*item)->back->index - 1]; if (*root == *fork) { if (*item == (*fork)->next->back) *root = (*fork)->next->next->back; else *root = (*fork)->next->back; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; } /* re_move */ void re_move2(node **item, node **fork, node **root, boolean *wasleft, pointptr treenode) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). Also returns pointers to the deleted nodes, item and fork */ /* used in move & dolmove */ node *p, *q; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = treenode[(*item)->back->index - 1]; if (*item == (*fork)->next->back) { if (*root == *fork) *root = (*fork)->next->next->back; (*wasleft) = true; } else { if (*root == *fork) *root = (*fork)->next->back; (*wasleft) = false; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; } /* re_move2 */ void re_move3(node2 **item, node2 **fork, node2 **root, pointptr2 treenode) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). Also returns pointers to the deleted nodes, item and fork. The global variable *root is also updated */ /* used in mix & penny */ node2 *p, *q; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = treenode[(*item)->back->index - 1]; if (*root == *fork) { if (*item == (*fork)->next->back) *root = (*fork)->next->next->back; else *root = (*fork)->next->back; } p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; q = (*fork)->back; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } (*item)->back = NULL; if (q != NULL) q = treenode[q->index - 1]; while (q != NULL) { q-> visited = false; q = q->back; if (q != NULL) q = treenode[q->index - 1]; } } /* re_move3 */ void coordinates(node *p, long *tipy, double f, long *fartemp) { /* establishes coordinates of nodes */ /* used in dollop, dolpenny, dolmove, & move */ node *q, *first, *last; if (p->tip) { p->xcoord = 0; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; *tipy += down; return; } q = p->next; do { coordinates(q->back, tipy, f, fartemp); q = q->next; } while (p != q); first = p->next->back; q = p->next; while (q->next != p) q = q->next; last = q->back; p->xcoord = (last->ymax - first->ymin) * f; p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; if (p->xcoord > *fartemp) *fartemp = p->xcoord; } /* coordinates */ void coordinates2(node2 *p, long *tipy) { /* establishes coordinates2 of nodes */ node2 *q, *first, *last; if (p->tip) { p->xcoord = 0; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; return; } q = p->next; do { coordinates2(q->back, tipy); q = q->next; } while (p != q); first = p->next->back; q = p->next; while (q->next != p) q = q->next; last = q->back; p->xcoord = last->ymax - 
first->ymin; p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* coordinates2 */ void treeout(node *p, long nextree, long *col, node *root) { /* write out file with representation of final tree */ /* used in dollop, dolmove, dolpenny, & move */ long i, n; Char c; node *q; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { q = p->next; putc('(', outtree); (*col)++; while (q != p) { treeout(q->back, nextree, col, root); q = q->next; if (q == p) break; putc(',', outtree); (*col)++; if (*col > 65) { putc('\n', outtree); *col = 0; } } putc(')', outtree); (*col)++; } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout */ void treeout2(node2 *p, long *col, node2 *root) { /* write out file with representation of final tree */ /* used in mix & penny */ long i, n; Char c; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; treeout2(p->next->back, col, root); putc(',', outtree); (*col)++; if (*col > 65) { putc('\n', outtree); *col = 0; } treeout2(p->next->next->back, col, root); putc(')', outtree); (*col)++; } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout2 */ void standev(long numtrees, long minwhich, double minsteps, double *nsteps, double **fsteps, longer seed) { /* compute and write standard deviation of user trees */ /* used in pars */ long i, j, k; double wt, sumw, sum, sum2, sd; double temp; double **covar, *P, *f; #define SAMPLES 1000 /* ????? 
if numtrees too big for Shimo, truncate */ if (numtrees > maxuser) { printf("TOO MANY USER-DEFINED TREES"); printf(" test only performed in the first %ld of them\n", (long)maxuser); } else if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree Steps Diff Steps Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); which = 1; while (which <= numtrees) { fprintf(outfile, "%3ld%10.1f", which, nsteps[which - 1]); if (minwhich == which) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (i = 0; i < chars; i++) { if (weight[i] > 0) { wt = weight[i]; sumw += wt; temp = (fsteps[which - 1][i] - fsteps[minwhich - 1][i]); sum += temp; sum2 += temp * temp / wt; } } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, "%10.1f%12.4f", (nsteps[which - 1] - minsteps) / 10, sd); if (sum > 1.95996 * sd) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } which++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ if(numtrees > MAXSHIMOTREES){ fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" , MAXSHIMOTREES, numtrees); numtrees = MAXSHIMOTREES; } else { fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); } covar = (double **)Malloc(numtrees*sizeof(double *)); for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); sumw = 0.0; for (i = 0; i < chars; i++) sumw += weight[i]; for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = nsteps[i]/sumw; for (j = 0; j <=i; j++) { sum2 = nsteps[j]/sumw; temp = 0.0; for (k = 0; k < chars; k++) { if (weight[k] > 0) temp = temp + weight[k]*(fsteps[i][k]-sum) *(fsteps[j][k]-sum2); } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; if (covar[i][i]-sum <= 0.0) temp = 0.0; else temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-12) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; sum2 = nsteps[0]; /* sum2 will be smallest # of steps */ for (i = 1; i < numtrees; i++) if (sum2 > nsteps[i]) sum2 = nsteps[i]; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get min of vector */ if (f[j] < sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (nsteps[j]-sum2 <= f[j] - sum) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree Steps Diff Steps P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld%10.1f", i+1, nsteps[i]); if ((minwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %9.1f %10.3f", nsteps[i]-sum2, P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < numtrees; i++) free(covar[i]); 
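/* A standalone sketch (not from the PHYLIP sources) of the two numerical
   steps in the Shimodaira-Hasegawa branch of standev() above: the
   in-place lower-triangular Cholesky factorisation of the trees x trees
   covariance matrix, followed by a textbook draw of correlated normal
   deviates f = L z (one shared z per replicate) so that cov(f) = L L' = C.
   gauss() is a crude stand-in for normrand(), and the 2 x 2 matrix is a
   toy example; all names here are illustrative. */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

static double gauss(void)                 /* sum of 12 uniforms minus 6 */
{
    double s = 0.0;
    int i;
    for (i = 0; i < 12; i++)
        s += rand() / (RAND_MAX + 1.0);
    return s - 6.0;                       /* roughly N(0,1)             */
}

static void cholesky(double c[2][2], int n)  /* in place, lower triangle */
{
    int i, j, k;
    double sum, d;
    for (i = 0; i < n; i++) {
        sum = 0.0;
        for (j = 0; j < i; j++)
            sum += c[i][j] * c[i][j];
        d = c[i][i] - sum;
        c[i][i] = (d <= 0.0) ? 0.0 : sqrt(d);
        for (j = i + 1; j < n; j++) {
            sum = 0.0;
            for (k = 0; k < i; k++)
                sum += c[i][k] * c[j][k];
            c[j][i] = (fabs(c[i][i]) < 1.0e-12)
                          ? 0.0 : (c[j][i] - sum) / c[i][i];
        }
    }
}

int main(void)
{
    double c[2][2] = { { 4.0, 1.2 }, { 1.2, 2.0 } };  /* toy covariance */
    double z[2], f[2];
    int j, k;

    cholesky(c, 2);
    for (j = 0; j < 2; j++)
        z[j] = gauss();
    for (j = 0; j < 2; j++) {             /* f = L z                    */
        f[j] = 0.0;
        for (k = 0; k <= j; k++)
            f[j] += c[j][k] * z[k];
    }
    printf("L = [%.3f 0; %.3f %.3f]   f = (%.3f, %.3f)\n",
           c[0][0], c[1][0], c[1][1], f[0], f[1]);
    return 0;
}
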
free(covar); } } /* standev */ void guesstates(Char *guess) { /* write best guesses of ancestral states */ /* used in dollop, dolpenny, mix, & penny */ long i, j; fprintf(outfile, "best guesses of ancestral states:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%2ld", i); fprintf(outfile, "\n *--------------------\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld!", i * 10); for (j = 0; j <= 9; j++) { if (i * 10 + j == 0 || i * 10 + j > chars) fprintf(outfile, " "); else fprintf(outfile, " %c", guess[i * 10 + j - 1]); } putc('\n', outfile); } putc('\n', outfile); } /* guesstates */ void freegarbage(gbit **garbage) { /* used in dollop, dolpenny, mix, & penny */ gbit *p; while (*garbage) { p = *garbage; *garbage = (*garbage)->next; free(p->bits_); free(p); } } /* freegarbage */ void disc_gnu(gbit **p, gbit **grbg) { /* this is a do-it-yourself garbage collectors for move Make a new node or pull one off the garbage list */ if (*grbg != NULL) { *p = *grbg; *grbg = (*grbg)->next; } else { *p = (gbit *)Malloc(sizeof(gbit)); (*p)->bits_ = (bitptr)Malloc(words*sizeof(long)); } (*p)->next = NULL; } /* disc_gnu */ void disc_chuck(gbit *p, gbit **grbg) { /* collect garbage on p -- put it on front of garbage list */ p->next = *grbg; *grbg = p; } /* disc_chuck */ PHYLIPNEW-3.69.650/src/contrast.c0000664000175000017500000006067311305225544013073 00000000000000/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #include "phylip.h" #include "cont.h" AjPPhyloFreq phylofreq; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void getdata(void); void allocrest(void); void doinit(void); void contwithin(void); void contbetween(node *, node *); void nuview(node *); void makecontrasts(node *); void writecontrasts(void); void regressions(void); double logdet(double **); void invert(double **); void initcovars(boolean); double normdiff(boolean); void matcopy(double **, double **); void newcovars(boolean); void printcovariances(boolean); void emiterate(boolean); void initcontrastnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void maketree(void); /* function prototypes */ #endif const char* outfilename; AjPFile embossoutfile; long nonodes, chars, numtrees; long *sample, contnum; phenotype3 **x, **cntrast, *ssqcont; double **vara, **vare, **oldvara, **oldvare, **Bax, **Bex, **temp1, **temp2, **temp3; double logL, logLvara, logLnovara; boolean nophylo, printdata, progress, reg, mulsets, varywithin, writecont, bifurcating; Char ch; long contno; node *grbg; /* Local variables for maketree, propagated globally for c version: */ tree curtree; /* Variables declared just to make treeread happy */ boolean haslengths, goteof, first; double trweight; void emboss_getoptions(char *pgm, int argc, char *argv[]) { mulsets = false; nophylo = false; printdata = false; progress = true; varywithin = false; writecont = false; reg = true; embInitPV(pgm, argc, argv,"PHYLIPNEW",VERSION); phylofreq = ajAcdGetFrequencies("infile"); phylotrees = ajAcdGetTree("intreefile"); numtrees = 0; while (phylotrees[numtrees]) numtrees++; varywithin = ajAcdGetBoolean("varywithin"); if(varywithin) nophylo = 
ajAcdGetBoolean("nophylo"); else { reg = ajAcdGetBoolean("reg"); writecont = ajAcdGetBoolean("writecont"); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void getdata() { /* read species data */ long i, j, k, l; long idata = 0; if (printdata) { fprintf(outfile, "\nContinuous character contrasts analysis, version %s\n\n",VERSION); fprintf(outfile, "%4ld Populations, %4ld Characters\n\n", spp, chars); fprintf(outfile, "Name"); fprintf(outfile, " Phenotypes\n"); fprintf(outfile, "----"); fprintf(outfile, " ----------\n\n"); } x = (phenotype3 **)Malloc((long)spp*sizeof(phenotype3 *)); cntrast = (phenotype3 **)Malloc((long)spp*sizeof(phenotype3 *)); ssqcont = (phenotype3 *)Malloc((long)spp*sizeof(phenotype3 *)); contnum = spp-1; for (i = 0; i < spp; i++) { initnamefreq(phylofreq, i); if (varywithin) { sample[i] = phylofreq->Individuals[i]; contnum += sample[i]-1; } else sample[i] = 1; if (printdata) for(j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); x[i] = (phenotype3 *)Malloc((long)sample[i]*sizeof(phenotype3)); cntrast[i] = (phenotype3 *)Malloc((long)(sample[i]*sizeof(phenotype3))); ssqcont[i] = (double *)Malloc((long)(sample[i]*sizeof(double))); for (k = 0; k <= sample[i]-1; k++) { x[i][k] = (phenotype3)Malloc((long)chars*sizeof(double)); cntrast[i][k] = (phenotype3)Malloc((long)chars*sizeof(double)); for (j = 1; j <= chars; j++) { x[i][k][j - 1] = phylofreq->Data[idata++]; if (printdata) { fprintf(outfile, "%10.5f", x[i][k][j - 1]); if (j % 6 == 0) { putc('\n', outfile); for (l = 1; l <= nmlngth; l++) putc(' ', outfile); } } } } if (printdata) putc('\n', outfile); } if (printdata) putc('\n', outfile); } /* getdata */ void allocrest() { long i; /* otherwise if individual variation, these are allocated in getdata */ sample = (long *)Malloc((long)spp*sizeof(long)); nayme = (naym *)Malloc((long)spp*sizeof(naym)); vara = (double **)Malloc((long)chars*sizeof(double *)); oldvara = (double **)Malloc((long)chars*sizeof(double *)); vare = (double **)Malloc((long)chars*sizeof(double *)); oldvare = (double **)Malloc((long)chars*sizeof(double *)); Bax = (double **)Malloc((long)chars*sizeof(double *)); Bex = (double **)Malloc((long)chars*sizeof(double *)); temp1 = (double **)Malloc((long)chars*sizeof(double *)); temp2 = (double **)Malloc((long)chars*sizeof(double *)); temp3 = (double **)Malloc((long)chars*sizeof(double *)); for (i = 0; i < chars; i++) { vara[i] = (double *)Malloc((long)chars*sizeof(double)); oldvara[i] = (double *)Malloc((long)chars*sizeof(double)); vare[i] = (double *)Malloc((long)chars*sizeof(double)); oldvare[i] = (double *)Malloc((long)chars*sizeof(double)); Bax[i] = (double *)Malloc((long)chars*sizeof(double)); Bex[i] = (double *)Malloc((long)chars*sizeof(double)); temp1[i] = (double *)Malloc((long)chars*sizeof(double)); temp2[i] = (double *)Malloc((long)chars*sizeof(double)); temp3[i] = (double *)Malloc((long)chars*sizeof(double)); } } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersfreq(phylofreq, &spp, &chars, &nonodes, 1); allocrest(); } /* doinit */ void contwithin() { /* compute the within-species contrasts, if any */ long i, j, k; double *sumphen; sumphen = (double *)Malloc((long)chars*sizeof(double)); for (i = 0; i <= spp-1 ; i++) { for (j = 0; j < chars; j++) sumphen[j] = 0.0; for (k = 0; k <= (sample[i]-1); k++) { for (j = 0; j < chars; j++) { if (k > 0) cntrast[i][k][j] = 
(sumphen[j] - k*x[i][k][j])/sqrt((double)(k*(k+1))); sumphen[j] += x[i][k][j]; if (k == (sample[i]-1)) curtree.nodep[i]->view[j] = sumphen[j]/sample[i]; x[i][0][j] = sumphen[j]/sample[i]; } if (k == 0) curtree.nodep[i]->ssq = 1.0/sample[i]; /* sum of squares for sp. i */ else ssqcont[i][k] = 1.0; /* if a within contrast */ } } free(sumphen); contno = 1; } /* contwithin */ void contbetween(node *p, node *q) { /* compute one contrast */ long i; double v1, v2; for (i = 0; i < chars; i++) cntrast[contno - 1][0][i] = (p->view[i] - q->view[i])/sqrt(p->ssq+q->ssq); v1 = q->v + q->deltav; if (p->back != q) v2 = p->v + p->deltav; else v2 = p->deltav; ssqcont[contno - 1][0] = (v1 + v2)/(p->ssq + q->ssq); /* this is really the variance of the contrast */ contno++; } /* contbetween */ void nuview(node *p) { /* renew information about subtrees */ long j; node *q, *r; double v1, v2, vtot, f1, f2; q = p->next->back; r = p->next->next->back; v1 = q->v + q->deltav; v2 = r->v + r->deltav; vtot = v1 + v2; if (vtot > 0.0) f1 = v2 / vtot; else f1 = 0.5; f2 = 1.0 - f1; for (j = 0; j < chars; j++) p->view[j] = f1 * q->view[j] + f2 * r->view[j]; p->deltav = v1 * f1; p->ssq = f1*f1*q->ssq + f2*f2*r->ssq; } /* nuview */ void makecontrasts(node *p) { /* compute the contrasts, recursively */ if (p->tip) return; makecontrasts(p->next->back); makecontrasts(p->next->next->back); nuview(p); contbetween(p->next->back, p->next->next->back); } /* makecontrasts */ void writecontrasts() { /* write out the contrasts */ long i, j; if (printdata || reg) { fprintf(outfile, "\nContrasts (columns are different characters)\n"); fprintf(outfile, "--------- -------- --- --------- -----------\n\n"); } for (i = 0; i <= contno - 2; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.5f", cntrast[i][0][j]/sqrt(ssqcont[i][0])); putc('\n', outfile); } } /* writecontrasts */ void regressions() { /* compute regressions and correlations among contrasts */ long i, j, k; double **sumprod; sumprod = (double **)Malloc((long)chars*sizeof(double *)); for (i = 0; i < chars; i++) { sumprod[i] = (double *)Malloc((long)chars*sizeof(double)); for (j = 0; j < chars; j++) sumprod[i][j] = 0.0; } for (i = 0; i <= contno - 2; i++) { for (j = 0; j < chars; j++) { for (k = 0; k < chars; k++) sumprod[j][k] += cntrast[i][0][j] * cntrast[i][0][k] / ssqcont[i][0]; } } fprintf(outfile, "\nCovariance matrix\n"); fprintf(outfile, "---------- ------\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) sumprod[i][j] /= contno - 1; } for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", sumprod[i][j]); putc('\n', outfile); } fprintf(outfile, "\nRegressions (columns on rows)\n"); fprintf(outfile, "----------- -------- -- -----\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", sumprod[i][j] / sumprod[i][i]); putc('\n', outfile); } fprintf(outfile, "\nCorrelations\n"); fprintf(outfile, "------------\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", sumprod[i][j] / sqrt(sumprod[i][i] * sumprod[j][j])); putc('\n', outfile); } for (i = 0; i < chars; i++) free(sumprod[i]); free(sumprod); } /* regressions */ double logdet(double **a) { /* Gauss-Jordan log determinant calculation. in place, overwriting previous contents of a. On exit, matrix a contains the inverse. 
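/* A standalone sketch (not from the PHYLIP sources) of one
   independent-contrasts step as implemented by nuview() and contbetween()
   above, written out for a single cherry: two tips with trait values
   x1, x2 on branches of length v1, v2.  The standardized contrast printed
   by writecontrasts() is (x1 - x2)/sqrt(v1 + v2); the ancestor gets the
   weighted mean with f1 = v2/(v1 + v2), and its own branch is lengthened
   by deltav = v1*v2/(v1 + v2) before the next contrast is taken.  The
   numbers below are toy values. */

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x1 = 3.1, x2 = 4.7;            /* tip phenotypes             */
    double v1 = 0.4, v2 = 0.9;            /* branch lengths to ancestor */

    double contrast = (x1 - x2) / sqrt(v1 + v2);      /* unit variance  */
    double xanc = (v2 * x1 + v1 * x2) / (v1 + v2);    /* nuview's view  */
    double deltav = v1 * v2 / (v1 + v2);              /* nuview's deltav */

    printf("contrast %.4f  ancestral value %.4f  added variance %.4f\n",
           contrast, xanc, deltav);
    return 0;
}
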
Works only for positive definite A */ long i, j, k; double temp, sum; sum = 0.0; for (i = 0; i < chars; i++) { if (fabs(a[i][i]) < 1.0E-37) { printf("ERROR: tried to invert singular matrix.\n"); exxit(-1); } sum += log(a[i][i]); temp = 1.0 / a[i][i]; a[i][i] = 1.0; for (j = 0; j < chars; j++) a[i][j] *= temp; for (j = 0; j < chars; j++) { if (j != i) { temp = a[j][i]; a[j][i] = 0.0; for (k = 0; k < chars; k++) a[j][k] -= temp * a[i][k]; } } } return(sum); } /* logdet */ void invert(double **a) { /* Gauss-Jordan reduction -- invert chars x chars matrix a in place, overwriting previous contents of a. On exit, matrix a contains the inverse.*/ long i, j, k; double temp; for (i = 0; i < chars; i++) { if (fabs(a[i][i]) < 1.0E-37) { printf("ERROR: tried to invert singular matrix.\n"); exxit(-1); } temp = 1.0 / a[i][i]; a[i][i] = 1.0; for (j = 0; j < chars; j++) a[i][j] *= temp; for (j = 0; j < chars; j++) { if (j != i) { temp = a[j][i]; a[j][i] = 0.0; for (k = 0; k < chars; k++) a[j][k] -= temp * a[i][k]; } } } } /*invert*/ void initcovars(boolean novara) { /* Initialize covariance estimates */ long i, j, k, l, contswithin; /* zero the matrices */ for (i = 0; i < chars; i++) for (j = 0; j < chars; j++) { vara[i][j] = 0.0; vare[i][j] = 0.0; } /* estimate VE from within contrasts -- unbiasedly */ contswithin = 0; for (i = 0; i < spp; i++) { for (j = 1; j < sample[i]; j++) { contswithin++; for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) vare[k][l] += cntrast[i][j][k]*cntrast[i][j][l]; } } /* estimate VA from between contrasts -- biasedly: does not take out VE */ if (!novara) { /* leave VarA = 0 if no A component assumed present */ for (i = 0; i < spp-1; i++) { for (j = 0; j < chars; j++) for (k = 0; k < chars; k++) if (ssqcont[i][0] <= 0.0) vara[j][k] += cntrast[i][0][j]*cntrast[i][0][k]; else vara[j][k] += cntrast[i][0][j]*cntrast[i][0][k] / ((long)(spp-1)*ssqcont[i][0]); } } for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) if (contswithin > 0) vare[k][l] /= contswithin; else { if (!novara) { vara[k][l] = 0.5 * vara[k][l]; vare[k][l] = vara[k][l]; } } } /* initcovars */ double normdiff(boolean novara) { /* Get relative norm of difference between old, new covariances */ double s; long i, j; s = 0.0; for (i = 0; i < chars; i++) for (j = 0; j < chars; j++) { if (!novara) { if (fabs(oldvara[i][j]) <= 0.00000001) s += vara[i][j]; else s += fabs(vara[i][j]/oldvara[i][j]-1.0); } if (fabs(oldvare[i][j]) <= 0.00000001) s += vare[i][j]; else s += fabs(vare[i][j]/oldvare[i][j]-1.0); } return s/((double)(chars*chars)); } /* normdiff */ void matcopy(double **a, double **b) { /* Copy matrices chars x chars: a to b */ long i; for (i = 0; i < chars; i++) { memcpy(b[i], a[i], chars*sizeof(double)); } } /* matcopy */ void newcovars(boolean novara) { /* one EM update of covariances, compute old likelihood too */ long i, j, k, l, m; double sum, sum2, sum3, sqssq; if (!novara) matcopy(vara, oldvara); matcopy(vare, oldvare); sum2 = 0.0; /* log likelihood of old parameters accumulates here */ for (i = 0; i < chars; i++) /* zero out vara and vare */ for (j = 0; j < chars; j++) { if (!novara) vara[i][j] = 0.0; vare[i][j] = 0.0; } for (i = 0; i < spp-1; i++) { /* accumulate over contrasts ... 
*/ if (i <= spp-2) { /* E(aa'|x) and E(ee'|x) for "between" contrasts */ sqssq = sqrt(ssqcont[i][0]); for (k = 0; k < chars; k++) /* compute (dA+E) for this contrast */ for (l = 0; l < chars; l++) if (!novara) temp1[k][l] = ssqcont[i][0] * oldvara[k][l] + oldvare[k][l]; else temp1[k][l] = oldvare[k][l]; matcopy(temp1, temp2); invert(temp2); /* compute (dA+E)^(-1) */ /* sum of - x (dA+E)^(-1) x'/2 for old A, E */ for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) sum2 -= cntrast[i][0][k]*temp2[k][l]*cntrast[i][0][l]/2.0; matcopy(temp1, temp3); sum2 -= 0.5 * logdet(temp3); /* log determinant term too */ if (!novara) { for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (j = 0; j < chars; j++) sum += temp2[k][j] * sqssq * oldvara[j][l]; Bax[k][l] = sum; /* Bax = (dA+E)^(-1) * sqrt(d) * A */ } } for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (j = 0; j < chars; j++) sum += temp2[k][j] * oldvare[j][l]; Bex[k][l] = sum; /* Bex = (dA+E)^(-1) * E */ } if (!novara) { for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (m = 0; m < chars; m++) sum += Bax[m][k] * (cntrast[i][0][m]*cntrast[i][0][l] -temp1[m][l]); temp2[k][l] = sum; /* Bax'*(xx'-(dA+E)) ... */ } for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (m = 0; m < chars; m++) sum += temp2[k][m] * Bax[m][l]; vara[k][l] += sum; /* ... * Bax */ } } for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (m = 0; m < chars; m++) sum += Bex[m][k] * (cntrast[i][0][m]*cntrast[i][0][l] -temp1[m][l]); temp2[k][l] = sum; /* Bex'*(xx'-(dA+E)) ... */ } for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { sum = 0.0; for (m = 0; m < chars; m++) sum += temp2[k][m] * Bex[m][l]; vare[k][l] += sum; /* ... * Bex */ } } } matcopy(oldvare, temp2); invert(temp2); /* get E^(-1) */ matcopy(oldvare, temp3); sum3 = 0.5 * logdet(temp3); /* get 1/2 log det(E) */ for (i = 0; i < spp; i++) { if (sample[i] > 1) { for (j = 1; j < sample[i]; j++) { /* E(aa'|x) (invisibly) and E(ee'|x) for within contrasts */ for (k = 0; k < chars; k++) for (l = 0; l < chars; l++) { vare[k][l] += cntrast[i][j][k] * cntrast[i][j][l] - oldvare[k][l]; sum2 -= cntrast[i][j][k] * temp2[k][l] * cntrast[i][j][l] / 2.0; /* accumulate - x*E^(-1)*x'/2 for old E */ } sum2 -= sum3; /* log determinant term too */ } } } for (i = 0; i < chars; i++) /* complete EM by dividing by denom ... */ for (j = 0; j < chars; j++) { /* ... 
and adding old VA, VE */ if (!novara) { vara[i][j] /= (double)contnum; vara[i][j] += oldvara[i][j]; } vare[i][j] /= (double)contnum; vare[i][j] += oldvare[i][j]; } logL = sum2; /* log likelihood for old values */ } /* newcovars */ void printcovariances(boolean novara) { /* print out ML covariances and regressions in the error-covariance case */ long i, j; fprintf(outfile, "\n\n"); if (novara) fprintf(outfile, "Estimates when VarA is not in the model\n\n"); else fprintf(outfile, "Estimates when VarA is in the model\n\n"); if (!novara) { fprintf(outfile, "Estimate of VarA\n"); fprintf(outfile, "-------- -- ----\n"); fprintf(outfile, "\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, " %12.6f ", vara[i][j]); fprintf(outfile, "\n"); } fprintf(outfile, "\n"); } fprintf(outfile, "Estimate of VarE\n"); fprintf(outfile, "-------- -- ----\n"); fprintf(outfile, "\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, " %12.6f ", vare[i][j]); fprintf(outfile, "\n"); } fprintf(outfile, "\n"); if (!novara) { fprintf(outfile, "VarA Regressions (columns on rows)\n"); fprintf(outfile, "---- ----------- -------- -- -----\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", vara[i][j] / vara[i][i]); putc('\n', outfile); } fprintf(outfile, "\n"); fprintf(outfile, "VarA Correlations\n"); fprintf(outfile, "---- ------------\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", vara[i][j] / sqrt(vara[i][i] * vara[j][j])); putc('\n', outfile); } fprintf(outfile, "\n"); } fprintf(outfile, "VarE Regressions (columns on rows)\n"); fprintf(outfile, "---- ----------- -------- -- -----\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", vare[i][j] / vare[i][i]); putc('\n', outfile); } fprintf(outfile, "\n"); fprintf(outfile, "\nVarE Correlations\n"); fprintf(outfile, "---- ------------\n\n"); for (i = 0; i < chars; i++) { for (j = 0; j < chars; j++) fprintf(outfile, "%10.4f", vare[i][j] / sqrt(vare[i][i] * vare[j][j])); putc('\n', outfile); } fprintf(outfile, "\n\n"); } /* printcovariances */ void emiterate(boolean novara) { /* EM iteration of error and phylogenetic covariances */ /* How to handle missing values? */ long its; double relnorm; initcovars(novara); its = 1; do { newcovars(novara); relnorm = normdiff(novara); if (its % 100 == 0) printf("Iteration no. 
%ld: ln L = %f, Norm = %f\n", its, logL, relnorm); its++; } while ((relnorm > 0.00001) && (its < 10000)); if (its == 10000) { printf("\nWARNING: Iterations did not converge."); printf(" Results may be unreliable.\n"); } } /* emiterate */ void initcontrastnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; nodep[(*p)->index - 1] = (*p); (*p)->view = (phenotype3)Malloc((long)chars*sizeof(double)); break; case nonbottom: gnu(grbg, p); (*p)->index = nodei; (*p)->view = (phenotype3)Malloc((long)chars*sizeof(double)); break; case tip: match_names_to_data (str, nodep, p, spp); (*p)->view = (phenotype3)Malloc((long)chars*sizeof(double)); (*p)->deltav = 0.0; break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); (*p)->v = valyew / divisor; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; default: /* cases of hslength,iter,hsnolength,treewt,unittrwt*/ break; /* not handled */ } } /* initcontrastnode */ void maketree() { /* set up the tree and use it */ long which, nextnode; node *q, *r; char* treestr; alloctree(&curtree.nodep, nonodes); setuptree(&curtree, nonodes); which = 1; while (which <= numtrees) { if ((printdata || reg) && numtrees > 1) { fprintf(outfile, "Tree number%4ld\n", which); fprintf(outfile, "==== ====== ====\n\n"); } nextnode = 0; nextnode = 0; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread (&treestr, &curtree.start, curtree.nodep, &goteof, &first, curtree.nodep, &nextnode, &haslengths, &grbg, initcontrastnode,false,nonodes); q = curtree.start; r = curtree.start; while (!(q->next == curtree.start)) q = q->next; q->next = curtree.start->next; curtree.start = q; chuck(&grbg, r); curtree.nodep[spp] = q; bifurcating = (curtree.start->next->next == curtree.start); contwithin(); makecontrasts(curtree.start); if (!bifurcating) { makecontrasts(curtree.start->back); contbetween(curtree.start, curtree.start->back); } if (!varywithin) { if (writecont) writecontrasts(); if (reg) regressions(); putc('\n', outfile); } else { emiterate(false); printcovariances(false); if (nophylo) { logLvara = logL; emiterate(nophylo); printcovariances(nophylo); logLnovara = logL; fprintf(outfile, "\n\n\n Likelihood Ratio Test"); fprintf(outfile, " of no VarA component\n"); fprintf(outfile, " ---------- ----- ----"); fprintf(outfile, " -- -- ---- ---------\n\n"); fprintf(outfile, " Log likelihood with varA = %13.5f,", logLvara); fprintf(outfile, " %ld parameters\n\n", chars*(chars+1)); fprintf(outfile, " Log likelihood without varA = %13.5f,", logLnovara); fprintf(outfile, " %ld parameters\n\n", chars*(chars+1)/2); fprintf(outfile, " difference = %13.5f\n\n", logLvara-logLnovara); fprintf(outfile, " Chi-square value = %13.5f, ", 2.0*(logLvara-logLnovara)); fprintf(outfile, " %ld degrees of freedom\n\n", chars*(chars+1)/2); } } which++; } if (progress) printf("\nOutput written to file \"%s\"\n\n", outfilename); } /* maketree */ int main(int argc, Char *argv[]) { /* main program */ #ifdef MAC argc = 1; /* macsetup("Contrast","Contrast"); */ argv[0] = "Contrast"; #endif init(argc, argv); emboss_getoptions("fcontrast", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinit(); getdata(); maketree(); FClose(infile); FClose(outfile); 
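/* A standalone sketch (not from the PHYLIP sources) of the convergence
   control used by emiterate() above: repeat the covariance update,
   measure the mean relative change of the parameter matrix the way
   normdiff() does, report every 100th iteration, and stop when the norm
   drops below 1e-5 or after 10000 iterations.  The "update" here is a
   toy contraction toward a fixed target, purely illustrative; NP stands
   in for chars x chars. */

#include <math.h>
#include <stdio.h>

#define NP 2

int main(void)
{
    double cur[NP][NP]    = { { 5.0, 1.0 }, { 1.0, 3.0 } };
    double target[NP][NP] = { { 2.0, 0.5 }, { 0.5, 1.0 } };
    double old[NP][NP];
    double relnorm;
    long its = 1;
    int i, j;

    do {
        for (i = 0; i < NP; i++)          /* keep the old estimates     */
            for (j = 0; j < NP; j++)
                old[i][j] = cur[i][j];
        for (i = 0; i < NP; i++)          /* toy stand-in for newcovars */
            for (j = 0; j < NP; j++)
                cur[i][j] = 0.5 * (cur[i][j] + target[i][j]);
        relnorm = 0.0;                    /* normdiff()-style criterion */
        for (i = 0; i < NP; i++)
            for (j = 0; j < NP; j++)
                relnorm += (fabs(old[i][j]) <= 1.0e-8)
                               ? cur[i][j]
                               : fabs(cur[i][j] / old[i][j] - 1.0);
        relnorm /= (double)(NP * NP);
        if (its % 100 == 0)
            printf("Iteration no. %ld: Norm = %f\n", its, relnorm);
        its++;
    } while (relnorm > 0.00001 && its < 10000);

    printf("converged after %ld iterations\n", its - 1);
    return 0;
}
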
FClose(intree); printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/printree.c0000664000175000017500000001042111001347767013055 00000000000000#include "printree.h" static void mlk_drawline(FILE *fp, tree *t, long i, double scale) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j; boolean extra, done; p = t->root; q = t->root; extra = false; if ((long)(p->ycoord) == i) { if (p->index - spp >= 10) fprintf(fp, "-%2ld", p->index - spp); else fprintf(fp, "--%ld", p->index - spp); extra = true; } else fprintf(fp, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)(scale * ((long)(p->xcoord) - (long)(q->xcoord)) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)(q->ycoord) == i && !done) { if (p->ycoord != q->ycoord) putc('+', fp); else putc('-', fp); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', fp); if (q->index - spp >= 10) fprintf(fp, "%2ld", q->index - spp); else fprintf(fp, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', fp); } } else if (!p->tip) { if ((long)(last->ycoord) > i && (long)(first->ycoord) < i && i != (long)(p->ycoord)) { putc('!', fp); for (j = 1; j < n; j++) putc(' ', fp); } else { for (j = 1; j <= n; j++) putc(' ', fp); } } else { for (j = 1; j <= n; j++) putc(' ', fp); } if (p != q) p = q; } while (!done); if ((long)(p->ycoord) == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], fp); } putc('\n', fp); } /* mlk_drawline */ static void mlk_coordinates(node *p, long *tipy) { /* establishes coordinates of nodes */ node *q, *first, *last, *pp1 =NULL, *pp2 =NULL; long num_sibs, p1, p2, i; if (p->tip) { p->xcoord = 0; p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; return; } q = p->next; do { mlk_coordinates(q->back, tipy); q = q->next; } while (p != q); num_sibs = count_sibs(p); p1 = (long)((num_sibs+1)/2.0); p2 = (long)((num_sibs+2)/2.0); i = 1; q = p->next; first = q->back; do { if (i == p1) pp1 = q->back; if (i == p2) pp2 = q->back; last = q->back; q = q->next; i++; } while (q != p); p->xcoord = (long)(0.5 - over * p->tyme); p->ycoord = (pp1->ycoord + pp2->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* mlk_coordinates */ void mlk_printree(FILE *fp, tree *t) { /* prints out diagram of the tree */ long tipy; double scale; long i; node *p; assert(fp != NULL); putc('\n', fp); tipy = 1; mlk_coordinates(t->root, &tipy); p = t->root; while (!p->tip) p = p->next->back; scale = 1.0 / (long)(p->tyme - t->root->tyme + 1.000); putc('\n', fp); for (i = 1; i <= tipy - down; i++) mlk_drawline(fp, t, i, scale); putc('\n', fp); } /* dnamlk_printree */ static void describe_r(FILE *fp, tree *t, node *p, double fracchange) { long i, num_sibs; node *sib_ptr, *sib_back_ptr; double v; if (p == t->root) fprintf(fp, " root "); else fprintf(fp, "%4ld ", p->back->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index - 1][i], fp); } else fprintf(fp, "%4ld ", p->index - spp); if (p != t->root) { fprintf(fp, "%11.5f", fracchange * (p->tyme - t->root->tyme)); v = fracchange * (p->tyme - t->nodep[p->back->index - 1]->tyme); fprintf(fp, "%13.5f", v); } putc('\n', fp); if (!p->tip) { 
sib_ptr = p; num_sibs = count_sibs(p); for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; describe_r(fp, t, sib_back_ptr, fracchange); } } } /* describe */ void mlk_describe(FILE *fp, tree *t, double fracchange) { describe_r(fp, t, t->root, fracchange); } PHYLIPNEW-3.69.650/src/discrete.c0000664000175000017500000024355411605067345013047 00000000000000#include "phylip.h" #include "discrete.h" /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ long nonodes, endsite, outgrno, nextree, which; boolean interleaved, printdata, outgropt, treeprint, dotdiff; steptr weight, category, alias, location, ally; sequence y, convtab; void discrete_inputdata(AjPPhyloState state, long chars) { /* input the names and sequences for each species */ /* used by pars */ long i, j, k, l; long nsymbol=0, convsymboli=0; Char charstate; boolean found; if (printdata) headings(chars, "Sequences", "---------"); for(i=0;iStr[i]),chars); y[i][chars] = '\0'; } if(!printdata) return; if (printdata) { for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { for (j = 1; j <= spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j - 1][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (dotdiff && (j > 1 && y[j - 1][k - 1] == y[0][k - 1])) charstate = '.'; else charstate = y[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } for (i = 1; i <= chars; i++) { nsymbol = 0; for (j = 1; j <= spp; j++) { if ((nsymbol == 0) && (y[j - 1][i - 1] != '?')) { nsymbol = 1; convsymboli = 1; convtab[0][i-1] = y[j-1][i-1]; } else if (y[j - 1][i - 1] != '?'){ found = false; for (k = 1; k <= nsymbol; k++) { if (convtab[k - 1][i - 1] == y[j - 1][i - 1]) { found = true; convsymboli = k; } } if (!found) { nsymbol++; convtab[nsymbol-1][i - 1] = y[j - 1][i - 1]; convsymboli = nsymbol; } } if (nsymbol <= 8) { if (y[j - 1][i - 1] != '?') y[j - 1][i - 1] = (Char)('0' + (convsymboli - 1)); } else { printf( "\n\nERROR: More than maximum of 8 symbols in column %ld\n\n", i); exxit(-1); } } } } /* inputdata */ void alloctree(pointarray *treenode, long nonodes, boolean usertree) { /* allocate treenode dynamically */ /* used in pars */ long i, j; node *p, *q; *treenode = (pointarray)Malloc(nonodes*sizeof(node *)); for (i = 0; i < spp; i++) { (*treenode)[i] = (node *)Malloc(sizeof(node)); (*treenode)[i]->tip = true; (*treenode)[i]->index = i+1; (*treenode)[i]->iter = true; (*treenode)[i]->branchnum = i+1; (*treenode)[i]->initialized = true; } if (!usertree) for (i = spp; i < nonodes; i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->tip = false; p->index = i+1; p->iter = true; p->branchnum = i+1; p->initialized = false; p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree */ void setuptree(pointarray treenode, long nonodes, boolean usertree) { /* initialize treenodes */ long i; node *p; for (i = 1; i <= nonodes; i++) { if (i <= spp || !usertree) { treenode[i-1]->back = NULL; treenode[i-1]->tip = (i <= spp); treenode[i-1]->index = i; treenode[i-1]->numdesc = 0; treenode[i-1]->iter = true; treenode[i-1]->initialized = true; } } if (!usertree) { for (i = spp + 1; 
i <= nonodes; i++) { p = treenode[i-1]->next; while (p != treenode[i-1]) { p->back = NULL; p->tip = false; p->index = i; p->numdesc = 0; p->iter = true; p->initialized = false; p = p->next; } } } } /* setuptree */ void alloctip(node *p, long *zeros, unsigned char *zeros2) { /* allocate a tip node */ /* used by pars */ p->numsteps = (steptr)Malloc(endsite*sizeof(long)); p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); p->discbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); p->olddiscbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); memcpy(p->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(p->numsteps, zeros, endsite*sizeof(long)); memcpy(p->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); } /* alloctip */ void sitesort(long chars, steptr weight) { /* Shell sort keeping sites, weights in same order */ /* used in pars */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied; gap = chars / 2; while (gap > 0) { for (i = gap + 1; i <= chars; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = alias[j - 1]; jg = alias[j + gap - 1]; tied = true; k = 1; while (k <= spp && tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (!flip) break; itemp = alias[j - 1]; alias[j - 1] = alias[j + gap - 1]; alias[j + gap - 1] = itemp; itemp = weight[j - 1]; weight[j - 1] = weight[j + gap - 1]; weight[j + gap - 1] = itemp; j -= gap; } } gap /= 2; } } /* sitesort */ void sitecombine(long chars) { /* combine sites that have identical patterns */ /* used in pars */ long i, j, k; boolean tied; i = 1; while (i < chars) { j = i + 1; tied = true; while (j <= chars && tied) { k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); k++; } if (tied) { weight[i - 1] += weight[j - 1]; weight[j - 1] = 0; ally[alias[j - 1] - 1] = alias[i - 1]; } j++; } i = j - 1; } } /* sitecombine */ void sitescrunch(long chars) { /* move so one representative of each pattern of sites comes first */ /* used in pars */ long i, j, itemp; boolean done, found; done = false; i = 1; j = 2; while (!done) { if (ally[alias[i - 1] - 1] != alias[i - 1]) { if (j <= i) j = i + 1; if (j <= chars) { do { found = (ally[alias[j - 1] - 1] == alias[j - 1]); j++; } while (!(found || j > chars)); if (found) { j--; itemp = alias[i - 1]; alias[i - 1] = alias[j - 1]; alias[j - 1] = itemp; itemp = weight[i - 1]; weight[i - 1] = weight[j - 1]; weight[j - 1] = itemp; } else done = true; } else done = true; } i++; done = (done || i >= chars); } } /* sitescrunch */ void makevalues(pointarray treenode, long *zeros, unsigned char *zeros2, boolean usertree) { /* set up fractional likelihoods at tips */ /* used by pars */ long i, j; unsigned char ns=0; node *p; setuptree(treenode, nonodes, usertree); for (i = 0; i < spp; i++) alloctip(treenode[i], zeros, zeros2); if (!usertree) { for (i = spp; i < nonodes; i++) { p = treenode[i]; do { allocdiscnontip(p, zeros, zeros2, endsite); p = p->next; } while (p != treenode[i]); } } for (j = 0; j < endsite; j++) { for (i = 0; i < spp; i++) { switch (y[i][alias[j] - 1]) { case '0': ns = 1 << zero; break; case '1': ns = 1 << one; break; case '2': ns = 1 << two; break; case '3': ns = 1 << three; break; case '4': ns = 1 << four; break; case '5': ns = 1 << five; break; case '6': ns = 1 << six; break; case '7': ns = 1 << seven; break; case '?': ns = (1 << zero) | (1 << one) | (1 << two) | (1 << three) | (1 << four) | (1 << 
five) | (1 << six) | (1 << seven); break; } treenode[i]->discbase[j] = ns; treenode[i]->numsteps[j] = 0; } } } /* makevalues */ void fillin(node *p, node *left, node *rt) { /* sets up for each node in the tree the base sequence at that point and counts the changes. */ long i, j, k, n; node *q; if (!left) { memcpy(p->discbase, rt->discbase, endsite*sizeof(unsigned char)); memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); q = rt; } else if (!rt) { memcpy(p->discbase, left->discbase, endsite*sizeof(unsigned char)); memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); q = left; } else { for (i = 0; i < endsite; i++) { p->discbase[i] = left->discbase[i] & rt->discbase[i]; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (p->discbase[i] == 0) { p->discbase[i] = left->discbase[i] | rt->discbase[i]; p->numsteps[i] += weight[i]; } } q = rt; } if (left && rt) n = 2; else n = 1; for (i = 0; i < endsite; i++) for (j = (long)zero; j <= (long)seven; j++) p->discnumnuc[i][j] = 0; for (k = 1; k <= n; k++) { if (k == 2) q = left; for (i = 0; i < endsite; i++) { for (j = (long)zero; j <= (long)seven; j++) { if (q->discbase[i] & (1 << j)) p->discnumnuc[i][j]++; } } } } /* fillin */ long getlargest(long *discnumnuc) { /* find the largest in array numnuc */ long i, largest; largest = 0; for (i = (long)zero; i <= (long)seven; i++) if (discnumnuc[i] > largest) largest = discnumnuc[i]; return largest; } /* getlargest */ void multifillin(node *p, node *q, long dnumdesc) { /* sets up for each node in the tree the base sequence at that point and counts the changes according to the changes in q's base */ long i, j, largest, descsteps; unsigned char b; memcpy(p->olddiscbase, p->discbase, endsite*sizeof(unsigned char)); memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); for (i = 0; i < endsite; i++) { descsteps = 0; for (j = (long)zero; j <= (long)seven; j++) { b = 1 << j; if ((descsteps == 0) && (p->discbase[i] & b)) descsteps = p->numsteps[i] - (p->numdesc - dnumdesc - p->discnumnuc[i][j]) * weight[i]; } if (dnumdesc == -1) descsteps -= q->oldnumsteps[i]; else if (dnumdesc == 0) descsteps += (q->numsteps[i] - q->oldnumsteps[i]); else descsteps += q->numsteps[i]; if (q->olddiscbase[i] != q->discbase[i]) { for (j = (long)zero; j <= (long)seven; j++) { b = 1 << j; if ((q->olddiscbase[i] & b) && !(q->discbase[i] & b)) p->discnumnuc[i][j]--; else if (!(q->olddiscbase[i] & b) && (q->discbase[i] & b)) p->discnumnuc[i][j]++; } } largest = getlargest(p->discnumnuc[i]); if (q->olddiscbase[i] != q->discbase[i]) { p->discbase[i] = 0; for (j = (long)zero; j <= (long)seven; j++) { if (p->discnumnuc[i][j] == largest) p->discbase[i] |= (1 << j); } } p->numsteps[i] = (p->numdesc - largest) * weight[i] + descsteps; } } /* multifillin */ void sumnsteps(node *p, node *left, node *rt, long a, long b) { /* sets up for each node in the tree the base sequence at that point and counts the changes. 
*/ long i; unsigned char ns, rs, ls; if (!left) { memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); memcpy(p->discbase, rt->discbase, endsite*sizeof(unsigned char)); } else if (!rt) { memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); memcpy(p->discbase, left->discbase, endsite*sizeof(unsigned char)); } else for (i = a; i < b; i++) { ls = left->discbase[i]; rs = rt->discbase[i]; ns = ls & rs; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (ns == 0) { ns = ls | rs; p->numsteps[i] += weight[i]; } p->discbase[i] = ns; } } /* sumnsteps */ void sumnsteps2(node *p, node *left, node *rt, long a, long b, long *threshwt) { /* counts the changes at each node. */ long i, steps; unsigned char ns, rs, ls; long term; if (a == 0) p->sumsteps = 0.0; if (!left) memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); else if (!rt) memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); else for (i = a; i < b; i++) { ls = left->discbase[i]; rs = rt->discbase[i]; ns = ls & rs; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (ns == 0) p->numsteps[i] += weight[i]; } for (i = a; i < b; i++) { steps = p->numsteps[i]; if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; p->sumsteps += (double)term; } } /* sumnsteps2 */ void multisumnsteps(node *p, node *q, long a, long b, long *threshwt) { /* sets up for each node in the tree the base sequence at that point and counts the changes according to the changes in q's base */ long i, j, steps, largest, descsteps; long term; if (a == 0) p->sumsteps = 0.0; for (i = a; i < b; i++) { descsteps = 0; for (j = (long)zero; j <= (long)seven; j++) { if ((descsteps == 0) && (p->discbase[i] & (1 << j))) descsteps = p->numsteps[i] - (p->numdesc - 1 - p->discnumnuc[i][j]) * weight[i]; } descsteps += q->numsteps[i]; largest = 0; for (j = (long)zero; j <= (long)seven; j++) { if (q->discbase[i] & (1 << j)) p->discnumnuc[i][j]++; if (p->discnumnuc[i][j] > largest) largest = p->discnumnuc[i][j]; } steps = ((p->numdesc - largest) * weight[i] + descsteps); if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; p->sumsteps += (double)term; } } /* multisumnsteps */ void multisumnsteps2(node *p) { /* counts the changes at each multi-way node. Sums up steps of all descendants */ long i, j, largest; node *q; discbaseptr b; for (i = 0; i < endsite; i++) { p->numsteps[i] = 0; q = p->next; while (q != p) { if (q->back) { p->numsteps[i] += q->back->numsteps[i]; b = q->back->discbase; for (j = (long)zero; j <= (long)seven; j++) if (b[i] & (1 << j)) p->discnumnuc[i][j]++; } q = q->next; } largest = getlargest(p->discnumnuc[i]); p->numsteps[i] += ((p->numdesc - largest) * weight[i]); p->discbase[i] = 0; for (j = (long)zero; j <= (long)seven; j++) { if (p->discnumnuc[i][j] == largest) p->discbase[i] |= (1 << j); } } } /* multisumnsteps2 */ boolean alltips(node *forknode, node *p) { /* returns true if all descendants of forknode except p are tips; false otherwise. 
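/* A standalone sketch (not from the PHYLIP sources) of the Fitch
   parsimony rule that fillin() and sumnsteps() above apply to states
   packed as bits of an unsigned char: keep the intersection of the two
   children's state sets when it is non-empty, otherwise take the union
   and charge the site's weight as one extra step.  The symbol-to-bit
   mapping assumes the zero..seven codes run 0..7, as the shifts in
   makevalues() suggest; names and values here are illustrative. */

#include <stdio.h>

static unsigned char stateset(char c)     /* symbol -> bit mask         */
{
    return (c == '?') ? 0xff : (unsigned char)(1 << (c - '0'));
}

int main(void)
{
    unsigned char left  = stateset('1');  /* left child's state set     */
    unsigned char right = stateset('3');  /* right child's state set    */
    long steps = 0, weight = 1;
    unsigned char anc = left & right;     /* try the intersection first */

    if (anc == 0) {                       /* disjoint: take the union   */
        anc = left | right;
        steps += weight;                  /* ...and count one step      */
    }
    printf("ancestral state set 0x%02x, steps %ld\n", anc, steps);
    return 0;
}
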
*/ node *q, *r; boolean tips; tips = true; r = forknode; q = forknode->next; do { if (q->back && q->back != p && !q->back->tip) tips = false; q = q->next; } while (tips && q != r); return tips; } /* alltips */ void gdispose(node *p, node **grbg, pointarray treenode) { /* go through tree throwing away nodes */ node *q, *r; p->back = NULL; if (p->tip) return; treenode[p->index - 1] = NULL; q = p->next; while (q != p) { gdispose(q->back, grbg, treenode); q->back = NULL; r = q; q = q->next; chuck(grbg, r); } chuck(grbg, q); } /* gdispose */ void preorder(node *p, node *r, node *root, node *removing, node *adding, node *changing, long dnumdesc) { /* recompute number of steps in preorder taking both ancestoral and descendent steps into account. removing points to a node being removed, if any */ node *q, *p1, *p2; if (p && !p->tip && p != adding) { q = p; do { if (p->back != r) { if (p->numdesc > 2) { if (changing) multifillin (p, r, dnumdesc); else multifillin (p, r, 0); } else { p1 = p->next; if (!removing) while (!p1->back) p1 = p1->next; else while (!p1->back || p1->back == removing) p1 = p1->next; p2 = p1->next; if (!removing) while (!p2->back) p2 = p2->next; else while (!p2->back || p2->back == removing) p2 = p2->next; p1 = p1->back; p2 = p2->back; if (p->back == p1) p1 = NULL; else if (p->back == p2) p2 = NULL; memcpy(p->olddiscbase, p->discbase, endsite*sizeof(unsigned char)); memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); fillin(p, p1, p2); } } p = p->next; } while (p != q); q = p; do { preorder(p->next->back, p->next, root, removing, adding, NULL, 0); p = p->next; } while (p->next != q); } } /* preorder */ void updatenumdesc(node *p, node *root, long n) { /* set p's numdesc to n. If p is the root, numdesc of p's descendants are set to n-1. */ node *q; q = p; if (p == root && n > 0) { p->numdesc = n; n--; q = q->next; } do { q->numdesc = n; q = q->next; } while (q != p); } void add(node *below, node *newtip, node *newfork, node **root, boolean recompute, pointarray treenode, node **grbg, long *zeros, unsigned char *zeros2) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant. 
if newfork is NULL, newtip is added as below's sibling */ /* used in pars */ node *p; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (newfork) { if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (*root == below) *root = newfork; updatenumdesc(newfork, *root, 2); } else { gnudisctreenode(grbg, &p, below->index, endsite, zeros, zeros2); p->back = newtip; newtip->back = p; p->next = below->next; below->next = p; updatenumdesc(below, *root, below->numdesc + 1); } if (!newtip->tip) updatenumdesc(newtip, *root, newtip->numdesc); (*root)->back = NULL; if (!recompute) return; if (!newfork) { memcpy(newtip->back->discbase, below->discbase, endsite*sizeof(unsigned char)); memcpy(newtip->back->numsteps, below->numsteps, endsite*sizeof(long)); memcpy(newtip->back->discnumnuc, below->discnumnuc, endsite*sizeof(discnucarray)); if (below != *root) { memcpy(below->back->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); multifillin(newtip->back, below->back, 1); } if (!newtip->tip) { memcpy(newtip->back->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(newtip, newtip->back, *root, NULL, NULL, below, 1); } memcpy(newtip->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(newtip->oldnumsteps, zeros, endsite*sizeof(long)); preorder(below, newtip, *root, NULL, newtip, below, 1); if (below != *root) preorder(below->back, below, *root, NULL, NULL, NULL, 0); } else { fillin(newtip->back, newtip->back->next->back, newtip->back->next->next->back); if (!newtip->tip) { memcpy(newtip->back->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(newtip, newtip->back, *root, NULL, NULL, newfork, 1); } if (newfork != *root) { memcpy(below->back->discbase, newfork->back->discbase, endsite*sizeof(unsigned char)); memcpy(below->back->numsteps, newfork->back->numsteps, endsite*sizeof(long)); preorder(newfork, newtip, *root, NULL, newtip, NULL, 0); } else { fillin(below->back, newtip, NULL); fillin(newfork, newtip, below); memcpy(below->back->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(below, below->back, *root, NULL, NULL, newfork, 1); } if (newfork != *root) { memcpy(newfork->olddiscbase, below->discbase, endsite*sizeof(unsigned char)); memcpy(newfork->oldnumsteps, below->numsteps, endsite*sizeof(long)); preorder(newfork->back, newfork, *root, NULL, NULL, NULL, 0); } } } /* add */ void findbelow(node **below, node *item, node *fork) { /* decide which of fork's binary children is below */ if (fork->next->back == item) *below = fork->next->next->back; else *below = fork->next->back; } /* findbelow */ void re_move(node *item, node **fork, node **root, boolean recompute, pointarray treenode, node **grbg, long *zeros, unsigned char *zeros2) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). Also returns pointers to the deleted nodes, item and fork. 
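When the fork is binary its two remaining neighbours are joined directly to each other and the fork's node ring is detached; with recompute true the per-site step counts are refreshed by calls to preorder.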
If item belongs to a node with more than 2 descendants, fork will not be deleted */ /* used in pars */ node *p, *q, *other=NULL, *otherback = NULL; if (item->back == NULL) { *fork = NULL; return; } *fork = treenode[item->back->index - 1]; if ((*fork)->numdesc == 2) { updatenumdesc(*fork, *root, 0); findbelow(&other, item, *fork); otherback = other->back; if (*root == *fork) { if (other->tip) *root = NULL; else { *root = other; updatenumdesc(other, *root, other->numdesc); } } p = item->back->next->back; q = item->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } } else { updatenumdesc(*fork, *root, (*fork)->numdesc - 1); p = *fork; while (p->next != item->back) p = p->next; p->next = item->back->next; } if (!item->tip) { updatenumdesc(item, item, item->numdesc); if (recompute) { memcpy(item->back->olddiscbase, item->back->discbase, endsite*sizeof(unsigned char)); memcpy(item->back->oldnumsteps, item->back->numsteps, endsite*sizeof(long)); memcpy(item->back->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(item->back->numsteps, zeros, endsite*sizeof(long)); preorder(item, item->back, *root, item->back, NULL, item, -1); } } if ((*fork)->numdesc >= 2) chuck(grbg, item->back); item->back = NULL; if (!recompute) return; if ((*fork)->numdesc == 0) { memcpy(otherback->olddiscbase, otherback->discbase, endsite*sizeof(unsigned char)); memcpy(otherback->oldnumsteps, otherback->numsteps, endsite*sizeof(long)); if (other == *root) { memcpy(otherback->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(otherback->numsteps, zeros, endsite*sizeof(long)); } else { memcpy(otherback->discbase, other->back->discbase, endsite*sizeof(unsigned char)); memcpy(otherback->numsteps, other->back->numsteps, endsite*sizeof(long)); } p = other->back; other->back = otherback; if (other == *root) preorder(other, otherback, *root, otherback, NULL, other, -1); else preorder(other, otherback, *root, NULL, NULL, NULL, 0); other->back = p; if (other != *root) { memcpy(other->olddiscbase,(*fork)->discbase, endsite*sizeof(unsigned char)); memcpy(other->oldnumsteps,(*fork)->numsteps, endsite*sizeof(long)); preorder(other->back, other, *root, NULL, NULL, NULL, 0); } } else { memcpy(item->olddiscbase, item->discbase, endsite*sizeof(unsigned char)); memcpy(item->oldnumsteps, item->numsteps, endsite*sizeof(long)); memcpy(item->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(item->numsteps, zeros, endsite*sizeof(long)); preorder(*fork, item, *root, NULL, NULL, *fork, -1); if (*fork != *root) preorder((*fork)->back, *fork, *root, NULL, NULL, NULL, 0); memcpy(item->discbase, item->olddiscbase, endsite*sizeof(unsigned char)); memcpy(item->numsteps, item->oldnumsteps, endsite*sizeof(long)); } } /* re_move */ void postorder(node *p) { /* traverses an n-ary tree, suming up steps at a node's descendants */ /* used in pars */ node *q; if (p->tip) return; q = p->next; while (q != p) { postorder(q->back); q = q->next; } zerodiscnumnuc(p, endsite); if (p->numdesc > 2) multisumnsteps2(p); else fillin(p, p->next->back, p->next->next->back); } /* postorder */ void getnufork(node **nufork, node **grbg, pointarray treenode, long *zeros, unsigned char *zeros2) { /* find a fork not used currently */ long i; i = spp; while (treenode[i] && treenode[i]->numdesc > 0) i++; if (!treenode[i]) gnudisctreenode(grbg, &treenode[i], i, endsite, zeros, zeros2); *nufork = treenode[i]; } /* getnufork */ void reroot(node 
*outgroup, node *root) { /* reorients tree, putting outgroup in desired position. used if the root is binary. */ /* used in pars */ node *p, *q; if (outgroup->back->index == root->index) return; p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = q; outgroup->back = p; } /* reroot */ void reroot2(node *outgroup, node *root) { /* reorients tree, putting outgroup in desired position. */ /* used in pars */ node *p; p = outgroup->back->next; while (p->next != outgroup->back) p = p->next; root->next = outgroup->back; p->next = root; } /* reroot2 */ void reroot3(node *outgroup,node *root,node *root2,node *lastdesc,node **grbg) { /* reorients tree, putting back outgroup in original position. */ /* used in pars */ node *p; p = root->next; while (p->next != root) p = p->next; chuck(grbg, root); p->next = outgroup->back; root2->next = lastdesc->next; lastdesc->next = root2; } /* reroot3 */ void savetraverse(node *p) { /* sets BOOLEANs that indicate which way is down */ node *q; p->bottom = true; if (p->tip) return; q = p->next; while (q != p) { q->bottom = false; savetraverse(q->back); q = q->next; } } /* savetraverse */ void newindex(long i, node *p) { /* assigns index i to node p */ while (p->index != i) { p->index = i; p = p->next; } } /* newindex */ void flipindexes(long nextnode, pointarray treenode) { /* flips index of nodes between nextnode and last node. */ long last; node *temp; last = nonodes; while (treenode[last - 1]->numdesc == 0) last--; if (last > nextnode) { temp = treenode[nextnode - 1]; treenode[nextnode - 1] = treenode[last - 1]; treenode[last - 1] = temp; newindex(nextnode, treenode[nextnode - 1]); newindex(last, treenode[last - 1]); } } /* flipindexes */ boolean parentinmulti(node *anode) { /* sees if anode's parent has more than 2 children */ node *p; while (!anode->bottom) anode = anode->next; p = anode->back; while (!p->bottom) p = p->next; return (p->numdesc > 2); } /* parentinmulti */ long sibsvisited(node *anode, long *place) { /* computes the number of nodes which are visited earlier than anode among its siblings */ node *p; long nvisited; while (!anode->bottom) anode = anode->next; p = anode->back->next; nvisited = 0; do { if (!p->bottom && place[p->back->index - 1] != 0) nvisited++; p = p->next; } while (p != anode->back); return nvisited; } /* sibsvisited */ long smallest(node *anode, long *place) { /* finds the smallest index of sibling of anode */ node *p; long min; while (!anode->bottom) anode = anode->next; p = anode->back->next; if (p->bottom) p = p->next; min = nonodes; do { if (p->back && place[p->back->index - 1] != 0) { if (p->back->index <= spp) { if (p->back->index < min) min = p->back->index; } else { if (place[p->back->index - 1] < min) min = place[p->back->index - 1]; } } p = p->next; if (p->bottom) p = p->next; } while (p != anode->back); return min; } /* smallest */ void bintomulti(node **root, node **binroot, node **grbg, long *zeros, unsigned char *zeros2) { /* attaches root's left child to its right child and makes the right child new root */ node *left, *right, *newnode, *temp; right = (*root)->next->next->back; left = (*root)->next->back; if (right->tip) { (*root)->next = right->back; (*root)->next->next = left->back; temp = left; left = right; right = temp; right->back->next = *root; } gnudisctreenode(grbg, &newnode, right->index, endsite, zeros, zeros2); newnode->next = right->next; newnode->back = left; left->back = newnode; right->next = 
newnode; (*root)->next->back = (*root)->next->next->back = NULL; *binroot = *root; (*binroot)->numdesc = 0; *root = right; (*root)->numdesc++; (*root)->back = NULL; } /* bintomulti */ void backtobinary(node **root, node *binroot, node **grbg) { /* restores binary root */ node *p; binroot->next->back = (*root)->next->back; (*root)->next->back->back = binroot->next; p = (*root)->next; (*root)->next = p->next; binroot->next->next->back = *root; (*root)->back = binroot->next->next; chuck(grbg, p); (*root)->numdesc--; *root = binroot; (*root)->numdesc = 2; } /* backtobinary */ boolean outgrin(node *root, node *outgrnode) { /* checks if outgroup node is a child of root */ node *p; p = root->next; while (p != root) { if (p->back == outgrnode) return true; p = p->next; } return false; } /* outgrin */ void flipnodes(node *nodea, node *nodeb) { /* flip nodes */ node *backa, *backb; backa = nodea->back; backb = nodeb->back; backa->back = nodeb; backb->back = nodea; nodea->back = backb; nodeb->back = backa; } /* flipnodes */ void moveleft(node *root, node *outgrnode, node **flipback) { /* makes outgroup node to leftmost child of root */ node *p; boolean done; p = root->next; done = false; while (p != root && !done) { if (p->back == outgrnode) { *flipback = p; flipnodes(root->next->back, p->back); done = true; } p = p->next; } } /* moveleft */ void savetree(node *root, long *place, pointarray treenode, node **grbg, long *zeros, unsigned char *zeros2) { /* record in place where each species has to be added to reconstruct this tree */ /* used by pars */ long i, j, nextnode, nvisited; node *p, *q, *r = NULL, *root2, *lastdesc, *outgrnode, *binroot, *flipback; boolean done, newfork; binroot = NULL; lastdesc = NULL; root2 = NULL; flipback = NULL; outgrnode = treenode[outgrno - 1]; if (root->numdesc == 2) bintomulti(&root, &binroot, grbg, zeros, zeros2); if (outgrin(root, outgrnode)) { if (outgrnode != root->next->back) moveleft(root, outgrnode, &flipback); } else { root2 = root; lastdesc = root->next; while (lastdesc->next != root) lastdesc = lastdesc->next; lastdesc->next = root->next; gnudisctreenode(grbg, &root, outgrnode->back->index, endsite, zeros, zeros2); root->numdesc = root2->numdesc; reroot2(outgrnode, root); } savetraverse(root); nextnode = spp + 1; for (i = nextnode; i <= nonodes; i++) if (treenode[i - 1]->numdesc == 0) flipindexes(i, treenode); for (i = 0; i < nonodes; i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= spp; i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; while (!p->bottom) p = p->next; r = p; p = p->back; } if (i > 1) { q = treenode[i - 1]; newfork = true; nvisited = sibsvisited(q, place); if (nvisited == 0) { if (parentinmulti(r)) { nvisited = sibsvisited(r, place); if (nvisited == 0) place[i - 1] = place[p->index - 1]; else if (nvisited == 1) place[i - 1] = smallest(r, place); else { place[i - 1] = -smallest(r, place); newfork = false; } } else place[i - 1] = place[p->index - 1]; } else if (nvisited == 1) { place[i - 1] = place[p->index - 1]; } else { place[i - 1] = -smallest(q, place); newfork = false; } if (newfork) { j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = nextnode; while (!p->bottom) p = p->next; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); if (done) { nextnode++; } } } } } if (flipback) flipnodes(outgrnode, flipback->back); else { if (root2) { reroot3(outgrnode, root, root2, lastdesc, grbg); root = root2; } } if (binroot) backtobinary(&root, 
binroot, grbg); } /* savetree */ void addnsave(node *p, node *item, node *nufork, node **root, node **grbg, boolean multf, pointarray treenode, long *place, long *zeros, unsigned char *zeros2) { /* adds item to tree and save it. Then removes item. */ node *dummy; if (!multf) add(p, item, nufork, root, false, treenode, grbg, zeros, zeros2); else add(p, item, NULL, root, false, treenode, grbg, zeros, zeros2); savetree(*root, place, treenode, grbg, zeros, zeros2); if (!multf) re_move(item, &nufork, root, false, treenode, grbg, zeros, zeros2); else re_move(item, &dummy, root, false, treenode, grbg, zeros, zeros2); } /* addnsave */ void addbestever(long *pos, long *nextree, long maxtrees, boolean collapse, long *place, bestelm *bestrees) { /* adds first best tree */ *pos = 1; *nextree = 1; initbestrees(bestrees, maxtrees, true); initbestrees(bestrees, maxtrees, false); addtree(*pos, nextree, collapse, place, bestrees); } /* addbestever */ void addtiedtree(long pos, long *nextree, long maxtrees, boolean collapse, long *place, bestelm *bestrees) { /* add tied tree */ if (*nextree <= maxtrees) addtree(pos, nextree, collapse, place, bestrees); } /* addtiedtree */ void clearcollapse(pointarray treenode) { /* clears collapse status at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->collapse = undefined; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->collapse = undefined; p = p->next; } } } } /* clearcollapse */ void clearbottom(pointarray treenode) { /* clears boolean bottom at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->bottom = false; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->bottom = false; p = p->next; } } } } /* clearbottom */ void collabranch(node *collapfrom, node *tempfrom, node *tempto) { /* collapse branch from collapfrom */ long i, j, largest, descsteps; boolean done; unsigned char b; for (i = 0; i < endsite; i++) { descsteps = 0; for (j = (long)zero; j <= (long)seven; j++) { b = 1 << j; if ((descsteps == 0) && (collapfrom->discbase[i] & b)) descsteps = tempfrom->oldnumsteps[i] - (collapfrom->numdesc - collapfrom->discnumnuc[i][j]) * weight[i]; } done = false; for (j = (long)zero; j <= (long)seven; j++) { b = 1 << j; if (!done && (tempto->discbase[i] & b)) { descsteps += (tempto->numsteps[i] - (tempto->numdesc - collapfrom->numdesc - tempto->discnumnuc[i][j]) * weight[i]); done = true; } } for (j = (long)zero; j <= (long)seven; j++) tempto->discnumnuc[i][j] += collapfrom->discnumnuc[i][j]; largest = getlargest(tempto->discnumnuc[i]); tempto->discbase[i] = 0; for (j = (long)zero; j <= (long)seven; j++) { if (tempto->discnumnuc[i][j] == largest) tempto->discbase[i] |= (1 << j); } tempto->numsteps[i] = (tempto->numdesc - largest) * weight[i] + descsteps; } } /* collabranch */ boolean allcommonbases(node *a, node *b, boolean *allsame) { /* see if bases are common at all sites for nodes a and b */ long i; boolean allcommon; allcommon = true; *allsame = true; for (i = 0; i < endsite; i++) { if ((a->discbase[i] & b->discbase[i]) == 0) allcommon = false; else if (a->discbase[i] != b->discbase[i]) *allsame = false; } return allcommon; } /* allcommonbases */ void findbottom(node *p, node **bottom) { /* find a node with field bottom set at node p */ node *q; if (p->bottom) *bottom = p; else { q = p->next; while(!q->bottom && q != p) q = q->next; *bottom = q; } } /* findbottom */ boolean moresteps(node *a, node *b) { /* see if numsteps of node a exceeds those of node b */ long i; for 
(i = 0; i < endsite; i++) if (a->numsteps[i] > b->numsteps[i]) return true; return false; } /* moresteps */ boolean passdown(node *desc, node *parent, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf) { /* track down to node start to see if an ancestor branch can be collapsed */ node *temp; boolean done, allsame; done = (parent == start); while (!done) { desc = parent; findbottom(parent->back, &parent); if (multf && start == below && parent == below) parent = added; memcpy(tempdsc->discbase, tempprt->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, desc->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); memcpy(tempprt->discbase, parent->discbase, endsite*sizeof(unsigned char)); memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(tempprt->discnumnuc, parent->discnumnuc, endsite*sizeof(discnucarray)); tempprt->numdesc = parent->numdesc; multifillin(tempprt, tempdsc, 0); if (!allcommonbases(tempprt, parent, &allsame)) return false; else if (moresteps(tempprt, parent)) return false; else if (allsame) return true; if (parent == added) parent = below; done = (parent == start); if (done && ((start == item) || (!multf && start == below))) { memcpy(tempdsc->discbase, tempprt->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, start->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); multifillin(added, tempdsc, 0); tempprt = added; } } temp = tempdsc; if (start == below || start == item) fillin(temp, tempprt, below->back); else fillin(temp, tempprt, added); return !moresteps(temp, total); } /* passdown */ boolean trycollapdesc(node *desc, node *parent, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf,long *zeros, unsigned char *zeros2) { /* see if branch between nodes desc and parent can be collapsed */ boolean allsame; if (desc->numdesc == 1) return true; if (multf && start == below && parent == below) parent = added; memcpy(tempdsc->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, desc->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); memcpy(tempprt->discbase, parent->discbase, endsite*sizeof(unsigned char)); memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(tempprt->discnumnuc, parent->discnumnuc, endsite*sizeof(discnucarray)); tempprt->numdesc = parent->numdesc - 1; multifillin(tempprt, tempdsc, -1); tempprt->numdesc += desc->numdesc; collabranch(desc, tempdsc, tempprt); if (!allcommonbases(tempprt, parent, &allsame) || moresteps(tempprt, parent)) { if (parent != added) { desc->collapse = nocollap; parent->collapse = nocollap; } return false; } else if (allsame) { if (parent != added) { desc->collapse = tocollap; parent->collapse = tocollap; } return true; } if (parent == added) parent = below; if ((start == item && parent == item) || (!multf && start == below && parent == below)) { memcpy(tempdsc->discbase, tempprt->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, start->discbase, endsite*sizeof(unsigned 
char)); memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); memcpy(tempprt->discbase, added->discbase, endsite*sizeof(unsigned char)); memcpy(tempprt->numsteps, added->numsteps, endsite*sizeof(long)); memcpy(tempprt->discnumnuc, added->discnumnuc, endsite*sizeof(discnucarray)); tempprt->numdesc = added->numdesc; multifillin(tempprt, tempdsc, 0); if (!allcommonbases(tempprt, added, &allsame)) return false; else if (moresteps(tempprt, added)) return false; else if (allsame) return true; } return passdown(desc, parent, start, below, item, added, total, tempdsc, tempprt, multf); } /* trycollapdesc */ void setbottom(node *p) { /* set field bottom at node p */ node *q; p->bottom = true; q = p->next; do { q->bottom = false; q = q->next; } while (q != p); } /* setbottom */ boolean zeroinsubtree(node *subtree, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf, node* root, long *zeros, unsigned char *zeros2) { /* sees if subtree contains a zero length branch */ node *p; if (!subtree->tip) { setbottom(subtree); p = subtree->next; do { if (p->back && !p->back->tip && !((p->back->collapse == nocollap) && (subtree->collapse == nocollap)) && (subtree->numdesc != 1)) { if ((p->back->collapse == tocollap) && (subtree->collapse == tocollap) && multf && (subtree != below)) return true; /* when root->numdesc == 2 * there is no mandatory step at the root, * instead of checking at the root we check around it * we only need to check p because the first if * statement already gets rid of it for the subtree */ else if ((p->back->index != root->index || root->numdesc > 2) && trycollapdesc(p->back, subtree, start, below, item, added, total, tempdsc, tempprt, multf, zeros, zeros2)) return true; else if ((p->back->index == root->index && root->numdesc == 2) && !(root->next->back->tip) && !(root->next->next->back->tip) && trycollapdesc(root->next->back, root->next->next->back, start, below, item, added, total, tempdsc, tempprt, multf, zeros, zeros2)) return true; } p = p->next; } while (p != subtree); p = subtree->next; do { if (p->back && !p->back->tip) { if (zeroinsubtree(p->back, start, below, item, added, total, tempdsc, tempprt, multf, root, zeros, zeros2)) return true; } p = p->next; } while (p != subtree); } return false; } /* zeroinsubtree */ boolean collapsible(node *item, node *below, node *temp, node *temp1, node *tempdsc, node *tempprt, node *added, node *total, boolean multf, node *root, long *zeros, unsigned char *zeros2, pointarray treenode) { /* sees if any branch can be collapsed */ node *belowbk; boolean allsame; if (multf) { memcpy(tempdsc->discbase, item->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempdsc->oldnumsteps, zeros, endsite*sizeof(long)); memcpy(added->discbase, below->discbase, endsite*sizeof(unsigned char)); memcpy(added->numsteps, below->numsteps, endsite*sizeof(long)); memcpy(added->discnumnuc, below->discnumnuc, endsite*sizeof(discnucarray)); added->numdesc = below->numdesc + 1; multifillin(added, tempdsc, 1); } else { fillin(added, item, below); added->numdesc = 2; } fillin(total, added, below->back); clearbottom(treenode); if (below->back) { if (zeroinsubtree(below->back, below->back, below, item, added, total, tempdsc, tempprt, multf, root, zeros, zeros2)) return true; } if (multf) { if (zeroinsubtree(below, below, below, item, added, total, tempdsc, tempprt, multf, 
root, zeros, zeros2)) return true; } else if (!below->tip) { if (zeroinsubtree(below, below, below, item, added, total, tempdsc, tempprt, multf, root, zeros, zeros2)) return true; } if (!item->tip) { if (zeroinsubtree(item, item, below, item, added, total, tempdsc, tempprt, multf, root, zeros, zeros2)) return true; } if (multf && below->back && !below->back->tip) { memcpy(tempdsc->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); memcpy(tempdsc->olddiscbase, added->discbase, endsite*sizeof(unsigned char)); memcpy(tempdsc->oldnumsteps, added->numsteps, endsite*sizeof(long)); if (below->back == treenode[below->back->index - 1]) belowbk = below->back->next; else belowbk = treenode[below->back->index - 1]; memcpy(tempprt->discbase, belowbk->discbase, endsite*sizeof(unsigned char)); memcpy(tempprt->numsteps, belowbk->numsteps, endsite*sizeof(long)); memcpy(tempprt->discnumnuc, belowbk->discnumnuc, endsite*sizeof(discnucarray)); tempprt->numdesc = belowbk->numdesc - 1; multifillin(tempprt, tempdsc, -1); tempprt->numdesc += added->numdesc; collabranch(added, tempdsc, tempprt); if (!allcommonbases(tempprt, belowbk, &allsame)) return false; else if (allsame && !moresteps(tempprt, belowbk)) return true; else if (belowbk->back) { fillin(temp, tempprt, belowbk->back); fillin(temp1, belowbk, belowbk->back); return !moresteps(temp, temp1); } } return false; } /* collapsible */ void replaceback(node **oldback, node *item, node *forknode, node **grbg, long *zeros, unsigned char *zeros2) { /* replaces back node of item with another */ node *p; p = forknode; while (p->next->back != item) p = p->next; *oldback = p->next; gnudisctreenode(grbg, &p->next, forknode->index, endsite, zeros, zeros2); p->next->next = (*oldback)->next; p->next->back = (*oldback)->back; p->next->back->back = p->next; (*oldback)->next = (*oldback)->back = NULL; } /* replaceback */ void putback(node *oldback, node *item, node *forknode, node **grbg) { /* restores node to back of item */ node *p, *q; p = forknode; while (p->next != item->back) p = p->next; q = p->next; oldback->next = p->next->next; p->next = oldback; oldback->back = item; item->back = oldback; oldback->index = forknode->index; chuck(grbg, q); } /* putback */ void savelocrearr(node *item, node *forknode, node *below, node *tmp, node *tmp1, node *tmp2, node *tmp3, node *tmprm, node *tmpadd, node **root, long maxtrees, long *nextree, boolean multf, boolean bestever, boolean *saved, long *place, bestelm *bestrees, pointarray treenode, node **grbg, long *zeros, unsigned char *zeros2) { /* saves tied or better trees during local rearrangements by removing item from forknode and adding to below */ node *other, *otherback=NULL, *oldfork, *nufork, *oldback; long pos; boolean found, collapse; if (forknode->numdesc == 2) { findbelow(&other, item, forknode); otherback = other->back; oldback = NULL; } else { other = NULL; replaceback(&oldback, item, forknode, grbg, zeros, zeros2); } re_move(item, &oldfork, root, false, treenode, grbg, zeros, zeros2); if (!multf) getnufork(&nufork, grbg, treenode, zeros, zeros2); else nufork = NULL; addnsave(below, item, nufork, root, grbg, multf, treenode, place, zeros, zeros2); pos = 0; findtree(&found, &pos, *nextree, place, bestrees); if (other) { add(other, item, oldfork, root, false, treenode, grbg, zeros, zeros2); if (otherback->back != other) flipnodes(item, other); } else add(forknode, item, NULL, root, false, treenode, grbg, zeros, zeros2); *saved = false; if (found) { if (oldback) 
putback(oldback, item, forknode, grbg); } else { if (oldback) chuck(grbg, oldback); re_move(item, &oldfork, root, true, treenode, grbg, zeros, zeros2); collapse = collapsible(item, below, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, multf, *root, zeros, zeros2, treenode); if (!collapse) { if (bestever) addbestever(&pos, nextree, maxtrees, collapse, place, bestrees); else addtiedtree(pos, nextree, maxtrees, collapse, place, bestrees); } if (other) add(other, item, oldfork, root, true, treenode, grbg, zeros, zeros2); else add(forknode, item, NULL, root, true, treenode, grbg, zeros, zeros2); *saved = !collapse; } } /* savelocrearr */ void clearvisited(pointarray treenode) { /* clears boolean visited at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->visited = false; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->visited = false; p = p->next; } } } } /* clearvisited */ void hyprint(long b1,long b2,struct LOC_hyptrav *htrav,pointarray treenode) { /* print out states in sites b1 through b2 at node */ long i, j, k; boolean dot, found; if (htrav->bottom) { if (!outgropt) fprintf(outfile, " "); else fprintf(outfile, "root "); } else fprintf(outfile, "%4ld ", htrav->r->back->index - spp); if (htrav->r->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[htrav->r->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", htrav->r->index - spp); if (htrav->bottom) fprintf(outfile, " "); else if (htrav->nonzero) fprintf(outfile, " yes "); else if (htrav->maybe) fprintf(outfile, " maybe "); else fprintf(outfile, " no "); for (i = b1; i <= b2; i++) { j = location[ally[i - 1] - 1]; htrav->tempset = htrav->r->discbase[j - 1]; htrav->anc = htrav->hypset[j - 1]; if (!htrav->bottom) htrav->anc = treenode[htrav->r->back->index - 1]->discbase[j - 1]; dot = dotdiff && (htrav->tempset == htrav->anc && !htrav->bottom); if (dot) putc('.', outfile); else { found = false; k = (long)zero; do { if (htrav->tempset == (1 << k)) { putc(convtab[k][i - 1], outfile); found = true; } k++; } while (!found && k <= (long)seven); if (!found) putc('?', outfile); } if (i % 10 == 0) putc(' ', outfile); } putc('\n', outfile); } /* hyprint */ void gnubase(gbases **p, gbases **garbage, long endsite) { /* this and the following are do-it-yourself garbage collectors. 
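Keeping spare gbases records on this list lets hyptrav reuse them instead of calling Malloc and free for every node it visits.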
Make a new node or pull one off the garbage list */ if (*garbage != NULL) { *p = *garbage; *garbage = (*garbage)->next; } else { *p = (gbases *)Malloc(sizeof(gbases)); (*p)->discbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); } (*p)->next = NULL; } /* gnubase */ void chuckbase(gbases *p, gbases **garbage) { /* collect garbage on p -- put it on front of garbage list */ p->next = *garbage; *garbage = p; } /* chuckbase */ void hyptrav(node *r_, discbaseptr hypset_, long b1, long b2, boolean bottom_, pointarray treenode, gbases **garbage) { /* compute, print out states at one interior node */ struct LOC_hyptrav Vars; long i, j, k; long largest; gbases *ancset; discnucarray *tempnuc; node *p, *q; Vars.bottom = bottom_; Vars.r = r_; Vars.hypset = hypset_; gnubase(&ancset, garbage, endsite); tempnuc = (discnucarray *)Malloc(endsite*sizeof(discnucarray)); Vars.maybe = false; Vars.nonzero = false; if (!Vars.r->tip) zerodiscnumnuc(Vars.r, endsite); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; Vars.anc = Vars.hypset[j - 1]; if (!Vars.r->tip) { p = Vars.r->next; for (k = (long)zero; k <= (long)seven; k++) if (Vars.anc & (1 << k)) Vars.r->discnumnuc[j - 1][k]++; do { for (k = (long)zero; k <= (long)seven; k++) if (p->back->discbase[j - 1] & (1 << k)) Vars.r->discnumnuc[j - 1][k]++; p = p->next; } while (p != Vars.r); largest = getlargest(Vars.r->discnumnuc[j - 1]); Vars.tempset = 0; for (k = (long)zero; k <= (long)seven; k++) { if (Vars.r->discnumnuc[j - 1][k] == largest) Vars.tempset |= (1 << k); } Vars.r->discbase[j - 1] = Vars.tempset; } if (!Vars.bottom) Vars.anc = treenode[Vars.r->back->index - 1]->discbase[j - 1]; Vars.nonzero = (Vars.nonzero || (Vars.r->discbase[j - 1] & Vars.anc) == 0); Vars.maybe = (Vars.maybe || Vars.r->discbase[j - 1] != Vars.anc); } hyprint(b1, b2, &Vars, treenode); Vars.bottom = false; if (!Vars.r->tip) { memcpy(tempnuc, Vars.r->discnumnuc, endsite*sizeof(discnucarray)); q = Vars.r->next; do { memcpy(Vars.r->discnumnuc, tempnuc, endsite*sizeof(discnucarray)); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; for (k = (long)zero; k <= (long)seven; k++) if (q->back->discbase[j - 1] & (1 << k)) Vars.r->discnumnuc[j - 1][k]--; largest = getlargest(Vars.r->discnumnuc[j - 1]); ancset->discbase[j - 1] = 0; for (k = (long)zero; k <= (long)seven; k++) if (Vars.r->discnumnuc[j - 1][k] == largest) ancset->discbase[j - 1] |= (1 << k); if (!Vars.bottom) Vars.anc = ancset->discbase[j - 1]; } hyptrav(q->back, ancset->discbase, b1, b2, Vars.bottom, treenode, garbage); q = q->next; } while (q != Vars.r); } chuckbase(ancset, garbage); } /* hyptrav */ void hypstates(long chars, node *root, pointarray treenode, gbases **garbage) { /* fill in and describe states at interior nodes */ /* used in pars */ long i, n; discbaseptr nothing; fprintf(outfile, "\nFrom To Any Steps? State at upper node\n"); fprintf(outfile, " "); if (dotdiff) fprintf(outfile, " ( . 
means same as in the node below it on tree)\n"); nothing = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); for (i = 0; i < endsite; i++) nothing[i] = 0; for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { putc('\n', outfile); n = i * 40; if (n > chars) n = chars; hyptrav(root, nothing, i * 40 - 39, n, true, treenode, garbage); } free(nothing); } /* hypstates */ void initbranchlen(node *p) { node *q; p->v = 0.0; if (p->back) p->back->v = 0.0; if (p->tip) return; q = p->next; while (q != p) { initbranchlen(q->back); q = q->next; } q = p->next; while (q != p) { q->v = 0.0; q = q->next; } } /* initbranchlen */ void initmin(node *p, long sitei, boolean internal) { long i; if (internal) { for (i = (long)zero; i <= (long)seven; i++) { p->disccumlengths[i] = 0; p->discnumreconst[i] = 1; } } else { for (i = (long)zero; i <= (long)seven; i++) { if (p->discbase[sitei - 1] & (1 << i)) { p->disccumlengths[i] = 0; p->discnumreconst[i] = 1; } else { p->disccumlengths[i] = -1; p->discnumreconst[i] = 0; } } } } /* initmin */ void initbase(node *p, long sitei) { /* traverse tree to initialize base at internal nodes */ node *q; long i, largest; if (p->tip) return; q = p->next; while (q != p) { if (q->back) { memcpy(q->discnumnuc, p->discnumnuc, endsite*sizeof(discnucarray)); for (i = (long)zero; i <= (long)seven; i++) { if (q->back->discbase[sitei - 1] & (1 << i)) q->discnumnuc[sitei - 1][i]--; } if (p->back) { for (i = (long)zero; i <= (long)seven; i++) { if (p->back->discbase[sitei - 1] & (1 << i)) q->discnumnuc[sitei - 1][i]++; } } largest = getlargest(q->discnumnuc[sitei - 1]); q->discbase[sitei - 1] = 0; for (i = (long)zero; i <= (long)seven; i++) { if (q->discnumnuc[sitei - 1][i] == largest) q->discbase[sitei - 1] |= (1 << i); } } q = q->next; } q = p->next; while (q != p) { initbase(q->back, sitei); q = q->next; } } /* initbase */ void inittreetrav(node *p, long sitei) { /* traverse tree to clear boolean initialized and set up base */ node *q; if (p->tip) { initmin(p, sitei, false); p->initialized = true; return; } q = p->next; while (q != p) { inittreetrav(q->back, sitei); q = q->next; } initmin(p, sitei, true); p->initialized = false; q = p->next; while (q != p) { initmin(q, sitei, true); q->initialized = false; q = q->next; } } /* inittreetrav */ void compmin(node *p, node *desc) { /* computes minimum lengths up to p */ long i, j, minn, cost, desclen, descrecon=0, maxx; maxx = 10 * spp; for (i = (long)zero; i <= (long)seven; i++) { minn = maxx; for (j = (long)zero; j <= (long)seven; j++) { if (i == j) cost = 0; else cost = 1; if (desc->disccumlengths[j] == -1) { desclen = maxx; } else { desclen = desc->disccumlengths[j]; } if (minn > cost + desclen) { minn = cost + desclen; descrecon = 0; } if (minn == cost + desclen) { descrecon += desc->discnumreconst[j]; } } p->disccumlengths[i] += minn; p->discnumreconst[i] *= descrecon; } p->initialized = true; } /* compmin */ void minpostorder(node *p, pointarray treenode) { /* traverses an n-ary tree, computing minimum steps at each node */ node *q; if (p->tip) { return; } q = p->next; while (q != p) { if (q->back) minpostorder(q->back, treenode); q = q->next; } if (!p->initialized) { q = p->next; while (q != p) { if (q->back) compmin(p, q->back); q = q->next; } } } /* minpostorder */ void branchlength(node *subtr1, node *subtr2, double *brlen, pointarray treenode) { /* computes a branch length between two subtrees for a given site */ long i, j, minn, cost, nom, denom; node *temp; if (subtr1->tip) { temp = subtr1; subtr1 = subtr2; subtr2 = temp; } if 
(subtr1->index == outgrno) { temp = subtr1; subtr1 = subtr2; subtr2 = temp; } minpostorder(subtr1, treenode); minpostorder(subtr2, treenode); minn = 10 * spp; nom = 0; denom = 0; for (i = (long)zero; i <= (long)seven; i++) { for (j = (long)zero; j <= (long)seven; j++) { if (i == j) cost = 0; else cost = 1; if (subtr1->disccumlengths[i] != -1 && (subtr2->disccumlengths[j] != -1)) { if (subtr1->disccumlengths[i] + cost + subtr2->disccumlengths[j] < minn) { minn = subtr1->disccumlengths[i] + cost + subtr2->disccumlengths[j]; nom = 0; denom = 0; } if (subtr1->disccumlengths[i] + cost + subtr2->disccumlengths[j] == minn) { nom += subtr1->discnumreconst[i] * subtr2->discnumreconst[j] * cost; denom += subtr1->discnumreconst[i] * subtr2->discnumreconst[j]; } } } } *brlen = (double)nom/(double)denom; } /* branchlength */ void printbranchlengths(node *p) { node *q; long i; if (p->tip) return; q = p->next; do { fprintf(outfile, "%6ld ",q->index - spp); if (q->back->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[q->back->index - 1][i], outfile); } else fprintf(outfile, "%6ld ", q->back->index - spp); fprintf(outfile, " %.2f\n",q->v); if (q->back) printbranchlengths(q->back); q = q->next; } while (q != p); } /* printbranchlengths */ void branchlentrav(node *p, node *root, long sitei, long chars, double *brlen, pointarray treenode) { /* traverses the tree computing tree length at each branch */ node *q; if (p->tip) return; if (p->index == outgrno) p = p->back; q = p->next; do { if (q->back) { branchlength(q, q->back, brlen, treenode); q->v += ((weight[sitei - 1] / 10.0) * (*brlen)); q->back->v += ((weight[sitei - 1] / 10.0) * (*brlen)); if (!q->back->tip) branchlentrav(q->back, root, sitei, chars, brlen, treenode); } q = q->next; } while (q != p); } /* branchlentrav */ void treelength(node *root, long chars, pointarray treenode) { /* calls branchlentrav at each site */ long sitei; double trlen; initbranchlen(root); for (sitei = 1; sitei <= endsite; sitei++) { trlen = 0.0; initbase(root, sitei); inittreetrav(root, sitei); branchlentrav(root, root, sitei, chars, &trlen, treenode); } } /* treelength */ void coordinates(node *p, long *tipy, double f, long *fartemp) { /* establishes coordinates of nodes for display without lengths */ node *q, *first, *last, *mid1 = NULL, *mid2 = NULL; long numbranches, numb2; if (p->tip) { p->xcoord = 0; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; return; } numbranches = 0; q = p->next; do { coordinates(q->back, tipy, f, fartemp); numbranches += 1; q = q->next; } while (p != q); first = p->next->back; q = p->next; while (q->next != p) q = q->next; last = q->back; numb2 = 1; q = p->next; while (q != p) { if (numb2 == (numbranches + 1)/2) mid1 = q->back; if (numb2 == (numbranches/2 + 1)) mid2 = q->back; numb2 += 1; q = q->next; } p->xcoord = (long)((double)(last->ymax - first->ymin) * f); p->ycoord = (long)((mid1->ycoord + mid2->ycoord) / 2); p->ymin = first->ymin; p->ymax = last->ymax; if (p->xcoord > *fartemp) *fartemp = p->xcoord; } /* coordinates */ void drawline(long i, double scale, node *root) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first = NULL, *last = NULL; long n, j; boolean extra, done, noplus; p = root; q = root; extra = false; noplus = false; if (i == (long)p->ycoord && p == root) { if (p->index - spp >= 10) fprintf(outfile, " %2ld", p->index - spp); else fprintf(outfile, " %ld", p->index - spp); extra = true; noplus = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = 
false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)(scale * (p->xcoord - q->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if (noplus) { putc('-', outfile); noplus = false; } else putc('+', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; noplus = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } noplus = false; } else { for (j = 1; j <= n; j++) putc(' ', outfile); noplus = false; } if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree(node *root, double f) { /* prints out diagram of the tree */ /* used in pars */ long i, tipy, dummy; double scale; putc('\n', outfile); if (!treeprint) return; putc('\n', outfile); tipy = 1; dummy = 0; coordinates(root, &tipy, f, &dummy); scale = 1.5; putc('\n', outfile); for (i = 1; i <= (tipy - down); i++) drawline(i, scale, root); fprintf(outfile, "\n remember:"); if (outgropt) fprintf(outfile, " (although rooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n\n"); } /* printree */ void writesteps(long chars, boolean weights, steptr oldweight, node *root) { /* used in pars */ long i, j, k, l; putc('\n', outfile); if (weights) fprintf(outfile, "weighted "); fprintf(outfile, "steps in each site:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%4ld", i); fprintf(outfile, "\n *------------------------------------"); fprintf(outfile, "-----\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld", i * 10); putc('|', outfile); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k == 0 || k > chars) fprintf(outfile, " "); else { l = location[ally[k - 1] - 1]; if (oldweight[k - 1] > 0) fprintf(outfile, "%4ld", oldweight[k - 1] * (root->numsteps[l - 1] / weight[l - 1])); else fprintf(outfile, " 0"); } } putc('\n', outfile); } } /* writesteps */ void treeout(node *p, long nextree, long *col, node *root) { /* write out file with representation of final tree */ /* used in pars */ node *q; long i, n; Char c; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; q = p->next; while (q != p) { treeout(q->back, nextree, col, root); q = q->next; if (q == p) break; putc(',', outtree); (*col)++; if (*col > 60) { putc('\n', outtree); *col = 0; } } putc(')', outtree); (*col)++; } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout */ void treeout3(node *p, long nextree, long *col, node *root) { /* write out file with representation of final tree */ /* used in dnapars -- writes branch lengths */ node *q; long i, n, w; double x; Char c; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; 
i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; q = p->next; while (q != p) { treeout3(q->back, nextree, col, root); q = q->next; if (q == p) break; putc(',', outtree); (*col)++; if (*col > 60) { putc('\n', outtree); *col = 0; } } putc(')', outtree); (*col)++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p != root) { fprintf(outtree, ":%*.2f", (int)(w + 4), x); } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout3 */ void drawline3(long i, double scale, node *start) { /* draws one row of the tree diagram by moving up tree */ /* used in pars */ node *p, *q; long n, j; boolean extra; node *r, *first = NULL, *last = NULL; boolean done; p = start; q = start; extra = false; if (i == (long)p->ycoord) { if (p->index - spp >= 10) fprintf(outfile, " %2ld", p->index - spp); else fprintf(outfile, " %ld", p->index - spp); extra = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || (r == p))); first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; } done = (p->tip || p == q); n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)p->ycoord != (long)q->ycoord) putc('+', outfile); else putc('-', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && (i != (long)p->ycoord || p == start)) { putc('|', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } } else { for (j = 1; j <= n; j++) putc(' ', outfile); } if (q != p) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index-1][j], outfile); } putc('\n', outfile); } /* drawline3 */ void standev(long chars, long numtrees, long minwhich, double minsteps, double *nsteps, long **fsteps, longer seed) { /* compute and write standard deviation of user trees */ /* used in pars */ long i, j, k; double wt, sumw, sum, sum2, sd; double temp; double **covar, *P, *f; #define SAMPLES 1000 /* ????? 
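SAMPLES is the number of resampled replicates drawn below to estimate the Shimodaira-Hasegawa P values;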
if numtrees too big for Shimo, truncate */ if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree Steps Diff Steps Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); which = 1; while (which <= numtrees) { fprintf(outfile, "%3ld%10.1f", which, nsteps[which - 1] / 10); if (minwhich == which) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (i = 0; i < chars; i++) { if (weight[i] > 0) { wt = weight[i] / 10.0; sumw += wt; temp = (fsteps[which - 1][i] - fsteps[minwhich - 1][i]) / 10.0; sum += temp; sum2 += temp * temp / wt; } } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, "%10.1f%12.4f", (nsteps[which - 1] - minsteps) / 10, sd); if (sum > 1.95996 * sd) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } which++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ if(numtrees > MAXSHIMOTREES){ fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" , MAXSHIMOTREES, numtrees); numtrees = MAXSHIMOTREES; } else { fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); } covar = (double **)Malloc(numtrees*sizeof(double *)); for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); sumw = 0.0; for (i = 0; i < chars; i++) sumw += weight[i]; for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = nsteps[i]/(10.0*sumw); for (j = 0; j <=i; j++) { sum2 = nsteps[j]/(10.0*sumw); temp = 0.0; for (k = 0; k < chars; k++) { wt = weight[k]/10.0; if (weight[k] > 0) { temp = temp + wt*(fsteps[i][k]/(10.0*wt)-sum) *(fsteps[j][k]/(10.0*wt)-sum2); } } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; if (covar[i][i]-sum <= 0.0) temp = 0.0; else temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-23) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; sum2 = nsteps[0]/10.0; /* sum2 will be smallest # of steps */ for (i = 1; i < numtrees; i++) if (sum2 > nsteps[i]/10.0) sum2 = nsteps[i]/10.0; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get min of vector */ if (f[j] < sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (nsteps[j]/10.0-sum2 <= f[j] - sum) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree Steps Diff Steps P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld%10.1f", i+1, nsteps[i]/10); if ((minwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %9.1f %10.3f", nsteps[i]/10.0-sum2, P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < numtrees; i++) free(covar[i]); free(covar); } } /* standev */ void freetip(node *anode) 
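/* frees the per-site arrays attached to a tip node; interior nodes, which also carry discnumnuc, are released by freenontip below */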
{ /* used in pars */ free(anode->numsteps); free(anode->oldnumsteps); free(anode->discbase); free(anode->olddiscbase); } /* freetip */ void freenontip(node *anode) { /* used in pars */ free(anode->numsteps); free(anode->oldnumsteps); free(anode->discbase); free(anode->olddiscbase); free(anode->discnumnuc); } /* freenontip */ void freenodes(long nonodes, pointarray treenode) { /* used in pars */ long i; node *p; for (i = 0; i < spp; i++) freetip(treenode[i]); for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]->next; do { freenontip(p); p = p->next; } while (p != treenode[i]); freenontip(p); } } } /* freenodes */ void freenode(node **anode) { /* used in pars */ freenontip(*anode); free(*anode); } /* freenode */ void freetree(long nonodes, pointarray treenode) { /* used in pars */ long i; node *p, *q; for (i = 0; i < spp; i++) free(treenode[i]); for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]->next; do { q = p->next; free(p); p = q; } while (p != treenode[i]); free(p); } } free(treenode); } /* freetree */ void freegarbage(gbases **garbage) { /* used in pars */ gbases *p; while (*garbage) { p = *garbage; *garbage = (*garbage)->next; free(p->discbase); free(p); } } /* freegarbage */ void freegrbg(node **grbg) { /* used in pars */ node *p; while (*grbg) { p = *grbg; *grbg = (*grbg)->next; freenontip(p); free(p); } } /* freegrbg */ void collapsetree(node *p, node *root, node **grbg, pointarray treenode, long *zeros, unsigned char *zeros2) { /* Recurse through tree searching for zero length branches between */ /* nodes (not to tips). If one exists, collapse the nodes together, */ /* removing the branch. */ node *q, *x1, *y1, *x2, *y2; long i, /*j,*/ index, index2, numd; if (p->tip) return; q = p->next; do { if (!q->back->tip && q->v == 0.000000) { /* merge the two nodes. */ x1 = y2 = q->next; x2 = y1 = q->back->next; while(x1->next != q) x1 = x1-> next; while(y1->next != q->back) y1 = y1-> next; x1->next = x2; y1->next = y2; index = q->index; index2 = q->back->index; numd = treenode[index-1]->numdesc + q->back->numdesc -1; chuck(grbg, q->back); chuck(grbg, q); q = x2; /* update the indices around the node circle */ do{ if(q->index != index){ q->index = index; } q = q-> next; }while(x2 != q); updatenumdesc(treenode[index-1], root, numd); /* Alter treenode to point to real nodes, and update indices */ /* accordingly. */ /*j = 0;*/ i=0; for(i = (index2-1); i < nonodes-1 && treenode[i+1]; i++){ treenode[i]=treenode[i+1]; treenode[i+1] = NULL; x1=x2=treenode[i]; do{ x1->index = i+1; x1 = x1 -> next; } while(x1 != x2); } /* Create a new empty fork in the blank spot of treenode */ x1=NULL; for(i=1; i <=3 ; i++){ gnudisctreenode(grbg, &x2, index2, endsite, zeros, zeros2); x2->next = x1; x1 = x2; } x2->next->next->next = x2; treenode[nonodes-1]=x2; if (q->back) collapsetree(q->back, root, grbg, treenode, zeros, zeros2); } else { if (q->back) collapsetree(q->back, root, grbg, treenode, zeros, zeros2); q = q->next; } } while (q != p); } /* collapsetree */ void collapsebestrees(node **root, node **grbg, pointarray treenode, bestelm *bestrees, long *place, long *zeros, unsigned char *zeros2, long chars, boolean recompute, boolean progress) { /* Goes through all best trees, collapsing trees where possible, and */ /* deleting trees that are not unique.
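Each saved tree is rebuilt from its bestelm encoding, rerooted at the outgroup, collapsed where possible, re-saved, and dropped if it now duplicates an earlier tree.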
*/ long i,j, k, pos, nextnode, oldnextree; boolean found; node *dummy; oldnextree = nextree; for(i = 0 ; i < (oldnextree - 1) ; i++){ bestrees[i].collapse = true; } if(progress) printf("Collapsing best trees\n "); k = 0; for(i = 0 ; i < (oldnextree - 1) ; i++){ if(progress){ if(i % (((oldnextree-1) / 72) + 1) == 0) putchar('.'); fflush(stdout); } while(!bestrees[k].collapse) k++; /* Reconstruct tree. */ *root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], root, recompute, treenode, grbg, zeros, zeros2); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[k].btree[j - 1] > 0) add(treenode[bestrees[k].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], root, recompute, treenode, grbg, zeros, zeros2); else add(treenode[treenode[-bestrees[k].btree[j - 1]-1]->back->index-1], treenode[j - 1], NULL, root, recompute, treenode, grbg, zeros, zeros2); } reroot(treenode[outgrno - 1], *root); treelength(*root, chars, treenode); collapsetree(*root, *root, grbg, treenode, zeros, zeros2); savetree(*root, place, treenode, grbg, zeros, zeros2); /* move everything down in the bestree list */ for(j = k ; j < (nextree - 2) ; j++){ memcpy(bestrees[j].btree, bestrees[j + 1].btree, spp * sizeof(long)); bestrees[j].gloreange = bestrees[j + 1].gloreange; bestrees[j + 1].gloreange = false; bestrees[j].locreange = bestrees[j + 1].locreange; bestrees[j + 1].locreange = false; bestrees[j].collapse = bestrees[j + 1].collapse; } pos=0; findtree(&found, &pos, nextree-1, place, bestrees); /* put the new tree at the end of the list if it wasn't found */ nextree--; if(!found) addtree(pos, &nextree, false, place, bestrees); /* Deconstruct the tree */ for (j = 1; j < spp; j++){ re_move(treenode[j], &dummy, root, recompute, treenode, grbg, zeros, zeros2); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } PHYLIPNEW-3.69.650/src/seqbootall.c0000664000175000017500000012155111605067345013402 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2005 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Doug Buxton. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ typedef enum { seqs, morphology, restsites, genefreqs } datatype; typedef enum { dna, rna, protein } seqtype; AjPSeqset seqset = NULL; AjPPhyloState phylorest = NULL; AjPPhyloState phylostate = NULL; AjPPhyloFreq phylofreqs = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylomix = NULL; AjPPhyloProp phylofact = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void seqboot_inputnumbersseq(AjPSeqset); void seqboot_inputnumbersfreq(AjPPhyloFreq); void seqboot_inputnumbersrest(AjPPhyloState); void seqboot_inputnumbersstate(AjPPhyloState); void inputoptions(void); char **matrix_char_new(long rows, long cols); void matrix_char_delete(char **mat, long rows); double **matrix_double_new(long rows, long cols); void matrix_double_delete(double **mat, long rows); void seqboot_inputdataseq(AjPSeqset); void seqboot_inputdatafreq(AjPPhyloFreq); void seqboot_inputdatarest(AjPPhyloState); void allocrest(void); void freerest(void); void allocnew(void); void freenew(void); void allocnewer(long newergroups, long newersites); void doinput(int argc, Char *argv[]); void bootweights(void); void permute_vec(long *a, long n); void sppermute(long); void charpermute(long, long); void writedata(void); void writeweights(void); void writecategories(void); void writeauxdata(steptr, FILE*); void writefactors(void); void bootwrite(void); void seqboot_inputaux(steptr, FILE*); void freenewer(void); void seqboot_inputfactors(AjPPhyloProp fact); /* function prototypes */ #endif /*** Config vars ***/ /* Mutually exclusive booleans for boostrap type */ boolean bootstrap, jackknife; boolean permute; /* permute char order */ boolean ild; /* permute species for each char */ boolean lockhart; /* permute chars within species */ boolean rewrite; boolean factors = false; /* Use factors (only with morph data) */ /* Bootstrap/jackknife sample frequency */ boolean regular = true; /* Use 50% sampling with bootstrap/jackknife */ double fracsample = 0.5; /* ...or user-defined sample freq, [0..inf) */ /* Output format: mutually exclusive, none indicates PHYLIP */ boolean xml = false; boolean nexus = false; boolean weights = false;/* Read weights file */ boolean categories = false;/* Use categories (permuted with dataset) */ boolean enzymes; boolean all; /* All alleles present in infile? */ boolean justwts = false; /* Write boot'd/jack'd weights, no datasets */ boolean mixture; boolean ancvar; boolean progress = true; /* Enable progress indications */ boolean firstrep; /* TODO Must this be global? 
*/ longer seed; /* Filehandles and paths */ /* Usual suspects declared in phylip.c/h */ FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH], mixfilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; const char* outweightfilename; AjPFile embossoutweightfile; const char* outmixfilename; AjPFile embossoutmixfile; const char* outancfilename; AjPFile embossoutancfile; const char* outcatfilename; AjPFile embossoutcatfile; const char* outfactfilename; AjPFile embossoutfactfile; long sites, loci, maxalleles, groups, nenzymes, reps, ws, blocksize, categs, maxnewsites; datatype data; seqtype seq; steptr oldweight, where, how_many, mixdata, ancdata; /* Original dataset */ /* [0..spp-1][0..sites-1] */ Char **nodep = NULL; /* molecular or morph data */ double **nodef = NULL; /* gene freqs */ Char *factor = NULL; /* factor[sites] - direct read-in of factors file */ long *factorr = NULL; /* [0..sites-1] => nondecreasing [1..groups] */ long *alleles = NULL; /* Mapping with read-in weights eliminated * Allocated once in allocnew() */ long newsites; long newgroups; long *newwhere = NULL; /* Map [0..newgroups-1] => [1..newsites] */ long *newhowmany = NULL; /* Number of chars for each [0..newgroups-1] */ /* Mapping with bootstrapped weights applied */ /* (re)allocated by allocnewer() */ long newersites, newergroups; long *newerfactor = NULL; /* Map [0..newersites-1] => [1..newergroups] */ long *newerwhere = NULL; /* Map [0..newergroups-1] => [1..newersites] */ long *newerhowmany = NULL; /* Number of chars for each [0..newergroups-1] */ long **charorder = NULL; /* Permutation [0..spp-1][0..newergroups-1] */ long **sppord = NULL; /* Permutation [0..newergroups-1][0..spp-1] */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr typeofdata = NULL; AjPStr test = NULL; AjPStr outputformat = NULL; AjPStr typeofseq = NULL; AjPStr justweights = NULL; AjBool rewrite = false; long inseed, inseed0; data = seqs; seq = dna; bootstrap = false; jackknife = false; permute = false; ild = false; lockhart = false; blocksize = 1; regular = true; fracsample = 1.0; all = false; reps = 100; weights = false; mixture = false; ancvar = false; categories = false; justwts = false; printdata = false; dotdiff = true; progress = true; interleaved = true; xml = false; nexus = false; factors = false; enzymes = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqset = ajAcdGetSeqset("infilesequences"); typeofdata = ajAcdGetListSingle("datatype"); if(ajStrMatchC(typeofdata, "s")) data = seqs; else if(ajStrMatchC(typeofdata, "m")) { data = morphology; phylofact = ajAcdGetProperties("factorfile"); if(phylofact) { factors = true; emboss_openfile(embossoutfactfile, &outfactfile, &outfactfilename); } } else if(ajStrMatchC(typeofdata, "r")) { data = restsites; enzymes = ajAcdGetBoolean("enzymes"); } else if(ajStrMatchC(typeofdata, "g")) { data = genefreqs; all = ajAcdGetBoolean("all"); } test = ajAcdGetListSingle("test"); if(ajStrMatchC(test, "b")) { bootstrap = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 1.0; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } blocksize = ajAcdGetInt("blocksize"); } else if(ajStrMatchC(test, "j")) { jackknife = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 0.5; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } } else 
if(ajStrMatchC(test, "c")) permute = true; else if(ajStrMatchC(test, "o")) ild = true; else if(ajStrMatchC(test, "s")) lockhart = true; else if(ajStrMatchC(test, "r")) rewrite = true; if(rewrite) { if (data == seqs) { outputformat = ajAcdGetListSingle("rewriteformat"); if(ajStrMatchC(outputformat, "n")) nexus = true; else if(ajStrMatchC(outputformat, "x")) xml = true; if( (nexus) || (xml) ) { typeofseq = ajAcdGetListSingle("seqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } if (data == morphology) { typeofseq = ajAcdGetListSingle("morphseqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } else{ reps = ajAcdGetInt("reps"); inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); if(jackknife || bootstrap || permute) { phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; if( data == morphology) { phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) { ancvar = true; emboss_openfile(embossoutancfile, &outancfile, &outancfilename); } phylomix = ajAcdGetProperties("mixfile"); if(phylomix) { mixture = true; emboss_openfile(embossoutmixfile, &outmixfile, &outmixfilename); } } if(data == seqs) { phyloratecat = ajAcdGetProperties("categories"); if(phyloratecat) categories = true; } if(!permute) { justweights = ajAcdGetListSingle("justweights"); if(ajStrMatchC(justweights, "j")) justwts = true; } } } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); printf("\n bootstrap: %s",(bootstrap ? "true" : "false")); printf("\njackknife: %s",(jackknife ? "true" : "false")); printf("\n permute: %s",(permute ? "true" : "false")); printf("\n lockhart: %s",(lockhart ? "true" : "false")); printf("\n ild: %s",(ild ? "true" : "false")); printf("\n justwts: %s \n",(justwts ? 
"true" : "false")); } /* emboss_getoptions */ void seqboot_inputnumbersseq(AjPSeqset seqset) { /* read numbers of species and of sites */ spp = ajSeqsetGetSize(seqset); sites = ajSeqsetGetLen(seqset); loci = sites; maxalleles = 1; } /* seqboot_inputnumbersseq */ void seqboot_inputnumbersfreq(AjPPhyloFreq freq) { /* read numbers of species and of sites */ long i; spp = freq->Size; sites = freq->Loci; loci = sites; maxalleles = 1; if (!freq->ContChar) { alleles = (long *)Malloc(sites*sizeof(long)); sites = 0; for (i = 0; i < (loci); i++) { alleles[i] = freq->Allele[i]; if (alleles[i] > maxalleles) maxalleles = alleles[i]; sites += alleles[i]; } } } /* seqboot_inputnumbersfreq */ void seqboot_inputnumbersrest(AjPPhyloState rest) { /* read numbers of species and of sites */ spp = rest->Size; sites = rest->Len; loci = sites; nenzymes = rest->Count; } /* seqboot_inputnumbersrest */ void seqboot_inputnumbersstate(AjPPhyloState state) { /* read numbers of species and of sites */ spp = state->Size; sites = state->Len; loci = sites; } /* seqboot_inputnumberstate */ void seqboot_inputfactors(AjPPhyloProp fact) { long i, j; Char ch, prevch; AjPStr str; prevch = ' '; str = fact->Str[0]; j = 0; for (i = 0; i < (sites); i++) { ch = ajStrGetCharPos(str,i); if (ch != prevch) j++; prevch = ch; factorr[i] = j; } } /* seqboot_inputfactors */ void inputoptions() { /* input the information on the options */ long weightsum, maxfactsize, i, j, k, l, m; if (data == genefreqs) { k = 0; l = 0; for (i = 0; i < (loci); i++) { m = alleles[i]; k++; for (j = 1; j <= m; j++) { l++; factorr[l - 1] = k; } } } else { for (i = 1; i <= (sites); i++) factorr[i - 1] = i; } if(factors){ seqboot_inputfactors(phylofact); } for (i = 0; i < (sites); i++) oldweight[i] = 1; if (weights) inputweightsstr2(phyloweights->Str[0],0, sites, &weightsum, oldweight, &weights, "seqboot"); if (factors && printdata) { for(i = 0; i < sites; i++) factor[i] = (char)('0' + (factorr[i]%10)); printfactors(outfile, sites, factor, " (least significant digit)"); } if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); for (i = 0; i < (loci); i++) how_many[i] = 0; for (i = 0; i < (loci); i++) where[i] = 0; for (i = 1; i <= (sites); i++) { how_many[factorr[i - 1] - 1]++; if (where[factorr[i - 1] - 1] == 0) where[factorr[i - 1] - 1] = i; } groups = factorr[sites - 1]; newgroups = 0; newsites = 0; maxfactsize = 0; for(i = 0 ; i < loci ; i++){ if(how_many[i] > maxfactsize){ maxfactsize = how_many[i]; } } maxnewsites = groups * maxfactsize; allocnew(); for (i = 0; i < groups; i++) { if (oldweight[where[i] - 1] > 0) { newgroups++; newsites += how_many[i]; newwhere[newgroups - 1] = where[i]; newhowmany[newgroups - 1] = how_many[i]; } } } /* inputoptions */ char **matrix_char_new(long rows, long cols) { char **mat; long i; assert(rows > 0); assert(cols > 0); mat = (char **)Malloc(rows*sizeof(char *)); for (i = 0; i < rows; i++) mat[i] = (char *)Malloc(cols*sizeof(char)); return mat; } void matrix_char_delete(char **mat, long rows) { long i; assert(mat != NULL); for (i = 0; i < rows; i++) free(mat[i]); free(mat); } double **matrix_double_new(long rows, long cols) { double **mat; long i; assert(rows > 0); assert(cols > 0); mat = (double **)Malloc(rows*sizeof(double *)); for (i = 0; i < rows; i++) mat[i] = (double *)Malloc(cols*sizeof(double)); return mat; } void matrix_double_delete(double **mat, long rows) { long i; assert(mat != NULL); for (i = 0; i < rows; i++) free(mat[i]); free(mat); } void seqboot_inputdataseq(AjPSeqset seqset) { /* input 
the names and sequences for each species */ long i, j, k, l, m, n; Char charstate; boolean allread, done; const AjPStr str; nodep = matrix_char_new(spp, sites); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); fprintf(outfile, " site\n\n"); } else { if (ild) { fprintf(outfile, "Site"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) fprintf(outfile, "Site"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, ", spp); fprintf(outfile, "%3ld sites\n\n", sites); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } allread = false; while (!allread) { i = 1; while (i <= spp) { initnameseq(seqset, i-1); str = ajSeqGetSeqS(ajSeqsetGetseqSeq(seqset, i-1)); j=0; done = false; while (!done) { while (j < sites ) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); j++; if (charstate == '.') charstate = nodep[0][j-1]; nodep[i-1][j-1] = charstate; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (!printdata) return; m = (sites - 1) / 60 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; n = (i - 1) * 60; for (k = n; k < l; k++) { if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) charstate = '.'; else charstate = nodep[j][k]; putc(charstate, outfile); if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdataseq */ void seqboot_inputdatafreq(AjPPhyloFreq freq) { /* input the names and sequences for each species */ long i, j, k, l, m, n; double x; ajint ipos=0; nodef = matrix_double_new(spp, sites); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); fprintf(outfile, " locus\n\n"); } else { if (ild) { fprintf(outfile, "Locus"); 
fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) fprintf(outfile, "Locus"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, %3ld loci\n\n", spp, loci); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } for (i = 1; i <= (spp); i++) { initnamefreq(freq,i - 1); j = 1; while (j <= sites) { x = freq->Data[ipos++]; if ((unsigned)x > 1.0) { printf("GENE FREQ OUTSIDE [0,1] in species %ld\n", i); embExitBad(); } else { nodef[i - 1][j - 1] = x; j++; } } } if (!printdata) return; m = (sites - 1) / 8 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 8; if (l > sites) l = sites; n = (i - 1) * 8; for (k = n; k < l; k++) { fprintf(outfile, "%8.5f", nodef[j][k]); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdatafreq */ void seqboot_inputdatarest(AjPPhyloState rest) { /* input the names and sequences for each species */ long i, j, k, l, m, n; Char charstate; AjPStr str; boolean allread, done; nodep = matrix_char_new(spp, sites); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); if (data == morphology) fprintf(outfile, " character\n\n"); if (data == restsites) fprintf(outfile, " site\n\n"); } else { if (ild) { if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, ", spp); if (data == seqs) fprintf(outfile, "%3ld sites\n\n", sites); else if (data == morphology) fprintf(outfile, "%3ld characters\n\n", sites); else if (data == restsites) fprintf(outfile, "%3ld sites\n\n", sites); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } allread = false; while (!allread) { allread = true; i = 1; while (i <= spp) { initnamestate(rest, i-1); str = rest->Str[i-1]; j = 0; done = false; while (!done) { while (j < sites) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); j++; if (charstate == '.') charstate = nodep[0][j-1]; nodep[i-1][j-1] = charstate; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (!printdata) return; m = (sites - 1) / 60 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k 
< nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; n = (i - 1) * 60; for (k = n; k < l; k++) { if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) charstate = '.'; else charstate = nodep[j][k]; putc(charstate, outfile); if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdatarest */ void allocrest() { /* allocate memory for bookkeeping arrays */ oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); if (categories) category = (steptr)Malloc(sites*sizeof(long)); if (mixture) mixdata = (steptr)Malloc(sites*sizeof(long)); if (ancvar) ancdata = (steptr)Malloc(sites*sizeof(long)); where = (steptr)Malloc(loci*sizeof(long)); how_many = (steptr)Malloc(loci*sizeof(long)); factor = (Char *)Malloc(sites*sizeof(Char)); factorr = (steptr)Malloc(sites*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void freerest() { /* Free bookkeeping arrays */ if (alleles) free(alleles); free(oldweight); free(weight); if (categories) free(category); if (mixture) free(mixdata); if (ancvar) free(ancdata); free(where); free(how_many); free(factor); free(factorr); free(nayme); } void allocnew(void) { /* allocate memory for arrays that depend on the lenght of the output sequence*/ /* Only call this function once */ assert(newwhere == NULL && newhowmany == NULL); newwhere = (steptr)Malloc(loci*sizeof(long)); newhowmany = (steptr)Malloc(loci*sizeof(long)); } void freenew(void) { /* free arrays allocated by allocnew() */ /* Only call this function once */ assert(newwhere != NULL); assert(newhowmany != NULL); free(newwhere); free(newhowmany); } void allocnewer(long newergroups, long newersites) { /* allocate memory for arrays that depend on the length of the bootstrapped output sequence */ /* Assumes that spp remains constant */ static long curnewergroups = 0; static long curnewersites = 0; long i; if (newerwhere != NULL) { if (newergroups > curnewergroups) { free(newerwhere); free(newerhowmany); for (i = 0; i < spp; i++) free(charorder[i]); newerwhere = NULL; } if (newersites > curnewersites) { free(newerfactor); newerfactor = NULL; } } if (charorder == NULL) charorder = (steptr *)Malloc(spp*sizeof(steptr)); /* Malloc() will fail if either is 0, so add a dummy element */ if (newergroups == 0) newergroups++; if (newersites == 0) newersites++; if (newerwhere == NULL) { newerwhere = (steptr)Malloc(newergroups*sizeof(long)); newerhowmany = (steptr)Malloc(newergroups*sizeof(long)); for (i = 0; i < spp; i++) charorder[i] = (steptr)Malloc(newergroups*sizeof(long)); curnewergroups = newergroups; } if (newerfactor == NULL) { newerfactor = (steptr)Malloc(newersites*sizeof(long)); curnewersites = newersites; } } void freenewer() { /* Free memory allocated by allocnewer() */ /* spp must be the same as when allocnewer was called */ long i; if (newerwhere) { free(newerwhere); free(newerhowmany); free(newerfactor); for (i = 0; i < spp; i++) free(charorder[i]); free(charorder); } } void doinput(int argc, Char *argv[]) { /* reads the input data */ seqboot_inputnumbersseq(seqset); allocrest(); inputoptions(); seqboot_inputdataseq(seqset); } /* doinput */ void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; long grp = 0, site = 0; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; if (jackknife) { if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) { if 
(randum(seed) < (newgroups*fracsample - (long)(newgroups*fracsample)) /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) q = (long)(newgroups*fracsample)+1; else q = (long)(newgroups*fracsample); } else q = (long)(newgroups*fracsample+0.5); r = newgroups; p = q / r; ws = 0; for (i = 0; i < (newgroups); i++) { if (randum(seed) < p) { weight[i]++; ws++; q--; } r--; if (i + 1 < newgroups) p = q / r; } } else if (permute) { for (i = 0; i < (newgroups); i++) weight[i] = 1; } else if (bootstrap) { blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } } else /* case of rewriting data */ for (i = 0; i < (newgroups); i++) weight[i] = 1; /* Count number of replicated groups */ newergroups = 0; newersites = 0; for (i = 0; i < newgroups; i++) { newergroups += weight[i]; newersites += newhowmany[i] * weight[i]; } if (newergroups < 1) { fprintf(stdout, "ERROR: sampling frequency or number of sites is too small\n"); exxit(-1); } /* reallocate "newer" arrays, sized by output groups: * newerfactor, newerwhere, newerhowmany, and charorder */ allocnewer(newergroups, newersites); /* Replicate each group i weight[i] times */ grp = 0; site = 0; for (i = 0; i < newgroups; i++) { for (j = 0; j < weight[i]; j++) { for (k = 0; k < newhowmany[i]; k++) { newerfactor[site] = grp + 1; site++; } newerwhere[grp] = newwhere[i]; newerhowmany[grp] = newhowmany[i]; grp++; } } } /* bootweights */ void permute_vec(long *a, long n) { long i, j, k; for (i = 1; i < n; i++) { k = (long)((i+1) * randum(seed)); j = a[i]; a[i] = a[k]; a[k] = j; } } void sppermute(long n) { /* permute the species order as given in array sppord */ permute_vec(sppord[n-1], spp); } /* sppermute */ void charpermute(long m, long n) { /* permute the n+1 characters of species m+1 */ permute_vec(charorder[m], n); } /* charpermute */ void writedata() { /* write out one set of bootstrapped sequences */ long i, j, k, l, m, n, n2=0; double x; Char charstate; sppord = (long **)Malloc(newergroups*sizeof(long *)); for (i = 0; i < (newergroups); i++) sppord[i] = (long *)Malloc(spp*sizeof(long)); for (j = 1; j <= spp; j++) sppord[0][j - 1] = j; for (i = 1; i < newergroups; i++) { for (j = 1; j <= (spp); j++) sppord[i][j - 1] = sppord[i - 1][j - 1]; } if (!justwts || permute) { if (data == restsites && enzymes) fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); else if (data == genefreqs) fprintf(outfile, "%5ld %5ld\n", spp, newergroups); else { if ((data == seqs) && rewrite && xml) fprintf(outfile, "\n"); else if (rewrite && nexus) { fprintf(outfile, "#NEXUS\n"); fprintf(outfile, "BEGIN DATA;\n"); fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", spp, newersites); fprintf(outfile, " FORMAT"); if (interleaved) fprintf(outfile, " interleave=yes"); else fprintf(outfile, " interleave=no"); fprintf(outfile, " DATATYPE="); if (data == seqs) { switch (seq) { case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; case (rna): fprintf(outfile, "RNA missing=N gap=-"); break; case (protein): fprintf(outfile, "protein missing=? 
gap=-"); break; } } if (data == morphology) fprintf(outfile, "STANDARD"); fprintf(outfile, ";\n MATRIX\n"); } else fprintf(outfile, "%5ld %5ld\n", spp, newersites); } if (data == genefreqs) { for (i = 0; i < (newergroups); i++) fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); putc('\n', outfile); } } l = 1; if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) && ((data == seqs) || (data == restsites))) { interleaved = !interleaved; if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) interleaved = false; } m = interleaved ? 60 : newergroups; do { if (m > newergroups) m = newergroups; for (j = 0; j < spp; j++) { n = 0; if ((l == 1) || (interleaved && nexus)) { if (rewrite && xml) { fprintf(outfile, " \n"); fprintf(outfile, " "); } n2 = nmlngth; if (rewrite && (xml || nexus)) { while (nayme[j][n2-1] == ' ') n2--; } if (nexus) fprintf(outfile, " "); for (k = 0; k < n2; k++) if (nexus && (nayme[j][k] == ' ') && (k < n2)) putc('_', outfile); else putc(nayme[j][k], outfile); if (rewrite && xml) fprintf(outfile, "\n "); } else { if (rewrite && xml) { fprintf(outfile, " "); } } if (!xml) { for (k = 0; k < nmlngth-n2; k++) fprintf(outfile, " "); fprintf(outfile, " "); } for (k = l - 1; k < m; k++) { if (permute && j + 1 == 1) sppermute(newerfactor[n]); /* we can assume chars not permuted */ for (n2 = -1; n2 <= (newerhowmany[charorder[j][k]] - 2); n2++) { n++; if (data == genefreqs) { if (n > 1 && (n & 7) == 1) fprintf(outfile, "\n "); x = nodef[sppord[charorder[j][k]][j] - 1] [newerwhere[charorder[j][k]] + n2]; fprintf(outfile, "%8.5f", x); } else { if (rewrite && xml && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); charstate = nodep[sppord[charorder[j][k]][j] - 1] [newerwhere[charorder[j][k]] + n2]; putc(charstate, outfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfile); } } } if (rewrite && xml) { fprintf(outfile, "\n \n"); } putc('\n', outfile); } if (interleaved) { if ((m <= newersites) && (newersites > 60)) putc('\n', outfile); l += 60; m += 60; } } while (interleaved && l <= newersites); if ((data == seqs) && (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) fprintf(outfile, "\n"); if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) fprintf(outfile, " ;\nEND;\n"); for (i = 0; i < (newergroups); i++) free(sppord[i]); free(sppord); } /* writedata */ void writeweights() { /* write out one set of post-bootstrapping weights */ long j, k, l, m, n, o; j = 0; l = 1; if (interleaved) m = 60; else m = sites; do { if(m > sites) m = sites; n = 0; for (k = l - 1; k < m; k++) { for(o = 0 ; o < how_many[k] ; o++){ if(oldweight[k]==0){ fprintf(outweightfile, "0"); j++; } else{ if (weight[k-j] < 10) fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); else fprintf(outweightfile, "%c", (char)('A'+weight[k-j]-10)); n++; if (!interleaved && n > 1 && n % 60 == 1) { fprintf(outweightfile, "\n"); if (n % 10 == 0 && n % 60 != 0) putc(' ', outweightfile); } } } } putc('\n', outweightfile); if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= sites); } /* writeweights */ void writecategories() { /* write out categories for the bootstrapped sequences */ long k, l, m, n, n2; Char charstate; if(justwts){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n=0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[k]; 
putc(charstate, outcatfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outcatfile, "\n"); return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[newerwhere[k] + n2]; putc(charstate, outcatfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outcatfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outcatfile, "\n"); } /* writecategories */ void writeauxdata(steptr auxdata, FILE *outauxfile) { /* write out auxiliary option data (mixtures, ancestors, etc.) to appropriate file. Samples parralel to data, or just gives one output entry if justwts is true */ long k, l, m, n, n2; Char charstate; /* if we just output weights (justwts), and this is first set just output the data unsampled */ if(justwts){ if(firstrep){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[k]; putc(charstate, outauxfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outauxfile, "\n"); } return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[newerwhere[k] + n2]; putc(charstate, outauxfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outauxfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outauxfile, "\n"); } /* writeauxdata */ void writefactors(void) { long i, k, l, m, n, writesites; char symbol; /*steptr wfactor;*/ long grp; if(!justwts || firstrep){ if(justwts){ writesites = sites; /*wfactor = factorr;*/ } else { writesites = newergroups; /*wfactor = newerfactor;*/ } symbol = '+'; if (interleaved) m = 60; else m = writesites; l=1; do { if(m > writesites) m = writesites; n = 0; for(k=l-1 ; k < m ; k++){ grp = charorder[0][k]; for(i = 0; i < newerhowmany[grp]; i++) { putc(symbol, outfactfile); n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outfactfile, "\n "); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfactfile); } symbol = (symbol == '+') ? 
'-' : '+'; } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= writesites); fprintf(outfactfile, "\n"); } } /* writefactors */ void bootwrite() { /* does bootstrapping and writes out data sets */ long i, j, rr, repdiv10; if (rewrite) reps = 1; repdiv10 = reps / 10; if (repdiv10 < 1) repdiv10 = 1; if (progress) putchar('\n'); firstrep = true; for (rr = 1; rr <= (reps); rr++) { bootweights(); for (i = 0; i < spp; i++) for (j = 0; j < newergroups; j++) charorder[i][j] = j; if (ild) { charpermute(0, newergroups); for (i = 1; i < spp; i++) for (j = 0; j < newergroups; j++) charorder[i][j] = charorder[0][j]; } if (lockhart) for (i = 0; i < spp; i++) charpermute(i, newergroups); if (!justwts || permute || ild || lockhart) writedata(); if (justwts && !(permute || ild || lockhart)) writeweights(); if (categories) writecategories(); if (factors) writefactors(); if (mixture) writeauxdata(mixdata, outmixfile); if (ancvar) writeauxdata(ancdata, outancfile); if (progress && !rewrite && ((reps < 10) || rr % repdiv10 == 0)) { printf("completed replicate number %4ld\n", rr); #ifdef WIN32 phyFillScreenColor(); #endif firstrep = false; } } if (progress) { if (justwts) printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); else printf("\nOutput written to file \"%s\"\n\n", outfilename); } } /* bootwrite */ int main(int argc, Char *argv[]) { /* Read in sequences or frequencies and bootstrap or jackknife them */ #ifdef MAC argc = 1; /* macsetup("SeqBoot",""); */ argv[0] = "SeqBoot"; #endif init(argc,argv); emboss_getoptions("fseqbootall", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinput(argc, argv); bootwrite(); freenewer(); freenew(); freerest(); if (nodep) matrix_char_delete(nodep, spp); if (nodef) matrix_double_delete(nodef, spp); FClose(infile); if (factors) { FClose(factfile); FClose(outfactfile); } if (weights) FClose(weightfile); if (categories) { FClose(catfile); FClose(outcatfile); } if(mixture) FClose(outmixfile); if(ancvar) FClose(outancfile); if (justwts && !permute) { FClose(outweightfile); } else FClose(outfile); #ifdef MAC fixmacfile(outfilename); if (justwts && !permute) fixmacfile(outweightfilename); if (categories) fixmacfile(outcatfilename); if (mixture) fixmacfile(outmixfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; }

PHYLIPNEW-3.69.650/src/protpars.c

#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed.
*/ #define maxtrees 100 /* maximum number of tied trees stored */ typedef enum { universal, ciliate, mito, vertmito, flymito, yeastmito } codetype; /* nodes will form a binary tree */ typedef struct gseq { seqptr seq; struct gseq *next; } gseq; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; #ifndef OLDC /* function prototypes */ void protgnu(gseq **); void protchuck(gseq *); void code(void); void setup(void); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void protalloctree(void); void allocrest(void); void doinit(void); void protinputdata(AjPSeqset); void protmakevalues(void); void doinput(void); void protfillin(node *, node *, node *); void protpreorder(node *); void protadd(node *, node *, node *); void protre_move(node **, node **); void evaluate(node *); void protpostorder(node *); void protreroot(node *); void protsavetraverse(node *, long *, boolean *); void protsavetree(long *, boolean *); void tryadd(node *, node **, node **); void addpreorder(node *, node *, node *); void tryrearr(node *, boolean *); void repreorder(node *, boolean *); void rearrange(node **); void protgetch(Char *); void protaddelement(node **, long *, long *, boolean *, char **); void prottreeread(char**); void protancestset(long *, long *, long *, long *, long *); void prothyprint(long , long , boolean *, node *, boolean *, boolean *); void prothyptrav(node *, sitearray *, long, long, long *, boolean *, sitearray); void prothypstates(long *); void describe(void); void maketree(void); void reallocnode(node* p); void reallocchars(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node *root; long chars, col, msets, ith, njumble, jumb, numtrees; /* chars = number of sites in actual sequences */ long inseed, inseed0; boolean jumble, usertree, weights, thresh, trout, progress, stepbox, justwts, ancseq, mulsets, firstset; codetype whichcode; long fullset, fulldel; pointarray treenode; /* pointers to all nodes in tree */ double threshold; steptr threshwt; longer seed; long *enterorder; sitearray translate[(long)quest - (long)ala + 1]; aas trans[4][4][4]; long **fsteps; bestelm *bestrees; boolean dummy; gseq *garbage; node *temp, *temp1; Char ch; aas tmpa; char *progname; /* Local variables for maketree, propagated globally for c version: */ long minwhich; double like, bestyet, bestlike, minsteps, bstlike2; boolean lastrearr, recompute; node *there; double nsteps[maxuser]; long *place; boolean *names; void protgnu(gseq **p) { /* this and the following are do-it-yourself garbage collectors. 
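
   (In other words, protchuck() pushes a released gseq onto the head of the
   garbage list and protgnu() reuses that head entry, refreshing its seq
   buffer, before falling back on Malloc. The same free-list pattern, reduced
   to its essentials with a hypothetical item type and plain malloc():

       #include <stdlib.h>

       struct item { struct item *next; };
       static struct item *freelist = NULL;

       struct item *item_get(void)
       {
         struct item *p;
         if (freelist != NULL) {
           p = freelist;
           freelist = freelist->next;
         } else
           p = (struct item *)malloc(sizeof(struct item));
         p->next = NULL;
         return p;
       }

       void item_put(struct item *p)
       {
         p->next = freelist;
         freelist = p;
       }

   Recycling this way avoids a malloc/free pair for every temporary
   sequence-set node.)
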
Make a new node or pull one off the garbage list */ if (garbage != NULL) { *p = garbage; free((*p)->seq); (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray)); garbage = garbage->next; } else { *p = (gseq *)Malloc(sizeof(gseq)); (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray)); } (*p)->next = NULL; } /* protgnu */ void protchuck(gseq *p) { /* collect garbage on p -- put it on front of garbage list */ p->next = garbage; garbage = p; } /* protchuck */ void code() { /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */ trans[0][0][0] = phe; trans[0][0][1] = phe; trans[0][0][2] = leu; trans[0][0][3] = leu; trans[0][1][0] = ser; trans[0][1][1] = ser1; trans[0][1][2] = ser1; trans[0][1][3] = ser1; trans[0][2][0] = tyr; trans[0][2][1] = tyr; trans[0][2][2] = stop; trans[0][2][3] = stop; trans[0][3][0] = cys; trans[0][3][1] = cys; trans[0][3][2] = stop; trans[0][3][3] = trp; trans[1][0][0] = leu; trans[1][0][1] = leu; trans[1][0][2] = leu; trans[1][0][3] = leu; trans[1][1][0] = pro; trans[1][1][1] = pro; trans[1][1][2] = pro; trans[1][1][3] = pro; trans[1][2][0] = his; trans[1][2][1] = his; trans[1][2][2] = gln; trans[1][2][3] = gln; trans[1][3][0] = arg; trans[1][3][1] = arg; trans[1][3][2] = arg; trans[1][3][3] = arg; trans[2][0][0] = ileu; trans[2][0][1] = ileu; trans[2][0][2] = ileu; trans[2][0][3] = met; trans[2][1][0] = thr; trans[2][1][1] = thr; trans[2][1][2] = thr; trans[2][1][3] = thr; trans[2][2][0] = asn; trans[2][2][1] = asn; trans[2][2][2] = lys; trans[2][2][3] = lys; trans[2][3][0] = ser2; trans[2][3][1] = ser2; trans[2][3][2] = arg; trans[2][3][3] = arg; trans[3][0][0] = val; trans[3][0][1] = val; trans[3][0][2] = val; trans[3][0][3] = val; trans[3][1][0] = ala; trans[3][1][1] = ala; trans[3][1][2] = ala; trans[3][1][3] = ala; trans[3][2][0] = asp; trans[3][2][1] = asp; trans[3][2][2] = glu; trans[3][2][3] = glu; trans[3][3][0] = gly; trans[3][3][1] = gly; trans[3][3][2] = gly; trans[3][3][3] = gly; if (whichcode == mito) trans[0][3][2] = trp; if (whichcode == vertmito) { trans[0][3][2] = trp; trans[2][3][2] = stop; trans[2][3][3] = stop; trans[2][0][2] = met; } if (whichcode == flymito) { trans[0][3][2] = trp; trans[2][0][2] = met; trans[2][3][2] = ser2; } if (whichcode == yeastmito) { trans[0][3][2] = trp; trans[1][0][2] = thr; trans[2][0][2] = met; } } /* code */ void setup() { /* set up set table to get aasets from aas */ aas a, b; long i, j, k, l, s; for (a = ala; (long)a <= (long)stop; a = (aas)((long)a + 1)) { translate[(long)a - (long)ala][0] = 1L << ((long)a); translate[(long)a - (long)ala][1] = 1L << ((long)a); } for (i = 0; i <= 3; i++) { for (j = 0; j <= 3; j++) { for (k = 0; k <= 3; k++) { for (l = 0; l <= 3; l++) { translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[l][j][k]); translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][l][k]); translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][j][l]); } } } } translate[(long)del - (long)ala][1] = 1L << ((long)del); fulldel = (1L << ((long)stop + 1)) - (1L << ((long)ala)); fullset = fulldel & (~(1L << ((long)del))); translate[(long)asx - (long)ala][0] = (1L << ((long)asn)) | (1L << ((long)asp)); translate[(long)glx - (long)ala][0] = (1L << ((long)gln)) | (1L << ((long)glu)); translate[(long)ser - (long)ala][0] = (1L << ((long)ser1)) | (1L << ((long)ser2)); translate[(long)unk - (long)ala][0] = fullset; translate[(long)quest - (long)ala][0] = fulldel; translate[(long)asx - (long)ala][1] = translate[(long)asn - (long)ala][1] | translate[(long)asp - (long)ala][1]; translate[(long)glx - (long)ala][1] = 
translate[(long)gln - (long)ala][1] | translate[(long)glu - (long)ala][1]; translate[(long)ser - (long)ala][1] = translate[(long)ser1 - (long)ala][1] | translate[(long)ser2 - (long)ala][1]; translate[(long)unk - (long)ala][1] = fullset; translate[(long)quest - (long)ala][1] = fulldel; for (a = ala; (long)a <= (long)quest; a = (aas)((long)a + 1)) { s = 0; for (b = ala; (long)b <= (long)stop; b = (aas)((long)b + 1)) { if (((1L << ((long)b)) & translate[(long)a - (long)ala][1]) != 0) s |= translate[(long)b - (long)ala][1]; } translate[(long)a - (long)ala][2] = s; } } /* setup */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; AjPStr codestr; jumble = false; njumble = 1; outgrno = 1; outgropt = false; thresh = false; trout = true; weights = false; whichcode = universal; printdata = false; progress = true; stepbox = false; ancseq = false; treeprint = true; usertree = false; mulsets = false; justwts = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees){ numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); dotdiff = ajAcdGetBoolean("dotdiff"); if(!usertree) { njumble = ajAcdGetInt("njumble"); if(njumble >0) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; thresh = ajAcdGetToggle("dothreshold"); if(thresh) threshold = ajAcdGetFloat("threshold"); codestr = ajAcdGetListSingle("whichcode"); if(ajStrMatchCaseC(codestr, "u")) whichcode = universal; else if (ajStrMatchCaseC(codestr, "m")) whichcode = mito; else if (ajStrMatchCaseC(codestr,"v")) whichcode = vertmito; else if (ajStrMatchCaseC(codestr,"f")) whichcode = flymito; else if (ajStrMatchCaseC(codestr,"y")) whichcode = yeastmito; else ajDie("Unknown option for 'which genetic code'"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); embossouttree = ajAcdGetOutfile("outtreefile"); if(trout) emboss_openfile(embossouttree, &outtree, &outtreename); fprintf(outfile, "\nProtein parsimony algorithm, version %s\n\n",VERSION); } /* emboss_getoptions */ void protalloctree() { /* allocate treenode dynamically */ long i, j; node *p, *q; treenode = (pointarray)Malloc(nonodes*sizeof(node *)); for (i = 0; i < (spp); i++) { treenode[i] = (node *)Malloc(sizeof(node)); treenode[i]->numsteps = (steptr)Malloc(chars*sizeof(long)); treenode[i]->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); treenode[i]->seq = (aas *)Malloc(chars*sizeof(aas)); } for (i = spp; i < (nonodes); i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->numsteps = (steptr)Malloc(chars*sizeof(long)); p->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); p->seq = (aas 
*)Malloc(chars*sizeof(aas)); p->next = q; q = p; } p->next->next->next = p; treenode[i] = p; } } /* protalloctree */ void reallocnode(node* p) { free(p->numsteps); free(p->siteset); free(p->seq); p->numsteps = (steptr)Malloc(chars*sizeof(long)); p->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); p->seq = (aas *)Malloc(chars*sizeof(aas)); } void reallocchars(void) { /* reallocates variables that are dependand on the number of chars * do we need to reallocate the garbage list too? */ long i; node *p; if (usertree) for (i = 0; i < maxuser; i++) { free(fsteps[i]); fsteps[i] = (long *)Malloc(chars*sizeof(long)); } for (i = 0; i < nonodes; i++) { reallocnode(treenode[i]); if (i >= spp) { p=treenode[i]->next; while (p != treenode[i]) { reallocnode(p); p = p->next; } } } free(weight); free(threshwt); free(temp->numsteps); free(temp->siteset); free(temp->seq); free(temp1->numsteps); free(temp1->siteset); free(temp1->seq); weight = (steptr)Malloc(chars*sizeof(long)); threshwt = (steptr)Malloc(chars*sizeof(long)); temp->numsteps = (steptr)Malloc(chars*sizeof(long)); temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); temp->seq = (aas *)Malloc(chars*sizeof(aas)); temp1->numsteps = (steptr)Malloc(chars*sizeof(long)); temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); temp1->seq = (aas *)Malloc(chars*sizeof(aas)); } void allocrest() { /* allocate remaining global arrays and variables dynamically */ long i; if (usertree) { fsteps = (long **)Malloc(maxuser*sizeof(long *)); for (i = 0; i < maxuser; i++) fsteps[i] = (long *)Malloc(chars*sizeof(long)); } bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1].btree = (long *)Malloc(spp*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); place = (long *)Malloc(nonodes*sizeof(long)); weight = (steptr)Malloc(chars*sizeof(long)); threshwt = (steptr)Malloc(chars*sizeof(long)); temp = (node *)Malloc(sizeof(node)); temp->numsteps = (steptr)Malloc(chars*sizeof(long)); temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); temp->seq = (aas *)Malloc(chars*sizeof(aas)); temp1 = (node *)Malloc(sizeof(node)); temp1->numsteps = (steptr)Malloc(chars*sizeof(long)); temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); temp1->seq = (aas *)Malloc(chars*sizeof(aas)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &chars, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars); protalloctree(); allocrest(); } /* doinit*/ void protinputdata(AjPSeqset seqset) { /* input the names and sequences for each species */ long i, j, k, l; Char charstate; aas aa = unk; /* temporary amino acid for input */ const AjPStr str; if (printdata) headings(chars, "Sequences", "---------"); for (i=1; i <= spp; i++) { initnameseq(seqset, i-1); str = ajSeqGetSeqS(ajSeqsetGetseqSeq(seqset, i-1)); j = 0; while (j < chars) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); if ((!isalpha((int)charstate) && charstate != '?' && charstate != '-' && charstate != '*') || charstate == 'J' || charstate == 'O' || charstate == 'U') { printf("WARNING -- BAD AMINO ACID:%c",charstate); printf(" AT POSITION%5ld OF SPECIES %3ld\n",j,i); embExitBad(); } j++; aa = (charstate == 'A') ? ala : (charstate == 'B') ? asx : (charstate == 'C') ? cys : (charstate == 'D') ? asp : (charstate == 'E') ? glu : (charstate == 'F') ? phe : (charstate == 'G') ? gly : aa; aa = (charstate == 'H') ? his : (charstate == 'I') ? 
ileu : (charstate == 'K') ? lys : (charstate == 'L') ? leu : (charstate == 'M') ? met : (charstate == 'N') ? asn : (charstate == 'P') ? pro : (charstate == 'Q') ? gln : (charstate == 'R') ? arg : aa; aa = (charstate == 'S') ? ser : (charstate == 'T') ? thr : (charstate == 'V') ? val : (charstate == 'W') ? trp : (charstate == 'X') ? unk : (charstate == 'Y') ? tyr : (charstate == 'Z') ? glx : (charstate == '*') ? stop : (charstate == '?') ? quest: (charstate == '-') ? del : aa; treenode[i - 1]->seq[j - 1] = aa; memcpy(treenode[i - 1]->siteset[j - 1], translate[(long)aa - (long)ala], sizeof(sitearray)); } } if (printdata) { for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { for (j = 1; j <= (spp); j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j - 1][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && treenode[j - 1]->seq[k - 1] == treenode[0]->seq[k - 1]) charstate = '.'; else { tmpa = treenode[j-1]->seq[k-1]; charstate = (tmpa == ala) ? 'A' : (tmpa == asx) ? 'B' : (tmpa == cys) ? 'C' : (tmpa == asp) ? 'D' : (tmpa == glu) ? 'E' : (tmpa == phe) ? 'F' : (tmpa == gly) ? 'G' : (tmpa == his) ? 'H' : (tmpa ==ileu) ? 'I' : (tmpa == lys) ? 'K' : (tmpa == leu) ? 'L' : charstate; charstate = (tmpa == met) ? 'M' : (tmpa == asn) ? 'N' : (tmpa == pro) ? 'P' : (tmpa == gln) ? 'Q' : (tmpa == arg) ? 'R' : (tmpa == ser) ? 'S' : (tmpa ==ser1) ? 'S' : (tmpa ==ser2) ? 'S' : charstate; charstate = (tmpa == thr) ? 'T' : (tmpa == val) ? 'V' : (tmpa == trp) ? 'W' : (tmpa == unk) ? 'X' : (tmpa == tyr) ? 'Y' : (tmpa == glx) ? 'Z' : (tmpa == del) ? '-' : (tmpa ==stop) ? '*' : (tmpa==quest) ? '?' : charstate; } putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* protinputdata */ void protmakevalues() { /* set up fractional likelihoods at tips */ long i, j; node *p; for (i = 1; i <= nonodes; i++) { treenode[i - 1]->back = NULL; treenode[i - 1]->tip = (i <= spp); treenode[i - 1]->index = i; for (j = 0; j < (chars); j++) treenode[i - 1]->numsteps[j] = 0; if (i > spp) { p = treenode[i - 1]->next; while (p != treenode[i - 1]) { p->back = NULL; p->tip = false; p->index = i; for (j = 0; j < (chars); j++) p->numsteps[j] = 0; p = p->next; } } } } /* protmakevalues */ void doinput() { /* reads the input data */ long i; if (justwts) { if (firstset) protinputdata(seqsets[0]); for (i = 0; i < chars; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } else { if (!firstset){ samenumspseq(seqsets[ith-1], &chars, ith); reallocchars(); } for (i = 0; i < chars; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[0], chars, weight, &weights); } if (weights) printweights(outfile, 0, chars, weight, "Sites"); protinputdata(seqsets[ith-1]); } if(!thresh) threshold = spp * 3.0; for(i = 0 ; i < (chars) ; i++){ weight[i]*=10; threshwt[i] = (long)(threshold * weight[i] + 0.5); } protmakevalues(); } /* doinput */ void protfillin(node *p, node *left, node *rt) { /* sets up for each node in the tree the aa set for site m at that point and counts the changes. 
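
   (Each siteset entry holds three bit-sets: roughly, the amino-acid states
   attainable in the subtree with the minimum number of substitutions, with
   one extra, and with two extra. For orientation, the ordinary one-set Fitch
   parsimony step on two descendant bit-sets, with a long used as the bit-set
   as these routines do, is just:

       long fitch_combine(long left, long right, long sitewt, long *steps)
       {
         long both = left & right;
         if (both != 0)
           return both;
         *steps += sitewt;
         return left | right;
       }

   protfillin performs the analogous operation across the three tiers, which
   is why the case 0 to 3 combinations below become progressively more
   permissive, with weight[m] added to the step count each time a cheaper
   combination comes up empty.)
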
The program spends much of its time in this function */ boolean counted, done; aas aa; long s = 0; sitearray ls, rs, qs; long i, j, m, n; for (m = 0; m < chars; m++) { if (left != NULL) memcpy(ls, left->siteset[m], sizeof(sitearray)); if (rt != NULL) memcpy(rs, rt->siteset[m], sizeof(sitearray)); if (left == NULL) { n = rt->numsteps[m]; memcpy(qs, rs, sizeof(sitearray)); } else if (rt == NULL) { n = left->numsteps[m]; memcpy(qs, ls, sizeof(sitearray)); } else { n = left->numsteps[m] + rt->numsteps[m]; if ((ls[0] == rs[0]) && (ls[1] == rs[1]) && (ls[2] == rs[2])) { qs[0] = ls[0]; qs[1] = ls[1]; qs[2] = ls[2]; } else { counted = false; for (i = 0; (!counted) && (i <= 3); i++) { switch (i) { case 0: s = ls[0] & rs[0]; break; case 1: s = (ls[0] & rs[1]) | (ls[1] & rs[0]); break; case 2: s = (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); break; case 3: s = ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; break; } if (s != 0) { qs[0] = s; counted = true; } else n += weight[m]; } switch (i) { case 1: qs[1] = qs[0] | (ls[0] & rs[1]) | (ls[1] & rs[0]); qs[2] = qs[1] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); break; case 2: qs[1] = qs[0] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); qs[2] = qs[1] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; break; case 3: qs[1] = qs[0] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; qs[2] = qs[1] | ls[1] | (ls[2] & rs[2]) | rs[1]; break; case 4: qs[1] = qs[0] | ls[1] | (ls[2] & rs[2]) | rs[1]; qs[2] = qs[1] | ls[2] | rs[2]; break; } for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { done = false; for (i = 0; (!done) && (i <= 1); i++) { if (((1L << ((long)aa)) & qs[i]) != 0) { for (j = i+1; j <= 2; j++) qs[j] |= translate[(long)aa - (long)ala][j-i]; done = true; } } } } } p->numsteps[m] = n; memcpy(p->siteset[m], qs, sizeof(sitearray)); } } /* protfillin */ void protpreorder(node *p) { /* recompute number of steps in preorder taking both ancestoral and descendent steps into account */ if (p != NULL && !p->tip) { protfillin (p->next, p->next->next->back, p->back); protfillin (p->next->next, p->back, p->next->back); protpreorder (p->next->back); protpreorder (p->next->next->back); } } /* protpreorder */ void protadd(node *below, node *newtip, node *newfork) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant */ if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (root == below) root = newfork; root->back = NULL; if (recompute) { protfillin (newfork, newfork->next->back, newfork->next->next->back); protpreorder(newfork); if (newfork != root) protpreorder(newfork->back); } } /* protadd */ void protre_move(node **item, node **fork) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). 
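
   (Interior forks are kept as a ring of three linked node structures, so
   detaching the fork mainly means letting whatever hangs off its other two
   ring nodes point at each other again; in outline, with p and q those two
   neighbours:

       if (p != NULL) p->back = q;
       if (q != NULL) q->back = p;

   after which item and fork dangle free of the tree, as the pointer
   shuffling below does it.)
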
Also returns pointers to the deleted nodes, item and fork */ node *p, *q, *other; if ((*item)->back == NULL) { *fork = NULL; return; } *fork = treenode[(*item)->back->index - 1]; if ((*item) == (*fork)->next->back) other = (*fork)->next->next->back; else other = (*fork)->next->back; if (root == *fork) root = other; p = (*item)->back->next->back; q = (*item)->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; do { p->back = NULL; p = p->next; } while (p != (*fork)); (*item)->back = NULL; if (recompute) { protpreorder(other); if (other != root) protpreorder(other->back); } } /* protre_move */ void evaluate(node *r) { /* determines the number of steps needed for a tree. this is the minimum number of steps needed to evolve sequences on this tree */ long i, steps, term; double sum; sum = 0.0; for (i = 0; i < (chars); i++) { steps = r->numsteps[i]; if (steps <= threshwt[i]) term = steps; else term = threshwt[i]; sum += term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { minwhich = 1; minsteps = sum; } else if (sum < minsteps) { minwhich = which; minsteps = sum; } } like = -sum; } /* evaluate */ void protpostorder(node *p) { /* traverses a binary tree, calling PROCEDURE fillin at a node's descendants before calling fillin at the node */ if (p->tip) return; protpostorder(p->next->back); protpostorder(p->next->next->back); protfillin(p, p->next->back, p->next->next->back); } /* protpostorder */ void protreroot(node *outgroup) { /* reorients tree, putting outgroup in desired position. */ node *p, *q; if (outgroup->back->index == root->index) return; p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = q; outgroup->back = p; } /* protreroot */ void protsavetraverse(node *p, long *pos, boolean *found) { /* sets BOOLEANs that indicate which way is down */ p->bottom = true; if (p->tip) return; p->next->bottom = false; protsavetraverse(p->next->back, pos,found); p->next->next->bottom = false; protsavetraverse(p->next->next->back, pos,found); } /* protsavetraverse */ void protsavetree(long *pos, boolean *found) { /* record in place where each species has to be added to reconstruct this tree */ long i, j; node *p; boolean done; protreroot(treenode[outgrno - 1]); protsavetraverse(root, pos,found); for (i = 0; i < (nonodes); i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= (spp); i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; while (!p->bottom) p = p->next; p = p->back; } if (i > 1) { place[i - 1] = place[p->index - 1]; j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = spp + i - 1; while (!p->bottom) p = p->next; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); } } } } /* protsavetree */ void tryadd(node *p, node **item, node **nufork) { /* temporarily adds one fork and one tip to the tree. 
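
   (The candidate position is scored without really attaching anything; in
   outline, for a non-root p,

       protfillin(temp1, *item, p);
       protfillin(temp, temp1, p->back);
       evaluate(temp);

   leaves the score in the global like for comparison with bestyet while the
   tree itself stays untouched; only during the final round of rearrangement,
   when lastrearr is set, is the tree briefly modified with protadd and
   protre_move so that the list of tied best trees can be updated.)
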
if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; boolean found; node *rute, *q; if (p == root) protfillin(temp, *item, p); else { protfillin(temp1, *item, p); protfillin(temp, temp1, p->back); } evaluate(temp); if (lastrearr) { if (like < bestlike) { if ((*item) == (*nufork)->next->next->back) { q = (*nufork)->next; (*nufork)->next = (*nufork)->next->next; (*nufork)->next->next = q; q->next = (*nufork); } } else if (like >= bstlike2) { recompute = false; protadd(p, (*item), (*nufork)); rute = root->next->back; protsavetree(&pos,&found); protreroot(rute); if (like > bstlike2) { bestlike = bstlike2 = like; pos = 1; nextree = 1; addtree(pos, &nextree, dummy, place, bestrees); } else { pos = 0; findtree(&found, &pos, nextree, place, bestrees); if (!found) { if (nextree <= maxtrees) addtree(pos, &nextree, dummy, place, bestrees); } } protre_move (item, nufork); recompute = true; } } if (like >= bestyet) { bestyet = like; there = p; } } /* tryadd */ void addpreorder(node *p, node *item, node *nufork) { /* traverses a binary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ if (p == NULL) return; tryadd(p, &item,&nufork); if (!p->tip) { addpreorder(p->next->back, item, nufork); addpreorder(p->next->next->back, item, nufork); } } /* addpreorder */ void tryrearr(node *p, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success := TRUE and keeps the new tree. otherwise, restores the old tree */ node *frombelow, *whereto, *forknode, *q; double oldlike; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (forknode->back == NULL) return; oldlike = bestyet; if (p->back->next->next == forknode) frombelow = forknode->next->next->back; else frombelow = forknode->next->back; whereto = treenode[forknode->back->index - 1]; if (whereto->next->back == forknode) q = whereto->next->next->back; else q = whereto->next->back; protfillin(temp1, frombelow, q); protfillin(temp, temp1, p); protfillin(temp1, temp, whereto->back); evaluate(temp1); if (like - oldlike < LIKE_EPSILON) { if (p == forknode->next->next->back) { q = forknode->next; forknode->next = forknode->next->next; forknode->next->next = q; q->next = forknode; } } else { recompute = false; protre_move(&p, &forknode); protfillin(whereto, whereto->next->back, whereto->next->next->back); recompute = true; protadd(whereto, p, forknode); *success = true; bestyet = like; } } /* tryrearr */ void repreorder(node *p, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ if (p == NULL) return; tryrearr(p,success); if (!p->tip) { repreorder(p->next->back,success); repreorder(p->next->next->back,success); } } /* repreorder */ void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. 
if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ boolean success = true; while (success) { success = false; repreorder(*r, &success); } } /* rearrange */ void protaddelement(node **p,long *nextnode,long *lparens,boolean *names, char** treestr) { /* recursive procedure adds nodes to user-defined tree */ node *q; long i, n; boolean found; Char str[nmlngth]; ch = *(*treestr)++; if (ch == '(' ) { if ((*lparens) >= spp - 1) { printf("\nERROR IN USER TREE: TOO MANY LEFT PARENTHESES\n"); embExitBad(); } (*nextnode)++; (*lparens)++; q = treenode[(*nextnode) - 1]; protaddelement(&q->next->back, nextnode,lparens,names, treestr); q->next->back->back = q->next; do { ch = *(*treestr)++; } while (ch && ch != ','); protaddelement(&q->next->next->back, nextnode,lparens,names, treestr); q->next->next->back->back = q->next->next; do { ch = *(*treestr)++; } while (ch && ch != ')'); *p = q; return; } for (i = 0; i < nmlngth; i++) str[i] = ' '; n = 1; do { if (ch == '_') ch = ' '; str[n - 1] = ch; ch = *(*treestr)++; n++; } while (ch != ',' && ch != ')' && ch != ':' && n <= nmlngth); n = 1; do { found = true; for (i = 0; i < nmlngth; i++) found = (found && ((str[i] == nayme[n - 1][i]) || ((nayme[n - 1][i] == '_') && (str[i] == ' ')))); if (found) { if (names[n - 1] == false) { *p = treenode[n - 1]; names[n - 1] = true; } else { printf("\nERROR IN USER TREE: DUPLICATE NAME FOUND -- "); for (i = 0; i < nmlngth; i++) putchar(nayme[n - 1][i]); putchar('\n'); embExitBad(); } } else n++; } while (!(n > spp || found)); if (n <= spp) return; printf("CANNOT FIND SPECIES: "); for (i = 0; i < nmlngth; i++) putchar(str[i]); putchar('\n'); } /* protaddelement */ void prottreeread(char** treestr) { /* read in user-defined tree and set it up */ long nextnode, lparens, i; root = treenode[spp]; nextnode = spp; root->back = NULL; names = (boolean *)Malloc(spp*sizeof(boolean)); for (i = 0; i < (spp); i++) names[i] = false; lparens = 0; protaddelement(&root, &nextnode,&lparens,names, treestr); if (ch == '[') { do ch = *(*treestr)++; while (ch != ']'); ch = *(*treestr)++; } do ch = *(*treestr)++; while (ch != ';'); free(names); } /* prottreeread */ void protancestset(long *a, long *b, long *c, long *d, long *k) { /* sets up the aa set array. 
*/ aas aa; long s, sa, sb; long i, j, m, n; boolean counted; counted = false; *k = 0; for (i = 0; i <= 5; i++) { if (*k < 3) { s = 0; if (i > 3) n = i - 3; else n = 0; for (j = n; j <= (i - n); j++) { if (j < 3) sa = a[j]; else sa = fullset; for (m = n; m <= (i - j - n); m++) { if (m < 3) sb = sa & b[m]; else sb = sa; if (i - j - m < 3) sb &= c[i - j - m]; s |= sb; } } if (counted || s != 0) { d[*k] = s; (*k)++; counted = true; } } } for (i = 0; i <= 1; i++) { for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { if (((1L << ((long)aa)) & d[i]) != 0) { for (j = i + 1; j <= 2; j++) d[j] |= translate[(long)aa - (long)ala][j - i]; } } } } /* protancestset */ void prothyprint(long b1, long b2, boolean *bottom, node *r, boolean *nonzero, boolean *maybe) { /* print out states in sites b1 through b2 at node */ long i; boolean dot; Char ch = 0; aas aa; if (*bottom) { if (!outgropt) fprintf(outfile, " "); else fprintf(outfile, "root "); } else fprintf(outfile, "%3ld ", r->back->index - spp); if (r->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[r->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", r->index - spp); if (*bottom) fprintf(outfile, " "); else if (*nonzero) fprintf(outfile, " yes "); else if (*maybe) fprintf(outfile, " maybe "); else fprintf(outfile, " no "); for (i = b1 - 1; i < b2; i++) { aa = r->seq[i]; switch (aa) { case ala: ch = 'A'; break; case asx: ch = 'B'; break; case cys: ch = 'C'; break; case asp: ch = 'D'; break; case glu: ch = 'E'; break; case phe: ch = 'F'; break; case gly: ch = 'G'; break; case his: ch = 'H'; break; case ileu: ch = 'I'; break; case lys: ch = 'K'; break; case leu: ch = 'L'; break; case met: ch = 'M'; break; case asn: ch = 'N'; break; case pro: ch = 'P'; break; case gln: ch = 'Q'; break; case arg: ch = 'R'; break; case ser: ch = 'S'; break; case ser1: ch = 'S'; break; case ser2: ch = 'S'; break; case thr: ch = 'T'; break; case trp: ch = 'W'; break; case tyr: ch = 'Y'; break; case val: ch = 'V'; break; case glx: ch = 'Z'; break; case del: ch = '-'; break; case stop: ch = '*'; break; case unk: ch = 'X'; break; case quest: ch = '?'; break; } if (!(*bottom) && dotdiff) dot = (r->siteset[i] [0] == treenode[r->back->index - 1]->siteset[i][0] || ((r->siteset[i][0] & (~((1L << ((long)ser1)) | (1L << ((long)ser2)) | (1L << ((long)ser))))) == 0 && (treenode[r->back->index - 1]->siteset[i] [0] & (~((1L << ((long)ser1)) | (1L << ((long)ser2)) | (1L << ((long)ser))))) == 0)); else dot = false; if (dot) putc('.', outfile); else putc(ch, outfile); if ((i + 1) % 10 == 0) putc(' ', outfile); } putc('\n', outfile); } /* prothyprint */ void prothyptrav(node *r, sitearray *hypset, long b1, long b2, long *k, boolean *bottom, sitearray nothing) { boolean maybe, nonzero; long i; aas aa; long anc = 0, hset; gseq *ancset, *temparray; protgnu(&ancset); protgnu(&temparray); maybe = false; nonzero = false; for (i = b1 - 1; i < b2; i++) { if (!r->tip) { protancestset(hypset[i], r->next->back->siteset[i], r->next->next->back->siteset[i], temparray->seq[i], k); memcpy(r->siteset[i], temparray->seq[i], sizeof(sitearray)); } if (!(*bottom)) anc = treenode[r->back->index - 1]->siteset[i][0]; if (!r->tip) { hset = r->siteset[i][0]; r->seq[i] = quest; for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { if (hset == 1L << ((long)aa)) r->seq[i] = aa; } if (hset == ((1L << ((long)asn)) | (1L << ((long)asp)))) r->seq[i] = asx; if (hset == ((1L << ((long)gln)) | (1L << ((long)gly)))) r->seq[i] = glx; if (hset == ((1L << ((long)ser1)) | (1L << ((long)ser2)))) 
r->seq[i] = ser; if (hset == fullset) r->seq[i] = unk; } nonzero = (nonzero || (r->siteset[i][0] & anc) == 0); maybe = (maybe || r->siteset[i][0] != anc); } prothyprint(b1, b2,bottom,r,&nonzero,&maybe); *bottom = false; if (!r->tip) { memcpy(temparray->seq, r->next->back->siteset, chars*sizeof(sitearray)); for (i = b1 - 1; i < b2; i++) protancestset(hypset[i], r->next->next->back->siteset[i], nothing, ancset->seq[i], k); prothyptrav(r->next->back, ancset->seq, b1, b2,k,bottom,nothing ); for (i = b1 - 1; i < b2; i++) protancestset(hypset[i], temparray->seq[i], nothing, ancset->seq[i],k); prothyptrav(r->next->next->back, ancset->seq, b1, b2, k,bottom,nothing); } protchuck(temparray); protchuck(ancset); } /* prothyptrav */ void prothypstates(long *k) { /* fill in and describe states at interior nodes */ boolean bottom; sitearray nothing; long i, n; seqptr hypset; fprintf(outfile, "\nFrom To Any Steps? State at upper node\n"); fprintf(outfile, " "); fprintf(outfile, "( . means same as in the node below it on tree)\n\n"); memcpy(nothing, translate[(long)quest - (long)ala], sizeof(sitearray)); hypset = (seqptr)Malloc(chars*sizeof(sitearray)); for (i = 0; i < (chars); i++) memcpy(hypset[i], nothing, sizeof(sitearray)); bottom = true; for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { putc('\n', outfile); n = i * 40; if (n > chars) n = chars; bottom = true; prothyptrav(root, hypset, i * 40 - 39, n, k,&bottom,nothing); } free(hypset); } /* prothypstates */ void describe() { /* prints ancestors, steps and table of numbers of steps in each site */ long i,j,k; if (treeprint) fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10); if (stepbox) { putc('\n', outfile); if (weights) fprintf(outfile, "weighted "); fprintf(outfile, "steps in each position:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%4ld", i); fprintf(outfile, "\n *-----------------------------------------\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld", i * 10); putc('!', outfile); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k == 0 || k > chars) fprintf(outfile, " "); else fprintf(outfile, "%4ld", root->numsteps[k - 1] / 10); } putc('\n', outfile); } } if (ancseq) { prothypstates(&k); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout(root, nextree, &col, root); } } /* describe */ void maketree() { /* constructs a binary tree from the pointers in treenode. 
adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j; double gotlike; node *item, *nufork, *dummy; char* treestr; if (!usertree) { for (i = 1; i <= (spp); i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); root = treenode[enterorder[0] - 1]; recompute = true; protadd(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], treenode[spp]); if (progress) { printf("\nAdding species:\n"); writename(0, 2, enterorder); } lastrearr = false; for (i = 3; i <= (spp); i++) { bestyet = -30.0*spp*chars; there = root; item = treenode[enterorder[i - 1] - 1]; nufork = treenode[spp + i - 2]; addpreorder(root, item, nufork); protadd(there, item, nufork); like = bestyet; rearrange(&root); if (progress) writename(i - 1, 1, enterorder); lastrearr = (i == spp); if (lastrearr) { if (progress) { printf("\nDoing global rearrangements\n"); printf(" !"); for (j = 1; j <= nonodes; j++) if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); } bestlike = bestyet; if (jumb == 1) { bstlike2 = bestlike = -30.0*spp*chars; nextree = 1; } do { if (progress) printf(" "); gotlike = bestlike; for (j = 0; j < (nonodes); j++) { bestyet = -30.0*spp*chars; item = treenode[j]; if (item != root) { nufork = treenode[treenode[j]->back->index - 1]; protre_move(&item, &nufork); there = root; addpreorder(root, item, nufork); protadd(there, item, nufork); } if (progress) { if ( j % (( nonodes / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); } } if (progress) putchar('\n'); } while (bestlike > gotlike); } } if (progress) putchar('\n'); for (i = spp - 1; i >= 1; i--) protre_move(&treenode[i], &dummy); if (jumb == njumble) { if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); recompute = false; for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; protadd(treenode[0], treenode[1], treenode[spp]); for (j = 3; j <= (spp); j++) protadd(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], treenode[spp + j - 2]); protreroot(treenode[outgrno - 1]); protpostorder(root); evaluate(root); printree(root, 1.0); describe(); for (j = 1; j < (spp); j++) protre_move(&treenode[j], &dummy); } } } else { if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n\n\n"); } which = 1; while (which <= numtrees) { treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); prottreeread(&treestr); if (outgropt) protreroot(treenode[outgrno - 1]); protpostorder(root); evaluate(root); printree(root, 1.0); describe(); which++; } printf("\n"); FClose(intree); putc('\n', outfile); if (numtrees > 1 && chars > 1 ) standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed); } if (jumb == njumble && progress) { printf("Output written to file \"%s\"\n", outfilename); if (trout) printf("\nTrees also written onto file \"%s\"\n", outtreename); } } /* maketree */ int main(int argc, Char *argv[]) { /* Protein parsimony by uphill search */ #ifdef MAC argc = 1; /* macsetup("Protpars",""); */ argv[0] = "Protpars"; #endif init(argc,argv); emboss_getoptions("fprotpars",argc,argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; garbage = NULL; 
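/* Main analysis loop: each of the msets input data sets is read and analysed in turn;
   when the input order is jumbled (njumble > 1) maketree() is rerun once per jumble,
   and the accumulated results are only written out on the final pass (jumb == njumble). */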
firstset = true; code(); setup(); doinit(); for (ith = 1; ith <= msets; ith++) { doinput(); if (ith == 1) firstset = false; if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("Data set # %ld:\n\n",ith); } for (jumb = 1; jumb <= njumble; jumb++) maketree(); } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Protein parsimony by uphill search */ PHYLIPNEW-3.69.650/src/restml.c0000664000175000017500000017025111605067345012544 00000000000000 /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #include #include "phylip.h" #include "seq.h" #define initialv 0.1 /* starting value of branch length */ #define over 60 /* maximum width of a tree on screen */ extern sequence y; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; AjPPhyloState* phylostates = NULL; #ifndef OLDC /* function prototypes */ void restml_inputnumbers(AjPPhyloState); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void freerest(void); void setuppie(void); void doinit(void); void inputoptions(AjPPhyloState); void restml_inputdata(AjPPhyloState); void restml_sitesort(void); void restml_sitecombine(void); void makeweights(void); void restml_makevalues(void); void getinput(void); void copymatrix(transmatrix, transmatrix); void maketrans(double, boolean); void branchtrans(long, double); double evaluate(tree *, node *); boolean nuview(node *); void makenewv(node *); void update(node *); void smooth(node *); void insert_(node *p, node *); void restml_re_move(node **, node **); void restml_copynode(node *, node *); void restml_copy_(tree *, tree *); void buildnewtip(long , tree *); void buildsimpletree(tree *); void addtraverse(node *, node *, boolean); void rearrange(node *, node *); void restml_coordinates(node *, double, long *,double *, double *); void restml_fprintree(FILE *fp); void restml_printree(void); double sigma(node *, double *); void fdescribe(FILE *, node *); void summarize(void); void restml_treeout(node *); static phenotype2 restml_pheno_new(long endsite, long sitelength); /* static void restml_pheno_delete(phenotype2 x2); */ void initrestmlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr); static void restml_unroot(node* root, node** nodep, long nonodes); void inittravtree(tree* t,node *); static void adjust_lengths_r(node *p); void treevaluate(void); void maketree(void); void globrearrange(void); void adjust_lengths(tree *); double adjusted_v(double v); sitelike2 init_sitelike(long sitelength); void free_sitelike(sitelike2 sl); void copy_sitelike(sitelike2 dest, sitelike2 src,long sitelength); void reallocsites(void); static void set_branchnum(node *p, long branchnum); void alloctrans(tree *t, long nonodes, long sitelength); long get_trans(tree* t); void free_trans(tree* t, long trans); void free_all_trans(tree* t); void alloclrsaves(void); void freelrsaves(void); void resetlrsaves(void); void cleanup(void); void allocx2(long nonodes, long sitelength, pointarray, boolean 
usertree); void freex2(long nonodes, pointarray treenode); void freetrans(tree * t, long nonodes,long sitelength); /* function prototypes */ #endif Char infilename[FNMLNGTH]; Char intreename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; ajint numwts; long nonodes2, sites, enzymes, weightsum, sitelength, datasets, ith, njumble, jumb=0; long inseed, inseed0; /* User options */ boolean global; /* Perform global rearrangements? */ boolean jumble; /* Randomize input order? */ boolean lengths; /* Use lengths from user tree? */ boolean weights; boolean trout; /* Write tree to outtree? */ boolean trunc8; boolean usertree; /* Input user tree? Or search. */ boolean progress; /* Display progress */ boolean mulsets; /* Use multiple data sets */ /* Runtime state */ boolean firstset; boolean improve; boolean smoothit; boolean inserting = false; double bestyet; tree curtree, priortree, bestree, bestree2; longer seed; long *enterorder; steptr aliasweight; char *progname; node *qwhere,*addwhere; /* local rearrangements need to save views. created globally so that reallocation of the same variable is unnecessary */ node **lrsaves; /* Local variables for maketree, propagated globally for C version: */ long nextsp, numtrees, maxwhich, col, shimotrees; double maxlogl; boolean succeeded, smoothed; #define NTEMPMATS 7 transmatrix *tempmatrix, tempslope, tempcurve; sitelike2 pie; double *l0gl; double **l0gf; Char ch; /* variables added to keep treeread2() happy */ boolean goteof; double trweight; node *grbg = NULL; static void set_branchnum(node *p, long branchnum) { assert(p != NULL); assert(branchnum > 0); p->branchnum = branchnum; } void allocx2(long nonodes, long sitelength, pointarray treenode, boolean usertree) { /* its complement is freex2(nonodes,treenode) */ long i, j, k, l; node *p; for (i = 0; i < spp; i++) { treenode[i]->x2 = (phenotype2)Malloc((endsite+1)*sizeof(sitelike2)); for ( j = 0 ; j < endsite + 1 ; j++ ) treenode[i]->x2[j] = (double *)Malloc((sitelength + 1) * sizeof(double)); } if (!usertree) { for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->x2 = (phenotype2)Malloc((endsite+1)*sizeof(sitelike2)); for (k = 0; k < endsite + 1; k++) { p->x2[k] = (double *)Malloc((sitelength + 1) * sizeof(double)); for (l = 0; l < sitelength; l++) p->x2[k][l] = 1.0; } p = p->next; } } } } /* allocx2 */ void freex2(long nonodes, pointarray treenode) { long i, j, k; node *p; for (i = 0; i < spp; i++) { for (j = 0; j < endsite + 1; j++) { free(treenode[i]->x2[j]); } free(treenode[i]->x2); treenode[i]->x2 = NULL; } for (i = spp; i < nonodes; i++) { p = treenode[i]; if (p != NULL) { for (j = 1; j <= 3; j++) { for (k = 0; k < endsite + 1; k++) { free(p->x2[k]); } free(p->x2); p->x2 = NULL; p = p->next; } } } } /* freex2 */ void alloctrans(tree *t, long nonodes, long sitelength) { /* it's complement is freetrans(tree*,nonodes, sitelength) */ long i, j; t->trans = (transptr)Malloc(nonodes*sizeof(transmatrix)); for (i = 0; i < nonodes; ++i){ t->trans[i] = (transmatrix)Malloc((sitelength + 1) * sizeof(double *)); for (j = 0;j < sitelength + 1; ++j) t->trans[i][j] = (double *)Malloc((sitelength + 1) * sizeof(double)); } t->freetrans = Malloc(nonodes* sizeof(long)); for ( i = 0; i < nonodes; i++ ) t->freetrans[i] = i+1; t->transindex = nonodes - 1; } /* alloctrans */ void freetrans(tree * t, long nonodes,long sitelength) { long i ,j; for ( i = 0 ; i < nonodes ; i++ ) { for ( j = 0 ; j < sitelength + 1; j++) { free 
((t->trans)[i][j]); } free ((t->trans)[i]); } free(t->trans); free(t->freetrans); } long get_trans(tree* t) { long ret; assert(t->transindex >= 0); ret = t->freetrans[t->transindex]; t->transindex--; return ret; } void free_trans(tree* t, long trans) { long i; /* FIXME This is a temporary workaround and probably slows things down a bit. * During rearrangements, this function is sometimes called more than once on * already freed nodes, causing the freetrans array to overrun other data. */ for ( i = 0 ; i < t->transindex; i++ ) { if ( t->freetrans[i] == trans ) { return; } } /* end of temporary fix */ t->transindex++; t->freetrans[t->transindex] = trans; } void free_all_trans(tree* t) { long i; for ( i = 0; i < nonodes2; i++ ) t->freetrans[i] = i; t->transindex = nonodes2 - 1; } sitelike2 init_sitelike(long sitelength) { return Malloc((sitelength+1) * sizeof(double)); } void free_sitelike(sitelike2 sl) { free(sl); } void copy_sitelike(sitelike2 dest, sitelike2 src,long sitelength) { memcpy(dest,src,(sitelength+1)*sizeof(double)); } void restml_inputnumbers(AjPPhyloState state) { /* read and print out numbers of species and sites */ spp = state->Size; sites = state->Len; enzymes = state->Count; nonodes2 = spp * 2 - 1; } /* restml_inputnumbers */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs; boolean rough; sitelength = 6; trunc8 = true; global = false; improve = false; jumble = false; njumble = 1; lengths = false; outgrno = 1; outgropt = false; trout = true; usertree = false; weights = false; printdata = false; progress = true; treeprint = true; interleaved = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("data"); numseqs = 0; while (phylostates[numseqs]) numseqs++; printf("numseqs: %d\n", numseqs); if (numseqs > 1) { mulsets = true; datasets = numseqs; } phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) weights = true; trunc8 = ajAcdGetBoolean("allsites"); if (!usertree) { rough = ajAcdGetBoolean("rough"); if(!rough) improve = true; global = ajAcdGetBoolean("global"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } sitelength = ajAcdGetInt("sitelength"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nRestriction site Maximum Likelihood"); fprintf(outfile, " method, version %s\n\n",VERSION); } /* emboss_getoptions */ void reallocsites() { long i; for (i = 0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(sites*sizeof(Char)); } free(weight); free(alias); free(aliasweight); weight = (steptr)Malloc((sites+1)*sizeof(long)); alias = (steptr)Malloc((sites+1)*sizeof(long)); aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); } void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc(sites*sizeof(Char)); nayme = (naym *)Malloc(spp*sizeof(naym)); 
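/* Remaining per-run arrays: enterorder holds the (possibly jumbled) species addition
   order, while weight, alias and aliasweight are allocated with sites+1 entries because
   PHYLIP indexes sites from 1. */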
enterorder = (long *)Malloc(spp*sizeof(long)); weight = (steptr)Malloc((sites+1)*sizeof(long)); alias = (steptr)Malloc((sites+1)*sizeof(long)); aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); } /* allocrest */ void freerest() { long i; for (i = 0; i < spp; i++) free(y[i]); free(y); free(nayme); free(enterorder); free(weight); free(alias); free(aliasweight); } /* freerest */ void freelrsaves() { long i,j; for ( i = 0 ; i < NLRSAVES ; i++ ) { for (j = 0; j < endsite + 1; j++) free(lrsaves[i]->x2[j]); free(lrsaves[i]->x2); free(lrsaves[i]->underflows); free(lrsaves[i]); } free(lrsaves); } void resetlrsaves() { freelrsaves(); alloclrsaves(); } void alloclrsaves() { long i,j; lrsaves = Malloc(NLRSAVES * sizeof(node*)); for ( i = 0 ; i < NLRSAVES ; i++ ) { lrsaves[i] = Malloc(sizeof(node)); lrsaves[i]->x2 = Malloc((endsite + 1)*sizeof(sitelike2)); for ( j = 0 ; j < endsite + 1 ; j++ ) { lrsaves[i]->x2[j] = Malloc((sitelength + 1) * sizeof(double)); } } } /* alloclrsaves */ void setuppie() { /* set up equilibrium probabilities of being a given number of bases away from a restriction site */ long i; double sum; pie = init_sitelike(sitelength); pie[0] = 1.0; sum = pie[0]; for (i = 1; i <= sitelength; i++) { pie[i] = 3 * pie[i - 1] * (sitelength - i + 1) / i; sum += pie[i]; } for (i = 0; i <= sitelength; i++) pie[i] /= sum; } /* setuppie */ void doinit() { /* initializes variables */ long i,j; restml_inputnumbers(phylostates[0]); if (!usertree) nonodes2--; if (printdata) fprintf(outfile, "%4ld Species, %4ld Sites,%4ld Enzymes\n", spp, sites, enzymes); tempmatrix = Malloc(NTEMPMATS * sizeof(transmatrix)); for ( i = 0 ; i < NTEMPMATS ; i++ ) { tempmatrix[i] = Malloc((sitelength+1) * sizeof(double *)); for ( j = 0 ; j <= sitelength ; j++) tempmatrix[i][j] = (double *)Malloc((sitelength+1) * sizeof(double)); } tempslope = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); for (i=0; i<=sitelength; i++) tempslope[i] = (double *)Malloc((sitelength+1) * sizeof(double)); tempcurve = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); for (i=0; i<=sitelength; i++) tempcurve[i] = (double *)Malloc((sitelength+1) * sizeof(double)); setuppie(); alloctrans(&curtree, nonodes2, sitelength); alloctree(&curtree.nodep, nonodes2, usertree); allocrest(); if (usertree) return; alloctrans(&bestree, nonodes2, sitelength); alloctree(&bestree.nodep, nonodes2, 0); alloctrans(&priortree, nonodes2, sitelength); alloctree(&priortree.nodep, nonodes2, 0); if (njumble == 1) return; alloctrans(&bestree2, nonodes2, sitelength); alloctree(&bestree2.nodep, nonodes2, 0); } /* doinit */ void cleanup() { long i, j; for (i = 0; i < NTEMPMATS; i++) { for (j = 0; j <= sitelength; j++) free(tempmatrix[i][j]); free(tempmatrix[i]); } free(tempmatrix); tempmatrix = NULL; for (i = 0; i <= sitelength; i++) { free(tempslope[i]); free(tempcurve[i]); } free(tempslope); tempslope = NULL; free(tempcurve); tempcurve = NULL; freelrsaves(); } void inputoptions(AjPPhyloState state) { /* read the options information */ long i, /*extranum,*/ cursp, curst, curenz; if (!firstset) { cursp = state->Size; curst = state->Len; curenz = state->Count; if (cursp != spp) { printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4ld\n", ith); embExitBad(); } if (curenz != enzymes) { printf("\nERROR: INCONSISTENT NUMBER OF ENZYMES IN DATA SET %4ld\n", ith); embExitBad(); } sites = curst; } if ( !firstset ) reallocsites(); for (i = 1; i <= sites; i++) weight[i] = 1; weightsum = sites; /*extranum = numwts;*/ for (i = 1; i <= numwts; i++) { 
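/* Each weights string supplied through the EMBOSS 'weights' property is parsed into
   the per-site weight array, updating weightsum as it goes. */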
inputweightsstr2(phyloweights->Str[i-1], 1, sites+1, &weightsum, weight, &weights, "RESTML"); } fprintf(outfile, "\n Recognition sequences all%2ld bases long\n", sitelength); if (trunc8) fprintf(outfile, "\nSites absent from all species are assumed to have been omitted\n\n"); if (weights) printweights(outfile, 1, sites, weight, "Sites"); } /* inputoptions */ void restml_inputdata(AjPPhyloState state) { /* read the species and sites data */ long i, j, k, l /*, sitesread*/; Char ch; boolean allread, done; AjPStr str; if (printdata) putc('\n', outfile); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 39) j = 39; if (printdata) { fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Sites\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "-----\n\n"); } /*sitesread = 0;*/ allread = false; while (!(allread)) { i = 1; while (i <= spp ) { str = state->Str[i-1]; initnamestate(state, i - 1); j = 0; done = false; while (!done) { while (j < sites) { ch = ajStrGetCharPos(str, j); uppercase(&ch); if (ch != '1' && ch != '0' && ch != '+' && ch != '-' && ch != '?') { printf(" ERROR: Bad symbol %c", ch); printf(" at position %ld of species %ld\n", j+1, i); embExitBad(); } if (ch == '1') ch = '+'; if (ch == '0') ch = '-'; j++; y[i - 1][j - 1] = ch; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (printdata) { for (i = 1; i <= ((sites - 1) / 60 + 1); i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; for (k = (i - 1) * 60 + 1; k <= l; k++) { putc(y[j][k - 1], outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* restml_inputdata */ void restml_sitesort() { /* Shell sort keeping alias, aliasweight in same order */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied; gap = sites / 2; while (gap > 0) { for (i = gap + 1; i <= sites; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = alias[j]; jg = alias[j + gap]; flip = false; tied = true; k = 1; while (k <= spp && tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (tied) { aliasweight[j] += aliasweight[j + gap]; aliasweight[j + gap] = 0; } if (!flip) break; itemp = alias[j]; alias[j] = alias[j + gap]; alias[j + gap] = itemp; itemp = aliasweight[j]; aliasweight[j] = aliasweight[j + gap]; aliasweight[j + gap] = itemp; j -= gap; } } gap /= 2; } } /* restml_sitesort */ void restml_sitecombine() { /* combine sites that have identical patterns */ long i, j, k; boolean tied; i = 1; while (i < sites) { j = i + 1; tied = true; while (j <= sites && tied) { k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i] - 1] == y[k - 1][alias[j] - 1]); k++; } if (tied && aliasweight[j] > 0) { aliasweight[i] += aliasweight[j]; aliasweight[j] = 0; alias[j] = alias[i]; } j++; } i = j - 1; } } /* restml_sitecombine */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i] = i; aliasweight[i] = weight[i]; } restml_sitesort(); restml_sitecombine(); sitescrunch2(sites + 1, 2, 3, aliasweight); for (i = 1; i <= sites; i++) { weight[i] = aliasweight[i]; if (weight[i] > 0) endsite = i; } weight[0] = 1; } /* makeweights */ void restml_makevalues() { /* set up fractional likelihoods 
at tips */ long i, j, k, l, m; boolean found; for (k = 1; k <= endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (l = 0; l <= sitelength; l++) curtree.nodep[i]->x2[k][l] = 1.0; switch (y[i][j - 1]) { case '+': for (m = 1; m <= sitelength; m++) curtree.nodep[i]->x2[k][m] = 0.0; break; case '-': curtree.nodep[i]->x2[k][0] = 0.0; break; case '?': /* blank case */ break; } } } for (i = 0; i < spp; i++) { for (k = 1; k <= sitelength; k++) curtree.nodep[i]->x2[0][k] = 1.0; curtree.nodep[i]->x2[0][0] = 0.0; } if (trunc8) return; found = false; i = 1; while (!found && i <= endsite) { found = true; for (k = 0; k < spp; k++) found = (found && y[k][alias[i] - 1] == '-'); if (!found) i++; } if (found) { weightsum += (enzymes - 1) * weight[i]; weight[i] *= enzymes; } } /* restml_makevalues */ void getinput() { /* reads the input data */ inputoptions(phylostates[ith-1]); restml_inputdata(phylostates[ith-1]); if ( !firstset ) freelrsaves(); makeweights(); alloclrsaves(); if (!usertree) { setuptree2(&curtree); setuptree2(&priortree); setuptree2(&bestree); if (njumble > 1) setuptree2(&bestree2); } allocx2(nonodes2, sitelength, curtree.nodep, usertree); if (!usertree) { allocx2(nonodes2, sitelength, priortree.nodep, 0); allocx2(nonodes2, sitelength, bestree.nodep, 0); if (njumble > 1) allocx2(nonodes2, sitelength, bestree2.nodep, 0); } restml_makevalues(); } /* getinput */ void copymatrix(transmatrix tomat, transmatrix frommat) { /* copy a matrix the size of transition matrix */ int i,j; for (i=0;i<=sitelength;++i){ for (j=0;j<=sitelength;++j) tomat[i][j] = frommat[i][j]; } } /* copymatrix */ void maketrans(double p, boolean nr) { /* make transition matrix, product matrix with change probability p. Put the results in tempmatrix, tempslope, tempcurve */ long i, j, k, m1, m2; double sump, sums=0, sumc=0, pover3, pijk, term; sitelike2 binom1, binom2; binom1 = init_sitelike(sitelength); binom2 = init_sitelike(sitelength); pover3 = p / 3; for (i = 0; i <= sitelength; i++) { if (p > 1.0 - epsilon) p = 1.0 - epsilon; if (p < epsilon) p = epsilon; binom1[0] = exp((sitelength - i) * log(1 - p)); for (k = 1; k <= sitelength - i; k++) binom1[k] = binom1[k - 1] * (p / (1 - p)) * (sitelength - i - k + 1) / k; binom2[0] = exp(i * log(1 - pover3)); for (k = 1; k <= i; k++) binom2[k] = binom2[k - 1] * (pover3 / (1 - pover3)) * (i - k + 1) / k; for (j = 0; j <= sitelength; ++j) { sump = 0.0; if (nr) { sums = 0.0; sumc = 0.0; } if (i - j > 0) m1 = i - j; else m1 = 0; if (sitelength - j < i) m2 = sitelength - j; else m2 = i; for (k = m1; k <= m2; k++) { pijk = binom1[j - i + k] * binom2[k]; sump += pijk; if (nr) { term = (j-i+2*k)/p - (sitelength-j-k)/(1.0-p) - (i-k)/(3.0-p); sums += pijk * term; sumc += pijk * (term * term - (j-i+2*k)/(p*p) - (sitelength-j-k)/((1.0-p)*(1.0-p)) - (i-k)/((3.0-p)*(3.0-p)) ); } } tempmatrix[0][i][j] = sump; if (nr) { tempslope[i][j] = sums; tempcurve[i][j] = sumc; } } } free_sitelike(binom1); free_sitelike(binom2); } /* maketrans */ void branchtrans(long i, double p) { /* make branch transition matrix for branch i with probability of change p */ boolean nr; nr = false; maketrans(p, nr); copymatrix(curtree.trans[i - 1], tempmatrix[0]); } /* branchtrans */ double evaluate(tree *tr, node *p) { /* evaluates the likelihood, using info. 
at one branch */ double sum, sum2, /*y,*/ liketerm, like0, lnlike0=0, term; long i, j, k,branchnum; node *q; sitelike2 x1, x2; x1 = init_sitelike(sitelength); x2 = init_sitelike(sitelength); sum = 0.0; q = p->back; nuview(p); nuview(q); /*y = p->v;*/ branchnum = p->branchnum; copy_sitelike(x1,p->x2[0],sitelength); copy_sitelike(x2,q->x2[0],sitelength); if (trunc8) { like0 = 0.0; for (j = 0; j <= sitelength; j++) { liketerm = pie[j] * x1[j]; for (k = 0; k <= sitelength; k++) like0 += liketerm * tr->trans[branchnum-1][j][k] * x2[k]; } lnlike0 = log(enzymes * (1.0 - like0)); } for (i = 1; i <= endsite; i++) { copy_sitelike(x1,p->x2[i],sitelength); copy_sitelike(x2,q->x2[i],sitelength); sum2 = 0.0; for (j = 0; j <= sitelength; j++) { liketerm = pie[j] * x1[j]; for (k = 0; k <= sitelength; k++) sum2 += liketerm * tr->trans[branchnum-1][j][k] * x2[k]; } term = log(sum2); if (trunc8) term -= lnlike0; if (usertree && (which <= shimotrees)) l0gf[which - 1][i - 1] = term; sum += weight[i] * term; } /* *** debug put a variable "saveit" in evaluate as third argument as to whether to save the KHT suff */ if (usertree) { if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; } else if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } } tr->likelihood = sum; free_sitelike(x1); free_sitelike(x2); return sum; } /* evaluate */ boolean nuview(node *p) { /* recompute fractional likelihoods for one part of tree */ long i, j, k, lowlim; double sumq; node *q, *s; if (p->tip) return false; for (s = p->next; s != p; s = s->next) { if ( nuview(s->back) ) p->initialized = false; } if (p->initialized) return false; lowlim = trunc8 ? 0 : 1; /* recalculates p->x2[*][*] in place */ for (i = lowlim; i <= endsite; i++) { for (j = 0; j <= sitelength; j++) p->x2[i][j] = 1.0; for (s = p->next; s != p; s = s->next) { q = s->back; for (j = 0; j <= sitelength; j++) { sumq = 0.0; for (k = 0; k <= sitelength; k++) sumq += curtree.trans[q->branchnum-1][j][k] * q->x2[i][k]; p->x2[i][j] *= sumq; } } } return true; } /* nuview */ void makenewv(node *p) { /* Newton-Raphson algorithm improvement of a branch length */ long i, j, k, lowlim, it, ite; double sum, sums, sumc, like, slope, curve, liketerm, liket, y, yold=0, yorig, like0=0, slope0=0, curve0=0, oldlike=0, temp; boolean done, nr, firsttime, better; node *q; sitelike2 xx1, xx2; double *tm, *ts, *tc; q = p->back; y = p->v; yorig = y; if (trunc8) lowlim = 0; else lowlim = 1; done = false; nr = true; firsttime = true; it = 1; ite = 0; while ((it < iterations) && (ite < 20) && (!done)) { like = 0.0; slope = 0.0; curve = 0.0; maketrans(y, nr); for (i = lowlim; i <= endsite; i++) { xx1 = p->x2[i]; xx2 = q->x2[i]; sum = 0.0; sums = 0.0; sumc = 0.0; for (j = 0; j <= sitelength; j++) { liket = xx1[j] * pie[j]; tm = tempmatrix[0][j]; ts = tempslope[j]; tc = tempcurve[j]; for (k = 0; k <= sitelength; k++) { liketerm = liket * xx2[k]; sum += tm[k] * liketerm; sums += ts[k] * liketerm; sumc += tc[k] * liketerm; } } if (i == 0) { like0 = sum; slope0 = sums; curve0 = sumc; } else { like += weight[i] * log(sum); slope += weight[i] * sums/sum; temp = sums/sum; curve += weight[i] * (sumc/sum-temp*temp); } } if (trunc8 && fabs(like0 - 1.0) > 1.0e-10) { like -= weightsum * log(enzymes * (1.0 - like0)); slope += weightsum * slope0 /(1.0 - like0); curve += weightsum * (curve0 /(1.0 - like0) + slope0*slope0/((1.0 - like0)*(1.0 - like0))); } better = false; if (firsttime) { yold = y; oldlike = like; firsttime = false; better = true; } else { if (like > oldlike) 
{ yold = y; oldlike = like; better = true; it++; } } if (better) { y = y + slope/fabs(curve); if (y < epsilon) y = 10.0 * epsilon; if (y > 0.75) y = 0.75; } else { if (fabs(y - yold) < epsilon) ite = 20; y = (y + yold) / 2.0; } ite++; done = fabs(y-yold) < epsilon; } smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); p->v = yold; q->v = yold; branchtrans(p->branchnum, yold); curtree.likelihood = oldlike; } /* makenewv */ void update(node *p) { /* improve branch length and views for one branch */ nuview(p); nuview(p->back); if ( !(usertree && lengths) ) { makenewv(p); if (smoothit ) { inittrav(p); inittrav(p->back); } else { if (inserting && !p->tip) { p->next->initialized = false; p->next->next->initialized = false; } } } } /* update */ void smooth(node *p) { /* update nodes throughout the tree, recursively */ smoothed = false; update(p); if (!p->tip) { if (smoothit && !smoothed) { smooth(p->next->back); } if (smoothit && !smoothed) { smooth(p->next->next->back); } } } /* smooth */ void insert_(node *p, node *q) { /* insert a subtree into a branch, improve lengths in tree */ long i; node *r; r = p->next->next; hookup(r, q->back); hookup(p->next, q); if (q->v >= 0.75) q->v = 0.75; else q->v = 0.75 * (1 - sqrt(1 - 1.333333 * q->v)); if ( q->v < epsilon) q->v = epsilon; q->back->v = q->v; r->v = q->v; r->back->v = r->v; set_branchnum(q->back, q->branchnum); set_branchnum(r, get_trans(&curtree)); set_branchnum(r->back, r->branchnum); branchtrans(q->branchnum, q->v); branchtrans(r->branchnum, r->v); if ( smoothit ) { inittrav(p); inittrav(p->back); } p->initialized = false; i = 1; inserting = true; while (i <= smoothings) { smooth(p); if (!p->tip) { smooth (p->next->back); smooth (p->next->next->back); } i++; } inserting = false; } /* insert */ void restml_re_move(node **p, node **q) { /* remove p and record in q where it was */ long i; *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); free_trans(&curtree,(*q)->back->branchnum); set_branchnum((*q)->back, (*q)->branchnum); (*q)->v = 0.75*(1 - (1 - 1.333333*(*q)->v) * (1 - 1.333333*(*p)->next->v)); if ( (*q)->v > 1 - epsilon) (*q)->v = 1 - epsilon; else if ( (*q)->v < epsilon) (*q)->v = epsilon; (*q)->back->v = (*q)->v; branchtrans((*q)->branchnum, (*q)->v); (*p)->next->back = NULL; (*p)->next->next->back = NULL; if ( smoothit ) { inittrav((*q)->back); inittrav(*q); } if ( smoothit ) { for ( i = 0 ; i < smoothings ; i++ ) { smooth(*q); smooth((*q)->back); } } else ( smooth(*q)); } /* restml_re_move */ void restml_copynode(node *c, node *d) { /* copy a node */ long i; set_branchnum(d, c->branchnum); for ( i = 0 ; i <= endsite ; i++) copy_sitelike(d->x2[i],c->x2[i],sitelength); d->v = c->v; d->iter = c->iter; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; d->initialized = c->initialized; } /* restml_copynode */ void restml_copy_(tree *a, tree *b) { /* copy tree a to tree b */ long i,j; node *p, *q; for (i = 0; i < spp; i++) { restml_copynode(a->nodep[i], b->nodep[i]); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes2; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { restml_copynode(p, q); if 
(p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; for (i=0;itrans[i],a->trans[i]); b->transindex = a->transindex; memcpy(b->freetrans,a->freetrans,nonodes*sizeof(long)); b->start = a->start; } /* restml_copy */ void buildnewtip(long m,tree *tr) { /* set up a new tip and interior node it is connected to */ node *p; long i, j; p = tr->nodep[nextsp + spp - 3]; for (i = 0; i <= endsite; i++) { for (j = 0; j < sitelength; j++) { /* trunc8 */ p->x2[i][j] = 1.0; p->next->x2[i][j] = 1.0; p->next->next->x2[i][j] = 1.0; } } hookup(tr->nodep[m - 1], p); p->v = initialv; p->back->v = initialv; set_branchnum(p, get_trans(tr)); set_branchnum(p->back, p->branchnum); branchtrans(p->branchnum, initialv); } /* buildnewtip */ void buildsimpletree(tree *tr) { /* set up and adjust branch lengths of a three-species tree */ long branch; hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); tr->nodep[enterorder[0] - 1]->v = initialv; tr->nodep[enterorder[1] - 1]->v = initialv; branchtrans(enterorder[1], initialv); branch = get_trans(tr); set_branchnum(tr->nodep[enterorder[0] - 1], branch); set_branchnum(tr->nodep[enterorder[1] - 1], branch); buildnewtip(enterorder[2], tr); insert_(tr->nodep[enterorder[2] - 1]->back, tr->nodep[enterorder[1] - 1]); tr->start = tr->nodep[enterorder[2]-1]->back; } /* buildsimpletree */ void addtraverse(node *p, node *q, boolean contin) { /* try adding p at q, proceed recursively through tree */ double like, vsave = 0; node *qback =NULL; if (!smoothit) { copymatrix (tempmatrix[1], curtree.trans[q->branchnum - 1]); vsave = q->v; qback = q->back; } insert_(p, q); like = evaluate(&curtree, p); if (like > bestyet) { bestyet = like; if (smoothit) { restml_copy_(&curtree, &bestree); addwhere = q; } else qwhere = q; succeeded = true; } if (smoothit) restml_copy_(&priortree, &curtree); else { hookup (q, qback); q->v = vsave; q->back->v = vsave; free_trans(&curtree,q->back->branchnum); set_branchnum(q->back, q->branchnum); copymatrix (curtree.trans[q->branchnum - 1], tempmatrix[1]); /* curtree.likelihood = bestyet; */ evaluate(&curtree, curtree.start); } if (!q->tip && contin) { /* assumes bifurcation (OK) */ addtraverse(p, q->next->back, contin); addtraverse(p, q->next->next->back, contin); } if ( contin && q == curtree.root ) { /* FIXME!! curtree.root->back == NULL? curtree.root == NULL? 
*/ addtraverse(p,q->back,contin); } } /* addtraverse */ void globrearrange(void) { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,l,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; printf("\n "); alloctree(&globtree.nodep,nonodes2,0); alloctree(&oldtree.nodep,nonodes2,0); alloctrans(&globtree, nonodes2, sitelength); alloctrans(&oldtree, nonodes2, sitelength); setuptree2(&globtree); setuptree2(&oldtree); allocx2(nonodes2, sitelength,globtree.nodep, 0); allocx2(nonodes2, sitelength,oldtree.nodep, 0); restml_copy_(&curtree,&globtree); restml_copy_(&curtree,&oldtree); bestyet = curtree.likelihood; for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { restml_re_move(&sib_ptr,&where); restml_copy_(&curtree,&priortree); qwhere = where; if (where->tip) { restml_copy_(&oldtree,&curtree); restml_copy_(&oldtree,&bestree); sib_ptr=sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true); if ( !smoothit ) { if (succeeded && qwhere != where && qwhere != where->back) { insert_(sib_ptr,qwhere); smoothit = true; for (l = 1; l<=smoothings; l++) { smooth (where); smooth (where->back); } smoothit = false; success = true; restml_copy_(&curtree,&globtree); } restml_copy_(&priortree,&curtree); } else if ( addwhere && where != addwhere && where->back != addwhere && bestyet > globtree.likelihood) { restml_copy_(&bestree,&globtree); success = true; } sib_ptr2 = sib_ptr2->next; } restml_copy_(&oldtree,&curtree); restml_copy_(&oldtree,&bestree); sib_ptr = sib_ptr->next; } } restml_copy_(&globtree,&curtree); restml_copy_(&globtree,&bestree); if (success && globtree.likelihood > oldbestyet) { succeeded = true; } else { succeeded = false; } bestyet = globtree.likelihood; freex2(nonodes2,globtree.nodep); freex2(nonodes2,oldtree.nodep); freetrans(&globtree, nonodes2, sitelength); freetrans(&oldtree, nonodes2, sitelength); freetree2(globtree.nodep,nonodes2); freetree2(oldtree.nodep,nonodes2); } void printnode(node* p); void printnode(node* p) { if (p->back) printf("p->index = %3ld, p->back->index = %3ld, p->branchnum = %3ld,evaluates" " to %f\n",p->index,p->back->index,p->branchnum,evaluate(&curtree,p)); else printf("p->index = %3ld, p->back->index =none, p->branchnum = %3ld,evaluates" " to nothing\n",p->index,p->branchnum); } void printvals(void); void printvals(void) { int i; node* p; for ( i = 0 ; i < nextsp ; i++ ) { p = curtree.nodep[i]; printnode(p); } for ( i = spp ; i <= spp + nextsp - 3 ; i++ ) { p = curtree.nodep[i]; printnode(p); printnode(p->next); printnode(p->next->next); } } void rearrange(node *p, node *pp) { /* rearranges the tree locally */ long i; node *q; node *r; node *rnb; node *rnnb; if (p->tip) return; if (p->back->tip) { rearrange(p->next->back, p); rearrange(p->next->next->back, p); return; } else /* if !p->tip && !p->back->tip */ { /* evaluate(&curtree, curtree.start); bestyet = curtree.likelihood; */ if (p->back->next != pp) r = p->back->next; else r = p->back->next->next; if (smoothit) { /* Copy the whole tree, because we may change all lengths */ restml_copy_(&curtree, &bestree); restml_re_move(&r, &q); nuview(p->next); nuview(p->next->next); restml_copy_(&curtree, &priortree); addtraverse(r, p->next->back, false); 
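/* The detached subtree r is also tried on the other branch adjacent to p; addtraverse()
   records any improved placement in bestree, which is copied back into curtree below. */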
addtraverse(r, p->next->next->back, false); restml_copy_(&bestree, &curtree); } else { /* Save node data and matrices, so we can undo */ rnb = r->next->back; rnnb = r->next->next->back; restml_copynode(r,lrsaves[0]); restml_copynode(r->next,lrsaves[1]); restml_copynode(r->next->next,lrsaves[2]); restml_copynode(p->next,lrsaves[3]); restml_copynode(p->next->next,lrsaves[4]); copymatrix (tempmatrix[2], curtree.trans[r->branchnum - 1]); copymatrix (tempmatrix[3], curtree.trans[r->next->branchnum - 1]); copymatrix (tempmatrix[4], curtree.trans[r->next->next->branchnum-1]); copymatrix (tempmatrix[5], curtree.trans[p->next->branchnum-1]); copymatrix (tempmatrix[6], curtree.trans[p->next->next->branchnum-1]); restml_re_move(&r, &q); nuview(p->next); nuview(p->next->next); qwhere = q; addtraverse(r, p->next->back, false); addtraverse(r, p->next->next->back, false); if (qwhere == q) { hookup(rnb,r->next); hookup(rnnb,r->next->next); restml_copynode(lrsaves[0],r); restml_copynode(lrsaves[1],r->next); restml_copynode(lrsaves[2],r->next->next); restml_copynode(lrsaves[3],p->next); restml_copynode(lrsaves[4],p->next->next); r->back->v = r->v; r->next->back->v = r->next->v; r->next->next->back->v = r->next->next->v; p->next->back->v = p->next->v; p->next->next->back->v = p->next->next->v; set_branchnum(r->back, r->branchnum); set_branchnum(r->next->back, r->next->branchnum); set_branchnum(p->next->back, p->next->branchnum); set_branchnum(p->next->next->back, p->next->next->branchnum); copymatrix (curtree.trans[r->branchnum-1], tempmatrix[2]); copymatrix (curtree.trans[r->next->branchnum-1], tempmatrix[3]); copymatrix (curtree.trans[r->next->next->branchnum-1], tempmatrix[4]); copymatrix (curtree.trans[p->next->branchnum-1], tempmatrix[5]); copymatrix (curtree.trans[p->next->next->branchnum-1], tempmatrix[6]); curtree.likelihood = bestyet; } else { smoothit = true; insert_(r, qwhere); for (i = 1; i<=smoothings; i++) { smooth (r); smooth (r->back); } smoothit = false; } } } } /* rearrange */ void restml_coordinates(node *p, double lengthsum, long *tipy, double *tipmax, double *x) { /* establishes coordinates of nodes */ node *q, *first, *last; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { (*x) = -0.75 * log(1.0 - 1.333333 * q->v); restml_coordinates(q->back, lengthsum + (*x),tipy,tipmax,x); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if (p == curtree.start) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* restml_coordinates */ void restml_fprintree(FILE *fp) { /* prints out diagram of the tree */ long tipy,i; double scale, tipmax, x; putc('\n', fp); if (!treeprint) return; putc('\n', fp); tipy = 1; tipmax = 0.0; restml_coordinates(curtree.start, 0.0, &tipy,&tipmax,&x); scale = 1.0 / (tipmax + 1.000); for (i = 1; i <= tipy - down; i++) fdrawline2(fp, i, scale, &curtree); putc('\n', fp); } /* restml_fprintree */ void restml_printree(void) { restml_fprintree(outfile); } double sigma(node *q, double *sumlr) { /* get 1.95996 * approximate standard error of branch length */ double sump, sumr, sums, sumc, p, pover3, pijk, Qjk, liketerm, f; double slopef,curvef; long i, j, k, m1, m2; 
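/* sigma() rebuilds the transition matrix for the current branch length together with its
   first and second derivatives (Prob, slopeP, curveP), accumulates the slope and curvature
   of the log-likelihood over sites, and returns roughly 1.96 approximate standard errors of
   the branch length (or -1.0 when the curvature is not usably negative).  *sumlr is set to
   the log-likelihood ratio against a zero-length branch, which fdescribe() uses for the
   significance stars. */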
sitelike2 binom1, binom2; transmatrix Prob, slopeP, curveP; node *r; sitelike2 x1, x2; double term, TEMP; x1 = init_sitelike(sitelength); x2 = init_sitelike(sitelength); binom1 = init_sitelike(sitelength); binom2 = init_sitelike(sitelength); Prob = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); slopeP = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); curveP = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); for (i=0; i<=sitelength; ++i) { Prob[i] = (double *)Malloc((sitelength+1) * sizeof(double)); slopeP[i] = (double *)Malloc((sitelength+1) * sizeof(double)); curveP[i] = (double *)Malloc((sitelength+1) * sizeof(double)); } p = q->v; pover3 = p / 3; for (i = 0; i <= sitelength; i++) { binom1[0] = exp((sitelength - i) * log(1 - p)); for (k = 1; k <= (sitelength - i); k++) binom1[k] = binom1[k - 1] * (p / (1 - p)) * (sitelength - i - k + 1) / k; binom2[0] = exp(i * log(1 - pover3)); for (k = 1; k <= i; k++) binom2[k] = binom2[k - 1] * (pover3 / (1 - pover3)) * (i - k + 1) / k; for (j = 0; j <= sitelength; j++) { sump = 0.0; sums = 0.0; sumc = 0.0; if (i - j > 0) m1 = i - j; else m1 = 0; if (sitelength - j < i) m2 = sitelength - j; else m2 = i; for (k = m1; k <= m2; k++) { pijk = binom1[j - i + k] * binom2[k]; sump += pijk; term = (j-i+2*k)/p - (sitelength-j-k)/(1.0-p) - (i-k)/(3.0-p); sums += pijk * term; sumc += pijk * (term * term - (j-i+2*k)/(p*p) - (sitelength-j-k)/((1.0-p)*(1.0-p)) - (i-k)/((3.0-p)*(3.0-p)) ); } Prob[i][j] = sump; slopeP[i][j] = sums; curveP[i][j] = sumc; } } (*sumlr) = 0.0; sumc = 0.0; sums = 0.0; r = q->back; for (i = 1; i <= endsite; i++) { f = 0.0; slopef = 0.0; curvef = 0.0; sumr = 0.0; copy_sitelike(x1,q->x2[i],sitelength); copy_sitelike(x2,r->x2[i],sitelength); for (j = 0; j <= sitelength; j++) { liketerm = pie[j] * x1[j]; sumr += liketerm * x2[j]; for (k = 0; k <= sitelength; k++) { Qjk = liketerm * x2[k]; f += Qjk * Prob[j][k]; slopef += Qjk * slopeP[j][k]; curvef += Qjk * curveP[j][k]; } } (*sumlr) += weight[i] * log(f / sumr); sums += weight[i] * slopef / f; TEMP = slopef / f; sumc += weight[i] * (curvef / f - TEMP * TEMP); } if (trunc8) { f = 0.0; slopef = 0.0; curvef = 0.0; sumr = 0.0; copy_sitelike(x1,q->x2[0],sitelength); copy_sitelike(x2,r->x2[0],sitelength); for (j = 0; j <= sitelength; j++) { liketerm = pie[j] * x1[j]; sumr += liketerm * x2[j]; for (k = 0; k <= sitelength; k++) { Qjk = liketerm * x2[k]; f += Qjk * Prob[j][k]; slopef += Qjk * slopeP[j][k]; curvef += Qjk * curveP[j][k]; } } (*sumlr) += weightsum * log((1.0 - sumr) / (1.0 - f)); sums += weightsum * slopef / (1.0 - f); TEMP = slopef / (1.0 - f); sumc += weightsum * (curvef / (1.0 - f) + TEMP * TEMP); } for (i=0;i<=sitelength;++i){ free(Prob[i]); free(slopeP[i]); free(curveP[i]); } free(Prob); free(slopeP); free(curveP); free_sitelike(x1); free_sitelike(x2); free_sitelike(binom1); free_sitelike(binom2); if (sumc < -1.0e-6) return ((-sums - sqrt(sums * sums - 3.841 * sumc)) / sumc); else return -1.0; } /* sigma */ void fdescribe(FILE *fp, node *p) { /* print out information on one branch */ double sumlr; long i; node *q; double s; double realv; q = p->back; fprintf(fp, "%4ld ", q->index - spp); fprintf(fp, " "); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index - 1][i], fp); } else fprintf(fp, "%4ld ", p->index - spp); if (q->v >= 0.75) fprintf(fp, " infinity"); else { realv = -0.75 * log(1 - 4.0/3.0 * q->v); fprintf(fp, "%13.5f", realv); } if (p->iter) { s = sigma(q, &sumlr); if (s < 0.0) fprintf(fp, " ( zero, infinity)"); else { fprintf(fp, " ("); if 
(q->v - s <= 0.0) fprintf(fp, " zero"); else fprintf(fp, "%9.5f", -0.75 * log(1 - 1.333333 * (q->v - s))); putc(',', fp); if (q->v + s >= 0.75) fprintf(fp, " infinity"); else fprintf(fp, "%12.5f", -0.75 * log(1 - 1.333333 * (q->v + s))); putc(')', fp); } if (sumlr > 1.9205) fprintf(fp, " *"); if (sumlr > 2.995) putc('*', fp); } else fprintf(fp, " (not varied)"); putc('\n', fp); if (!p->tip) { for (q = p->next; q != p; q = q->next) fdescribe(fp, q->back); } } /* fdescribe */ void summarize() { /* print out information on branches of tree */ node *q; fprintf(outfile, "\nremember: "); if (outgropt) fprintf(outfile, "(although rooted by outgroup) "); fprintf(outfile, "this is an unrooted tree!\n\n"); fprintf(outfile, "Ln Likelihood = %11.5f\n\n", curtree.likelihood); fprintf(outfile, " \n"); fprintf(outfile, " Between And Length"); fprintf(outfile, " Approx. Confidence Limits\n"); fprintf(outfile, " ------- --- ------"); fprintf(outfile, " ------- ---------- ------\n"); for (q = curtree.start->next; q != curtree.start; q = q->next) fdescribe(outfile, q->back); fdescribe(outfile, curtree.start->back); fprintf(outfile, "\n * = significantly positive, P < 0.05\n"); fprintf(outfile, " ** = significantly positive, P < 0.01\n\n\n"); } /* summarize */ void restml_treeout(node *p) { /* write out file with representation of final tree */ long i, n, w; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { putc('(', outtree); col++; restml_treeout(p->next->back); putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } restml_treeout(p->next->next->back); if (p == curtree.start) { putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } restml_treeout(p->back); } putc(')', outtree); col++; } if (p->v >= 0.75) x = -1.0; else x = -0.75 * log(1 - 1.333333 * p->v); if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree.start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } } /* restml_treeout */ static phenotype2 restml_pheno_new(long endsite, long sitelength) { phenotype2 ret; long k, l; endsite++; ret = (phenotype2)Malloc(endsite*sizeof(sitelike2)); for (k = 0; k < endsite; k++) { ret[k] = Malloc((sitelength + 1) * sizeof(double)); for (l = 0; l < sitelength; l++) ret[k][l] = 1.0; } return ret; } /* unused */ /* static void restml_pheno_delete(phenotype2 x2) { long k; for (k = 0; k < endsite+1; k++) free(x2[k]); free(x2); } */ void initrestmlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; (*p)->branchnum = 0; (*p)->x2 = restml_pheno_new(endsite, sitelength); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); (*p)->x2 = restml_pheno_new(endsite, sitelength); (*p)->index = nodei; break; case tip: match_names_to_data (str, nodep, p, spp); break; case iter: (*p)->initialized = false; (*p)->v = initialv; (*p)->iter = true; if ((*p)->back != NULL){ (*p)->back->iter = true; (*p)->back->v = initialv; (*p)->back->initialized = false; } break; case length: 
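/* A branch length read from the user tree: the value is stored as v on both ends of the
   branch and the branch is marked as fixed (iter = false); whether that length is kept or
   re-estimated is decided later in inittravtree() via the 'lengths' option. */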
processlength(&valyew, &divisor, ch, &minusread, treestr, parens); (*p)->v = valyew / divisor; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hsnolength: break; default: /* cases hslength, treewt, unittrwt */ break; } } /* initrestmlnode */ static void restml_unroot(node* root, node** nodep, long nonodes) { node *p,*r,*q; double newl; long i; long numsibs; numsibs = count_sibs(root); if ( numsibs > 2 ) { q = root; r = root; while (!(q->next == root)) q = q->next; q->next = root->next; /* FIXME? for(i=0 ; i < endsite ; i++){ free(r->x[i]); r->x[i] = NULL; } free(r->x); r->x = NULL; */ chuck(&grbg, r); curtree.nodep[spp] = q; } else { /* Bifurcating root - remove entire root fork */ /* Join v on each side of root */ newl = root->next->v + root->next->next->v; root->next->back->v = newl; root->next->next->back->v = newl; /* Connect root's children */ hookup(root->next->back, root->next->next->back); /* Move nodep entries down one and set indices */ for ( i = spp; i < nonodes-1; i++ ) { p = nodep[i+1]; nodep[i] = p; nodep[i+1] = NULL; if ( nodep[i] == NULL ) /* This may happen in a multifurcating intree */ break; do { p->index = i+1; p = p->next; } while (p != nodep[i]); } /* Free protx arrays from old root */ /* for(i=0 ; i < endsite ; i++){ free(root->x[i]); free(root->next->x[i]); free(root->next->next->x[i]); root->x[i] = NULL; root->next->x[i] = NULL; root->next->next->x[i] = NULL; } free(root->x); free(root->next->x); free(root->next->next->x); */ chuck(&grbg,root->next->next); chuck(&grbg,root->next); chuck(&grbg,root); } } /* dnaml_unroot */ void inittravtree(tree* t,node *p) { /* traverse tree to set initialized and v to initial values */ node* q; if ( p->branchnum == 0) { set_branchnum(p, get_trans(t)); set_branchnum(p->back, p->branchnum); } p->initialized = false; p->back->initialized = false; if ( usertree && (!lengths || p->iter)) { branchtrans(p->branchnum, initialv); p->v = initialv; p->back->v = initialv; } else branchtrans(p->branchnum, p->v); if ( !p->tip ) { q = p->next; while ( q != p ) { inittravtree(t,q->back); q = q->next; } } } /* inittravtree */ double adjusted_v(double v) { return 3.0/4.0 * (1.0-exp(-4.0/3.0 * v)); } static void adjust_lengths_r(node *p) { node *q; p->v = adjusted_v(p->v); p->back->v = p->v; if (!p->tip) { for (q = p->next; q != p; q = q->next) adjust_lengths_r(q->back); } } void adjust_lengths(tree *t) { assert(t->start->back->tip); adjust_lengths_r(t->start); } void treevaluate() { /* find maximum likelihood branch lengths of user tree */ long i; if ( lengths) adjust_lengths(&curtree); nonodes2--; inittravtree(&curtree,curtree.start); inittravtree(&curtree,curtree.start->back); smoothit = true; for (i = 1; i <= smoothings * 4; i++) { smooth (curtree.start); smooth (curtree.start->back); } evaluate(&curtree, curtree.start); nonodes2++; } /* treevaluate */ void maketree() { /* construct and rearrange tree */ long i,j; long nextnode; char* treestr; if (usertree) { if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); l0gl = (double *) Malloc(shimotrees * sizeof(double)); l0gf = (double **) Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *)Malloc(endsite * sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } which = 1; while (which <= numtrees) { treestr = 
ajStrGetuniquePtr(&phylotrees[which-1]->Tree); /* treeread2 (intree, &curtree.start, curtree.nodep, lengths, &trweight, &goteof, &haslengths, &spp,false,nonodes2); */ /* These initializations required each time through the loop since multiple trees require re-initialization */ nextnode = 0; goteof = false; treeread(&treestr, &curtree.start, NULL, &goteof, NULL, curtree.nodep, &nextnode, NULL, &grbg, initrestmlnode, false, nonodes2); restml_unroot(curtree.start, curtree.nodep, nonodes2); if ( outgropt ) curtree.start = curtree.nodep[outgrno - 1]->back; else curtree.start = curtree.nodep[0]->back; treevaluate(); restml_fprintree(outfile); summarize(); if (trout) { col = 0; restml_treeout(curtree.start); } clear_connections(&curtree,nonodes2); which++; } FClose(intree); if (numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite-1, maxlogl, l0gl, l0gf, aliasweight, seed); } else { free_all_trans(&curtree); for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); if (progress) { printf("\nAdding species:\n"); writename(0, 3, enterorder); } nextsp = 3; smoothit = improve; buildsimpletree(&curtree); curtree.start = curtree.nodep[enterorder[0] - 1]->back; nextsp = 4; while (nextsp <= spp) { buildnewtip(enterorder[nextsp - 1], &curtree); /* bestyet = - nextsp*sites*sitelength*log(4.0); */ bestyet = -DBL_MAX; if (smoothit) restml_copy_(&curtree, &priortree); addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.nodep[enterorder[0]-1]->back, true); if (smoothit) restml_copy_(&bestree, &curtree); else { smoothit = true; insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere); for (i = 1; i<=smoothings; i++) { smooth (curtree.start); smooth (curtree.start->back); } smoothit = false; /* bestyet = curtree.likelihood; */ } if (progress) writename(nextsp - 1, 1, enterorder); if (global && nextsp == spp) { if (progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = spp ; j < nonodes2 ; j++) if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); putchar('!'); } } succeeded = true; while (succeeded) { succeeded = false; if (global && nextsp == spp) globrearrange(); else rearrange(curtree.start, curtree.start->back); } nextsp++; } if (global && progress) { putchar('\n'); fflush(stdout); } restml_copy_(&curtree, &bestree); if (njumble > 1) { if (jumb == 1) restml_copy_(&bestree, &bestree2); else if (bestree2.likelihood < bestree.likelihood) restml_copy_(&bestree, &bestree2); } if (jumb == njumble) { if (njumble > 1) restml_copy_(&bestree2, &curtree); curtree.start = curtree.nodep[outgrno - 1]->back; restml_fprintree(outfile); summarize(); if (trout) { col = 0; restml_treeout(curtree.start); } } } if ( jumb < njumble ) return; freex2(nonodes2, curtree.nodep); if (!usertree) { freex2(nonodes2, priortree.nodep); freex2(nonodes2, bestree.nodep); if (njumble > 1) freex2(nonodes2, bestree2.nodep); } else { free(l0gl); for (i=0;i 1) { fprintf(outfile, "Data set # %ld:\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } getinput(); if (ith == 1) firstset = false; for (jumb = 1; jumb <= njumble; jumb++) maketree(); } cleanup(); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif ajPhyloStateDelarray(&phylostates); ajPhyloTreeDelarray(&phylotrees); ajPhyloPropDel(&phyloweights); ajFileClose(&embossoutfile); ajFileClose(&embossouttree); if(!usertree) { freetree(nonodes2, 
bestree.nodep); freetrans(&bestree, nonodes2, sitelength); freetree(nonodes2, curtree.nodep); freetrans(&curtree, nonodes2, sitelength); freetree(nonodes2, priortree.nodep); freetrans(&priortree, nonodes2, sitelength); if(njumble != 1) { freetree(nonodes2, bestree2.nodep); freetrans(&bestree2, nonodes2, sitelength); } } freerest(); free(pie); embExit(); return 0; } /* maximum likelihood phylogenies from restriction sites */ PHYLIPNEW-3.69.650/src/drawtree.c0000664000175000017500000023642611605067345013062 00000000000000 #ifdef OSX_CARBON #include #endif #include "phylip.h" #include "draw.h" /* Version 3.6. Copyright (c) 1986-2004 by the University of Washington and Written by Joseph Felsenstein and Christopher A. Meacham. Additional code written by Sean Lamont, Andrew Keefe, Hisashi Horino and Akiko Fuseki. Permission is granted to copy, distribute, and modify this program provided that (1) this copyright message is not removed and (2) no fee is charged for this program. */ #ifdef MAC char* about_message = "Drawtree unrooted tree plotting program\r" "PHYLIP version 3.6 (c) Copyright 1986-2004\r" "by The University of Washington.\r" "Written by Joseph Felsenstein and Christopher A. Meacham.\r" "Additional code written by Sean Lamont, Andrew Keefe, Hisashi Horino,\r" "Akiko Fuseki, Doug Buxton and Michal Palczewski.\r" "Permission is granted to copy, distribute and modify this program\r" "provided that\r" "(1) This copyright message is not removed and\r" "(2) no fee is charged for this program."; #endif #define GAP 0.5 #define MAXITERATIONS 100 #define MINIMUMCHANGE 0.0001 /* When 2 Nodes are on top of each other, this is the max. force that's allowed. */ #ifdef INFINITY #undef INFINITY #endif #define INFINITY (double) 9999999999.0 typedef enum {fixed, radial, along, middle} labelorient; FILE *plotfile; AjPFile embossplotfile; const char *pltfilename; long nextnode, strpwide, strpdeep, strptop, strpbottom, payge, numlines,hpresolution; double xmargin, ymargin, topoflabels, rightoflabels, leftoflabels, bottomoflabels, ark, maxx, maxy, minx, miny, scale, xscale, yscale, xoffset, yoffset, charht, xnow, ynow, xunitspercm, yunitspercm, xsize, ysize, xcorner, ycorner,labelheight, labelrotation, treeangle, expand, bscale, maxchange; boolean canbeplotted, preview, previewing, dotmatrix,haslengths, uselengths, regular, rotate, empty, rescaled, notfirst, improve, nbody, firstscreens, labelavoid; boolean pictbold,pictitalic,pictshadow,pictoutline; striptype stripe; plottertype plotter, oldplotter, previewer; growth grows; labelorient labeldirec; node *root, *where; pointarray nodep; fonttype font; enum { yes, no } penchange, oldpenchange; char ch,resopts; char *progname; AjPPhyloTree* phylotrees = NULL; long filesize; long strpdiv; double pagex,pagey,paperx,papery,hpmargin,vpmargin; double *textlength, *firstlet; double trweight; /* starting here, needed to make sccs version happy */ boolean goteof; node *grbg; winactiontype winaction; long maxNumOfIter; extern double pie; struct stackElem { /* This is actually equivalent to a reversed link list; pStackElemBack point toward the direction of the bottom of the stack */ struct stackElem *pStackElemBack; node *pNode; }; typedef struct stackElem stackElemType; #ifndef X_DISPLAY_MISSING String res[]= { "*.input: True", "*.menubar.orientation: horizontal", "*.menubar.borderWidth: 0", "*.drawing_area.background: #CCFFFF", "*.drawing_area.foreground: #000000", "*.menubar.right: ChainLeft", "*.menubar.bottom: ChainTop", "*.menubar.top: ChainTop", 
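  /* The remaining fallback resources complete the preview-window layout:
     the drawing_area is placed below the menubar (fromVert: menubar) and
     both widgets are chained to the window edges so the layout is kept
     when the window is resized. */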
"*.menubar.left: ChainLeft", "*.drawing_area.fromVert: menubar", "*.drawing_area.top: ChainTop", "*.drawing_area.bottom: ChainBottom", "*.drawing_area.left: ChainLeft", "*.drawing_area.right: ChainRight", "*.dialog.label: " "Drawtree unrooted tree plotting program\\n" "PHYLIP version 3.6 (c) Copyright 1986-2004\\n" "by The University of Washington.\\n" "Written by Joseph Felsenstein and Christopher A. Meacham.\\n" "Additional code written by Sean Lamont, Andrew Keefe, Hisashi Horino,\\n" "Akiko Fuseki, Doug Buxton and Michal Palczewski.\\n" "Permission is granted to copy, distribute and modify this program\\n" "provided that\\n" "(1) This copyright message is not removed and\\n" "(2) no fee is charged for this program.", NULL }; #endif #ifndef OLDC /* function prototypes */ void emboss_getoptions(char *pgm, int argc, char *argv[]); void initdrawtreenode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void initialparms(void); char showparms(void); void getparms(char); void getwidth(node *); void plrtrans(node *, double, double, double); void coordtrav(node *, double *, double *); double angleof(double , double ); void polartrav(node *, double, double, double, double, double *, double *, double *, double *); void tilttrav(node *, double *, double *, double *, double *); void leftrightangle(node *, double, double); void improvtrav(node *); void force_1to1(node *, node *, double *, double *, double); void totalForceOnNode(node *, node *, double *, double *, double); double dotProduct(double, double, double, double ); double capedAngle(double); double angleBetVectors(double, double, double, double); double signOfMoment(double, double, double, double); double forcePerpendicularOnNode(node *, node *, double); void polarizeABranch(node *, double *, double *); void pushNodeToStack(stackElemType **, node *); void popNodeFromStack(stackElemType **, node **); double medianOfDistance(node *, boolean); void leftRightLimits(node *, double *, double *); void branchLRHelper(node *, node *, double *, double *); void branchLeftRightAngles(node *, double *, double *); void improveNodeAngle(node *, double); void improvtravn(node *); void coordimprov(double *, double *); void calculate(void); void rescale(void); void user_loop(void); void setup_environment(int argc, Char *argv[]); void polarize(node *p, double *xx, double *yy); double vCounterClkwiseU(double Xu, double Yu, double Xv, double Yv); /* function prototypes */ #endif void initdrawtreenode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ long i; boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; for (i=0;inayme[i] = '\0'; nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); (*p)->index = nodei; break; case tip: (*ntips)++; gnu(grbg, p); nodep[(*ntips) - 1] = *p; setupnode(*p, *ntips); (*p)->tip = true; (*p)->naymlength = len ; strncpy ((*p)->nayme, str, MAXNCH); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); if (!minusread) (*p)->oldlen = valyew / divisor; else (*p)->oldlen = fabs(valyew/divisor); if ((*p)->oldlen < epsilon) (*p)->oldlen = epsilon; if ((*p)->back != NULL) (*p)->back->oldlen = (*p)->oldlen; break; case hsnolength: haslengths = false; break; default: /* cases hslength,iter,treewt,unitrwt */ 
break; /* should not occur */ } } /* initdrawtreenode */ void initialparms() { /* initialize parameters */ paperx = 20.6375; pagex = 20.6375; papery = 26.9875; pagey = 26.9875; strcpy(fontname,"Times-Roman"); plotrparms(spp); grows = vertical; treeangle = pi / 2.0; ark = 2 * pi; improve = true; nbody = false; regular = false; rescaled = true; bscale = 1.0; labeldirec = middle; xmargin = 0.08 * xsize; ymargin = 0.08 * ysize; labelrotation = 0.0; charht = 0.3333; /* these are set by emboss_getoptions */ /* // preview = true; // plotter = DEFPLOTTER; // previewer = DEFPREV; */ hpmargin = 0.02*pagex; vpmargin = 0.02*pagey; labelavoid = false; uselengths = haslengths; } /* initialparms */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { /* get from user the relevant parameters for the plotter and diagram */ int m, n; AjPStr plottercode = NULL; AjPStr getpreviewer = NULL; AjPStr labeldirection = NULL; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); n = (int)((pagex-hpmargin-0.01)/(paperx-hpmargin)+1.0); m = (int)((pagey-vpmargin-0.01)/(papery-vpmargin)+1.0); phylotrees = ajAcdGetTree("intreefile"); plottercode = ajAcdGetListSingle("plotter"); getplotter(ajStrGetCharFirst(plottercode)); labeldirection = ajAcdGetListSingle("labeldirection"); labeldirec = middle; if(ajStrMatchC(labeldirection, "a")) labeldirec = along; else if(ajStrMatchC(labeldirection, "f")) labeldirec = fixed; else if(ajStrMatchC(labeldirection, "r")) labeldirec = radial; else if(ajStrMatchC(labeldirection, "m")) labeldirec = middle; getpreviewer = ajAcdGetListSingle("previewer"); if(ajStrMatchC(getpreviewer, "n")) { preview = false; previewer = other; /* Added by Dan F. */ } else if(ajStrMatchC(getpreviewer, "i")) previewer = ibm; else if(ajStrMatchC(getpreviewer, "m")) previewer = mac; else if(ajStrMatchC(getpreviewer, "x")) previewer = xpreview; else if(ajStrMatchC(getpreviewer, "w")) previewer = winpreview; else if(ajStrMatchC(getpreviewer, "i")) previewer = tek; else if(ajStrMatchC(getpreviewer, "i")) previewer = decregis; else if(ajStrMatchC(getpreviewer, "o")) previewer = other; uselengths = ajAcdGetBoolean("lengths"); /* needed */ labelrotation = ajAcdGetFloat("labelrotation"); if(plotter==ray) { xmargin = ajAcdGetFloat("xrayshade"); ymargin = ajAcdGetFloat("yrayshade"); } else { xmargin = ajAcdGetFloat("xmargin"); ymargin = ajAcdGetFloat("ymargin"); } rescaled = ajAcdGetToggle("rescaled"); if(rescaled) bscale = ajAcdGetFloat("bscale"); m = ajAcdGetFloat("pagesheight"); n = ajAcdGetFloat("pageswidth"); paperx = ajAcdGetFloat("paperx"); papery = ajAcdGetFloat("papery"); hpmargin = ajAcdGetFloat("hpmargin"); vpmargin = ajAcdGetFloat("vpmargin"); pagex = ((double)n * (paperx-hpmargin)+hpmargin); pagey = ((double)m * (papery-vpmargin)+vpmargin); embossplotfile = ajAcdGetOutfile("plotfile"); emboss_openfile(embossplotfile, &plotfile, &pltfilename); } /* getparms */ char showparms() { long loopcount; char numtochange; Char ch,input[64]; double treea; char options[32]; strcpy(options,"#YN0OPVBLRAIDSMC"); if (strcmp(fontname,"Hershey") !=0 && (((plotter == pict || plotter == mac) && (((grows == vertical && labelrotation == 0.0) || (grows == horizontal && labelrotation == 90.0)))))) strcat(options,"Q"); if (plotter == lw || plotter == idraw || plotter == pict || plotter == mac) strcat(options,"F"); if (!improve) strcat(options,"G"); if (!firstscreens) clearit(); printf("\nUnrooted tree plotting program version %s\n", VERSION); putchar('\n'); printf("Here are the settings: \n\n"); printf(" 0 Screen type (IBM PC, ANSI)? 
%s\n", ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); printf(" P Final plotting device: "); switch (plotter) { case lw: printf(" Postscript printer\n"); break; case pcl: printf(" HP Laserjet compatible printer (%d DPI)\n", (int) hpresolution); break; case epson: printf(" Epson dot-matrix printer\n"); break; case pcx: printf(" PCX file for PC Paintbrush drawing program (%s)\n", (resopts == 1) ? "EGA 640x350" : (resopts == 2) ? "VGA 800x600" : "VGA 1024x768"); break; case pict: printf(" Macintosh PICT file for drawing program\n"); break; case idraw: printf(" Idraw drawing program\n"); break; case fig: printf(" Xfig drawing program\n"); break; case hp: printf(" HPGL graphics language for HP plotters\n"); break; case bmp: printf(" MS-Windows Bitmap (%d by %d resolution)\n", (int)xsize,(int)ysize); break; case xbm: printf(" X Bitmap file format (%d by %d resolution)\n", (int)xsize,(int)ysize); break; case ibm: printf(" IBM PC graphics (CGA, EGA, or VGA)\n"); break; case tek: printf(" Tektronix graphics screen\n"); break; case decregis: printf(" DEC ReGIS graphics (VT240 or DECTerm)\n"); break; case houston: printf(" Houston Instruments plotter\n"); break; case toshiba: printf(" Toshiba 24-pin dot matrix printer\n"); break; case citoh: printf(" Imagewriter or C.Itoh/TEC/NEC 9-pin dot matrix printer\n"); break; case oki: printf(" old Okidata 9-pin dot matrix printer\n"); break; case ray: printf(" Rayshade ray-tracing program file format\n"); break; case pov: printf(" POV ray-tracing program file format\n"); break; case vrml: printf(" VRML, Virtual Reality Markup Language\n"); case mac: case gif: case other: break ; default: /* case xpreview not handled */ break; } printf(" V Previewing device: "); if (!preview) printf(" (none)\n"); else { switch (previewer) { case ibm: printf(" IBM PC graphics (CGA, EGA, or VGA)\n"); break; case xpreview: printf(" X Windows display\n"); break; case tek: printf(" Tektronix graphics screen\n"); break; case mac: printf(" Macintosh graphics screen\n"); break; case decregis: printf(" DEC ReGIS graphics (VT240 or DECTerm)\n"); break; case winpreview: printf(" MS Windows display\n"); break; case lw: case hp: case houston: case epson: case oki: case fig: case citoh: case toshiba: case pcx: case pcl: case pict: case ray: case pov: case bmp: case xbm: case gif: case idraw: case other: break ; default: /* case vrml not handled */ break; } } printf(" B Use branch lengths: "); if (haslengths) printf("%s\n",uselengths ? 
"Yes" : "No"); else printf("(no branch lengths available)\n"); printf(" L Angle of labels:"); if (labeldirec == fixed) { printf(" Fixed angle of"); if (labelrotation >= 10.0) printf("%6.1f", labelrotation); else if (labelrotation <= -10.0) printf("%7.1f", labelrotation); else if (labelrotation < 0.0) printf("%6.1f", labelrotation); else printf("%5.1f", labelrotation); printf(" degrees\n"); } else if (labeldirec == radial) printf(" Radial\n"); else if (labeldirec == middle) printf(" branch points to Middle of label\n"); else printf(" Along branches\n"); printf(" R Rotation of tree:"); treea = treeangle * 180 / pi; if (treea >= 100.0) printf("%7.1f\n", treea); else if (treea >= 10.0) printf("%6.1f\n", treea); else if (treea <= -100.0) printf("%8.1f\n", treea); else if (treea <= -10.0) printf("%7.1f\n", treea); else if (treea < 0.0) printf("%6.1f\n", treea); else printf("%5.1f\n", treea); printf(" A Angle of arc for tree:"); treea = 180 * ark / pi; if (treea >= 100.0) printf("%7.1f\n", treea); else if (treea >= 10.0) printf("%6.1f\n", treea); else if (treea <= -100.0) printf("%8.1f\n", treea); else if (treea <= -10.0) printf("%7.1f\n", treea); else if (treea < 0.0) printf("%6.1f\n", treea); else printf("%5.1f\n", treea); /* printf(" I Iterate to improve tree: %s\n", (improve ? "Yes" : "No")); */ printf(" I Iterate to improve tree: "); if (improve) { if (nbody) printf("n-Body algorithm\n"); else printf("Equal-Daylight algorithm\n"); } else printf("No\n"); if (improve) printf(" D Try to avoid label overlap? %s\n", (labelavoid? "Yes" : "No")); printf(" S Scale of branch length:"); if (rescaled) printf(" Automatically rescaled\n"); else printf(" Fixed:%6.2f cm per unit branch length\n", bscale); if (!improve) { printf(" G Regularize the angles: %s\n", (regular ? "Yes" : "No")); } printf(" C Relative character height:%8.4f\n", charht); if ((((plotter == pict || plotter == mac) && (((grows == vertical && labelrotation == 0.0) || (grows == horizontal && labelrotation == 90.0)))))) printf(" F Font: %s\n Q" " Pict Font Attributes: %s, %s, %s, %s\n", fontname, (pictbold ? "Bold" : "Medium"), (pictitalic ? "Italic" : "Regular"), (pictshadow ? "Shadowed": "Unshadowed"), (pictoutline ? "Outlined" : "Unoutlined")); else if (plotter == lw || plotter == idraw) printf(" F Font: %s\n",fontname); if (plotter == ray) { printf(" M Horizontal margins:%6.2f pixels\n", xmargin); printf(" M Vertical margins:%6.2f pixels\n", ymargin); } else { printf(" M Horizontal margins:%6.2f cm\n", xmargin); printf(" M Vertical margins:%6.2f cm\n", ymargin); } printf(" # Page size submenu: "); /* Add 0.5 to clear up truncation problems. */ if (((int) ((pagex / paperx) + 0.5) == 1) && ((int) ((pagey / papery) + 0.5) == 1)) /* If we're only using one page per tree, */ printf ("one page per tree\n") ; else printf ("%.0f by %.0f pages per tree\n", (pagey-vpmargin) / (papery-vpmargin), (pagex-hpmargin) / (paperx-hpmargin)) ; loopcount = 0; for (;;) { printf("\n Y to accept these or type the letter for one to change\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); uppercase(&input[0]); ch=input[0]; if (strchr(options,ch)) { numtochange = ch; break; } printf(" That letter is not one of the menu choices. 
Type\n"); countup(&loopcount, 100); } return numtochange; } /* showparms */ void getparms(char numtochange) { /* get from user the relevant parameters for the plotter and diagram */ long loopcount2; Char ch; boolean ok; char options[32]; char line[32]; char input[100]; int m, n; AjPStr plottercode = NULL; n = (int)((pagex-hpmargin-0.01)/(paperx-hpmargin)+1.0); m = (int)((pagey-vpmargin-0.01)/(papery-vpmargin)+1.0); strcpy(options,"YNOPVBLRAIDSMC"); if ((((plotter == pict || plotter == mac) && (((grows == vertical && labelrotation == 0.0) || (grows == horizontal && labelrotation == 90.0)))))) strcat(options,"Q"); if (plotter == lw || plotter == idraw) strcat(options,"F"); if (!improve) strcat(options,"G"); if (numtochange == '*') { do { printf(" Type the number of one that you want to change:\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(line); numtochange = line[0]; } while (strchr(options,numtochange)); } switch (numtochange) { case '0': initterminal(&ibmpc, &ansi); break; case 'P': plottercode = ajAcdGetListSingle("plotter"); getplotter(ajStrGetCharFirst(plottercode)); break; case 'V': getpreview(); break; case '#': loopcount2 = 0; for (;;){ clearit(); printf(" Page Specifications Submenu\n\n"); printf(" L Output size in pages: %.0f down by %.0f across\n", (pagey / papery), (pagex / paperx)); printf(" P Physical paper size: %1.5f by %1.5f cm\n",paperx,papery); printf(" O Overlap Region: %1.5f %1.5f cm\n",hpmargin,vpmargin); printf(" M main menu\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); ch = input[0]; uppercase(&ch); switch (ch){ case 'L': printf("Number of pages in height:\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); m = atoi(input); printf("Number of pages in width:\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); n = atoi(input); break; case 'P': printf("Paper Width (in cm):\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); paperx = atof(input); printf("Paper Height (in cm):\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); papery = atof(input); break; case 'O': printf("Horizontal Overlap (in cm):"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); hpmargin = atof(input); printf("Vertical Overlap (in cm):"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); vpmargin = atof(input); case 'M': break; default: printf("Please enter L, P, O , or M.\n"); break; } pagex = ((double)n * (paperx-hpmargin)+hpmargin); pagey = ((double)m * (papery-vpmargin)+vpmargin); if (ch == 'M') break; countup(&loopcount2, 20); } break; case 'B': if (haslengths) uselengths = !uselengths; else { printf("Cannot use lengths since not all of them exist\n"); uselengths = false; } break; case 'L': printf("\nDo you want labels to be Fixed angle, Radial, Along,"); printf(" or Middle?\n"); loopcount2 = 0; do { printf(" Type F, R, A, or M\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%c%*[^\n]", &ch); (void)getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); countup(&loopcount2, 10); } while (ch != 'F' && ch != 'R' && ch != 'A' && ch != 'M'); switch (ch) { case 'A': labeldirec = along; break; case 'F': labeldirec = fixed; break; case 'R': labeldirec = radial; break; case 'M': labeldirec = middle; break; } if (labeldirec == fixed) { printf("Are the labels to be plotted vertically (90),\n"); printf(" horizontally (0), or downwards (-90) ?\n"); loopcount2 = 0; do { printf(" Choose an angle in degrees from 90 to -90: \n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", 
&labelrotation); (void)getchar(); countup(&loopcount2, 10); } while ((labelrotation < -90.0 || labelrotation > 90.0) && labelrotation != -99.0); } break; case 'R': printf("\n At what angle is the tree to be plotted?\n"); loopcount2 = 0; do { printf(" Choose an angle in degrees from 360 to -360: \n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &treeangle); (void)getchar(); uppercase(&ch); countup(&loopcount2, 10); } while (treeangle < -360.0 && treeangle > 360.0); treeangle = treeangle * pi / 180; break; case 'A': printf(" How many degrees (up to 360) of arc\n"); printf(" should the tree occupy? (Currently it is %5.1f)\n", 180 * ark / pi); loopcount2 = 0; do { printf("Enter a number of degrees from 0 up to 360)\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &ark); (void)getchar(); countup(&loopcount2, 10); } while (ark <= 0.0 || ark > 360.0); ark = ark * pi / 180; break; case 'I': if (nbody) { improve = false; nbody = false; } else { if (improve) nbody = true; else improve = true; } break; case 'D': labelavoid = !labelavoid; break; case 'S': rescaled = !rescaled; if (!rescaled) { printf("Centimeters per unit branch length?\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &bscale); (void)getchar(); } break; case 'M': clearit(); printf("\nThe tree will be drawn to fit in a rectangle which has \n"); printf(" margins in the horizontal and vertical directions of:\n"); if (plotter == ray) { printf( "%6.2f pixels (horizontal margin) and%6.2f pixels (vertical margin)\n", xmargin, ymargin); } else { printf("%6.2f cm (horizontal margin) and%6.2f cm (vertical margin)\n", xmargin, ymargin); } loopcount2 = 0; do { printf(" New value (in cm) of horizontal margin?\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &xmargin); (void)getchar(); ok = ((unsigned)xmargin < xsize / 2.0); if (!ok) printf(" Impossible value. Please retype it.\n"); countup(&loopcount2, 10); } while (!ok); loopcount2 = 0; do { printf(" New value (in cm) of vertical margin?\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &ymargin); (void)getchar(); ok = ((unsigned)ymargin < ysize / 2.0); if (!ok) printf(" Impossible value. Please retype it.\n"); countup(&loopcount2, 10); } while (!ok); break; case 'C': printf("New value of character height?\n"); #ifdef WIN32 phyFillScreenColor(); #endif scanf("%lf%*[^\n]", &charht); (void)getchar(); break; case 'F': printf("Enter font name or \"Hershey\" for default font\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(fontname); break; case 'G': regular = !regular; break; case 'Q': clearit(); loopcount2 = 0; do { printf("Italic? (Y/N)\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); input[0] = toupper((int)input[0]); countup(&loopcount2, 10); } while (input[0] != 'Y' && input[0] != 'N'); pictitalic = (input[0] == 'Y'); loopcount2 = 0; do { printf("Bold? (Y/N)\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); input[0] = toupper((int)input[0]); countup(&loopcount2, 10); } while (input[0] != 'Y' && input[0] != 'N'); pictbold = (input[0] == 'Y'); loopcount2 = 0; do { printf("Shadow? (Y/N)\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); input[0] = toupper((int)input[0]); countup(&loopcount2, 10); } while (input[0] != 'Y' && input[0] != 'N'); pictshadow = (input[0] == 'Y'); loopcount2 = 0; do { printf("Outline? 
(Y/N)\n"); #ifdef WIN32 phyFillScreenColor(); #endif getstryng(input); input[0] = toupper((int)input[0]); countup(&loopcount2, 10); } while (input[0] != 'Y' && input[0] != 'N'); pictoutline = (input[0] == 'Y'); break; } } /* getparms */ void getwidth(node *p) { /* get width and depth beyond each node */ double nw, nd; node *pp, *qq; nd = 0.0; if (p->tip) nw = 1.0; else { nw = 0.0; qq = p; pp = p->next; do { getwidth(pp->back); nw += pp->back->width; if (pp->back->depth > nd) nd = pp->back->depth; pp = pp->next; } while (((p != root) && (pp != qq)) || ((p == root) && (pp != p->next))); } p->depth = nd + p->length; p->width = nw; } /* getwidth */ void plrtrans(node *p, double theta, double lower, double upper) { /* polar coordinates of a node relative to start */ long num; double nn, pr, ptheta, angle, angle2, subangle, len; node *pp, *qq; nn = p->width; subangle = (upper - lower) / nn; qq = p; pp = p->next; if (p->tip) return; angle = upper; do { angle -= pp->back->width / 2.0 * subangle; pr = p->r; ptheta = p->theta; if (regular) { num = 1; while (num * subangle < 2 * pi) num *= 2; if (angle >= 0.0) angle2 = 2 * pi / num * (long)(num * angle / (2 * pi) + 0.5); else angle2 = 2 * pi / num * (long)(num * angle / (2 * pi) - 0.5); } else angle2 = angle; if (uselengths) len = fabs(pp->back->oldlen); else len = 1.0; pp->back->r = sqrt(len * len + pr * pr + 2 * len * pr * cos(angle2 - ptheta)); if (fabs(pr * cos(ptheta) + len * cos(angle2)) > epsilon) pp->back->theta = atan((pr * sin(ptheta) + len * sin(angle2)) / (pr * cos(ptheta) + len * cos(angle2))); else if (pr * sin(ptheta) + len * sin(angle2) >= 0.0) pp->back->theta = pi / 2; else pp->back->theta = 1.5 * pi; if (pr * cos(ptheta) + len * cos(angle2) < -epsilon) pp->back->theta += pi; if (!pp->back->tip) plrtrans(pp->back, pp->back->theta, angle - pp->back->width * subangle / 2.0, angle + pp->back->width * subangle / 2.0); else pp->back->oldtheta = angle2; angle -= pp->back->width / 2.0 * subangle; pp = pp->next; } while (((p != root) && (pp != qq)) || ((p == root) && (pp != p->next))); } /* plrtrans */ void coordtrav(node *p, double *xx, double *yy) { /* compute x and y coordinates */ node *pp; if (!p->tip) { pp = p->next; while (pp != p) { coordtrav(pp->back, xx, yy); pp = pp->next; if (p == root) coordtrav(p->back, xx, yy); } } (*xx) = p->r * cos(p->theta); (*yy) = p->r * sin(p->theta); if ((*xx) > maxx) maxx = (*xx); if ((*xx) < minx) minx = (*xx); if ((*yy) > maxy) maxy = (*yy); if ((*yy) < miny) miny = (*yy); p->xcoord = (*xx); p->ycoord = (*yy); } /* coordtrav */ double angleof(double x, double y) { /* compute the angle of a vector */ double theta; if (fabs(x) > epsilon) theta = atan(y / x); else if (y >= 0.0) theta = pi / 2; else theta = 1.5 * pi; if (x < -epsilon) theta = pi + theta; while (theta > 2 * pi) theta -= 2 * pi; while (theta < 0.0) theta += 2 * pi; return theta; } /* angleof */ void polartrav(node *p, double xx, double yy, double firstx, double firsty, double *leftx, double *lefty, double *rightx, double *righty) { /* go through subtree getting left and right vectors */ double x, y, xxx, yyy, labangle = 0; boolean lookatit; node *pp; lookatit = true; if (!p->tip) lookatit = (p->next->next != p || p->index != root->index); if (lookatit) { x = nodep[p->index - 1]->xcoord; y = nodep[p->index - 1]->ycoord; if (p->tip) { if (labeldirec == fixed) { labangle = pi * labelrotation / 180.0; if (cos(p->oldtheta) < 0.0) labangle = labangle - pi; } if (labeldirec == radial) labangle = p->theta; else if (labeldirec == along) labangle = 
p->oldtheta; else if (labeldirec == middle) labangle = 0.0; xxx = x; yyy = y; if (labelavoid) { if (labeldirec == middle) { xxx += GAP * labelheight * cos(p->oldtheta); yyy += GAP * labelheight * sin(p->oldtheta); xxx += labelheight * cos(labangle) * textlength[p->index - 1]; if (textlength[p->index - 1] * sin(p->oldtheta) < 1.0) xxx += labelheight * cos(labangle) * textlength[p->index - 1]; else xxx += 0.5 * labelheight * cos(labangle) * textlength[p->index - 1]; yyy += labelheight * sin(labangle) * textlength[p->index - 1]; } else { xxx += GAP * labelheight * cos(p->oldtheta); yyy += GAP * labelheight * sin(p->oldtheta); xxx -= labelheight * cos(labangle) * 0.5 * firstlet[p->index - 1]; yyy -= labelheight * sin(labangle) * 0.5 * firstlet[p->index - 1]; xxx += labelheight * cos(labangle) * textlength[p->index - 1]; yyy += labelheight * sin(labangle) * textlength[p->index - 1]; } } if ((yyy - yy) * firstx - (xxx - xx) * firsty < 0.0) { if ((yyy - yy) * (*rightx) - (xxx - xx) * (*righty) < 0.0) { (*rightx) = xxx - xx; (*righty) = yyy - yy; } } if ((yyy - yy) * firstx - (xxx - xx) * firsty > 0.0) { if ((yyy - yy) * (*leftx) - (xxx - xx) * (*lefty) > 0.0) { (*leftx) = xxx - xx; (*lefty) = yyy - yy; } } } if ((y - yy) * firstx - (x - xx) * firsty < 0.0) { if ((y - yy) * (*rightx) - (x - xx) * (*righty) < 0.0) { (*rightx) = x - xx; (*righty) = y - yy; } } if ((y - yy) * firstx - (x - xx) * firsty > 0.0) { if ((y - yy) * (*leftx) - (x - xx) * (*lefty) > 0.0) { (*leftx) = x - xx; (*lefty) = y - yy; } } } if (p->tip) return; pp = p->next; while (pp != p) { if (pp != NULL) polartrav(pp->back,xx,yy,firstx,firsty,leftx,lefty,rightx,righty); pp = pp->next; } } /* polartrav */ void tilttrav(node *q, double *xx, double *yy, double *sinphi, double *cosphi) { /* traverse to move successive nodes */ double x, y; node *pp; pp = nodep[q->index - 1]; x = pp->xcoord; y = pp->ycoord; pp->xcoord = (*xx) + (x - (*xx)) * (*cosphi) + ((*yy) - y) * (*sinphi); pp->ycoord = (*yy) + (x - (*xx)) * (*sinphi) + (y - (*yy)) * (*cosphi); if (q->tip) return; pp = q->next; while (pp != q) { /* if (pp != root) */ if (pp->back != NULL) tilttrav(pp->back,xx,yy,sinphi,cosphi); pp = pp->next; } } /* tilttrav */ void polarize(node *p, double *xx, double *yy) { double TEMP, TEMP1; if (fabs(p->xcoord - (*xx)) > epsilon) p->oldtheta = atan((p->ycoord - (*yy)) / (p->xcoord - (*xx))); else if (p->ycoord - (*yy) > epsilon) p->oldtheta = pi / 2; if (p->xcoord - (*xx) < -epsilon) p->oldtheta += pi; if (fabs(p->xcoord - root->xcoord) > epsilon) p->theta = atan((p->ycoord - root->ycoord) / (p->xcoord - root->xcoord)); else if (p->ycoord - root->ycoord > 0.0) p->theta = pi / 2; else p->theta = 1.5 * pi; if (p->xcoord - root->xcoord < -epsilon) p->theta += pi; TEMP = p->xcoord - root->xcoord; TEMP1 = p->ycoord - root->ycoord; p->r = sqrt(TEMP * TEMP + TEMP1 * TEMP1); } /* polarize */ void leftrightangle(node *p, double xx, double yy) { /* get leftmost and rightmost angle of subtree, put them in node p */ double firstx, firsty, leftx, lefty, rightx, righty; double langle, rangle; firstx = nodep[p->back->index-1]->xcoord - xx; firsty = nodep[p->back->index-1]->ycoord - yy; leftx = firstx; lefty = firsty; rightx = firstx; righty = firsty; if (p->back != NULL) polartrav(p->back,xx,yy,firstx,firsty,&leftx,&lefty,&rightx,&righty); if ((fabs(leftx) < epsilon) && (fabs(lefty) < epsilon)) langle = p->back->oldtheta; else langle = angleof(leftx, lefty); if ((fabs(rightx) < epsilon) && (fabs(righty) < epsilon)) rangle = p->back->oldtheta; else rangle = 
angleof(rightx, righty); while (langle - rangle > 2*pi) langle -= 2 * pi; while (rangle > langle) { if (rangle > 2*pi) rangle -= 2 * pi; else langle += 2 * pi; } while (langle > 2*pi) { rangle -= 2 * pi; langle -= 2 * pi; } p->lefttheta = langle; p->righttheta = rangle; } /* leftrightangle */ void improvtrav(node *p) { /* traverse tree trying different tiltings at each node */ double xx, yy, cosphi, sinphi; double langle, rangle, sumrot, olddiff; node *pp, *qq, *ppp;; if (p->tip) return; xx = p->xcoord; yy = p->ycoord; pp = p->next; do { leftrightangle(pp, xx, yy); pp = pp->next; } while ((pp != p->next)); if (p == root) { pp = p->next; do { qq = pp; pp = pp->next; } while (pp != root); p->righttheta = qq->righttheta; p->lefttheta = p->next->lefttheta; } qq = p; pp = p->next; ppp = p->next->next; do { langle = qq->righttheta - pp->lefttheta; rangle = pp->righttheta - ppp->lefttheta; while (langle > pi) langle -= 2*pi; while (langle < -pi) langle += 2*pi; while (rangle > pi) rangle -= 2*pi; while (rangle < -pi) rangle += 2*pi; olddiff = fabs(langle-rangle); sumrot = (langle - rangle) /2.0; if (sumrot > langle) sumrot = langle; if (sumrot < -rangle) sumrot = -rangle; cosphi = cos(sumrot); sinphi = sin(sumrot); if (p != root) { if (fabs(sumrot) > maxchange) maxchange = fabs(sumrot); pp->back->oldtheta += sumrot; tilttrav(pp->back,&xx,&yy,&sinphi,&cosphi); polarize(pp->back,&xx,&yy); leftrightangle(pp, xx, yy); langle = qq->righttheta - pp->lefttheta; rangle = pp->righttheta - ppp->lefttheta; while (langle > pi) langle -= 2*pi; while (langle < -pi) langle += 2*pi; while (rangle > pi) rangle -= 2*pi; while (rangle < -pi) rangle += 2*pi; while ((fabs(langle-rangle) > olddiff) && (fabs(sumrot) > 0.01)) { sumrot = sumrot /2.0; cosphi = cos(-sumrot); sinphi = sin(-sumrot); pp->back->oldtheta -= sumrot; tilttrav(pp->back,&xx,&yy,&sinphi,&cosphi); polarize(pp->back,&xx,&yy); leftrightangle(pp, xx, yy); langle = qq->righttheta - pp->lefttheta; rangle = pp->righttheta - ppp->lefttheta; if (langle > pi) langle -= 2*pi; if (langle < -pi) langle += 2*pi; if (rangle > pi) rangle -= 2*pi; if (rangle < -pi) rangle += 2*pi; } } qq = pp; pp = pp->next; ppp = ppp->next; } while (((p == root) && (pp != p->next)) || ((p != root) && (pp != p))); pp = p->next; do { improvtrav(pp->back); pp = pp->next; } while (((p == root) && (pp != p->next)) || ((p != root) && (pp != p))); } /* improvtrav */ void force_1to1(node *pFromSubNode, node *pToSubNode, double *pForce, double *pAngle, double medianDistance) { /* calculate force acting between 2 nodes and return the force in pForce. Remember to pass the index subnodes to this function if needed. Force should always be positive for repelling. Angle changes to indicate the direction of the force. The value of INFINITY is the cap to the value of Force. There might have problem (error msg.) if pFromSubNode and pToSubNode are the same node or the coordinates are identical even with double precision. 
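     As a worked example of the inverse-square rule below: at a separation equal
     to medianDistance the norminal distance is 1 and the repelling force is 1;
     at half the median distance the force is 1/(0.5*0.5) = 4, and at twice the
     median distance it is 1/(2*2) = 0.25.  If the norminal distance falls below
     epsilon the force is simply set to INFINITY, and it is capped at INFINITY
     otherwise.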
*/ double distanceX, distanceY, distance, norminalDistance; distanceX = pFromSubNode->xcoord - pToSubNode->xcoord; distanceY = pFromSubNode->ycoord - pToSubNode->ycoord; distance = sqrt( distanceX*distanceX + distanceY*distanceY ); norminalDistance = distance/medianDistance; if (norminalDistance < epsilon) { *pForce = INFINITY; } else { *pForce = (double)1 / (norminalDistance * norminalDistance); if (*pForce > INFINITY) *pForce = INFINITY; } *pAngle = computeAngle(pFromSubNode->xcoord, pFromSubNode->ycoord, pToSubNode->xcoord, pToSubNode->ycoord); return; } /* force_1to1 */ void totalForceOnNode(node *pPivotSubNode, node *pToSubNode, double *pTotalForce, double *pAngle, double medianDistance) { /* pToSubNode is where all the relevent nodes apply forces to. All branches are visited except the branch contains pToSubNode. pToSubNode must be one of the branch out of the current Node (= out of one of the subnode in the current subnodes set.) Most likely pPivotSubNode is not the index subNode! In any case, only the leafs are consider in repelling force; so, no worry about index subNode. pTotalForce and pAngle must be set to 0 before calling this function for the first time, or the result will be invalid. pPivotSubNode is named for external interface. When calling totalForceOnNode() recursively, pPivotSubNode should be thought of as pFromSubNode. */ node *pSubNode; double force, angle, forceX, forceY, prevForceX, prevForceY; pSubNode = pPivotSubNode; /* visit the rest of the branches of current node; the branch attaches to the current subNode may be visited in the code down below. */ while (pSubNode->next != NULL && pSubNode->next != pPivotSubNode) { pSubNode = pSubNode->next; if ( pSubNode->back != NULL && pSubNode->back != pToSubNode) totalForceOnNode(pSubNode->back, pToSubNode, pTotalForce, pAngle, medianDistance); } /* visit this branch; You need to visit it for the first time - at root only! * * Modified so that all nodes are visited and calculated forces, instead of * just the leafs only. * use pPivotSubNode instead of pSubNode here because pSubNode stop short * just before pPivotSubNode (the entry node) */ if ( pPivotSubNode == root && pPivotSubNode->back != NULL && pPivotSubNode->back != pToSubNode) totalForceOnNode(pPivotSubNode->back, pToSubNode, pTotalForce, pAngle, medianDistance); /* Break down the previous sum of forces to components form */ prevForceX = *pTotalForce * cos(*pAngle); prevForceY = *pTotalForce * sin(*pAngle); force_1to1(nodep[pPivotSubNode->index-1], pToSubNode, &force, &angle, medianDistance); /* force between 2 nodes */ forceX = force * cos(angle); forceY = force * sin(angle); /* Combined force */ forceX = forceX + prevForceX; forceY = forceY + prevForceY; /* Write to output parameters */ *pTotalForce = sqrt( forceX*forceX + forceY*forceY ); *pAngle = computeAngle((double)0, (double)0, forceX, forceY); return; } /* totalForceOnNode */ double dotProduct(double Xu, double Yu, double Xv, double Yv) { return Xu * Xv + Yu * Yv; } /* dotProduct */ double capedAngle(double angle) { /* Return the equivalent value of angle that is within 0 to 2*pie */ while (angle < 0 || angle >= 2*pie) { if(angle < 0) { angle = angle + 2*pie; } else if (angle >= 2*pie) { angle = angle - 2*pie; } } return angle; } /* capedAngle */ double angleBetVectors(double Xu, double Yu, double Xv, double Yv) { /* Calculate angle between 2 vectors; use capedAngle() if needed to get the equivalent angle in positive value. - I guess it is never necessary. 
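     The value returned is acos( (Xu*Xv + Yu*Yv) / (|u|*|v|) ), which always
     lies between 0 and pi; for example u = (1,0) and v = (1,1) give
     cosTheta = 1/sqrt(2) and theta = pi/4.  The result carries no left/right
     orientation.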
Use vCounterClkwiseU() to get the relative position of the vectors. */ double dotProd, cosTheta, theta, lengthsProd; /*double angleU, angleV, angleUToV;*/ dotProd = dotProduct(Xu, Yu, Xv, Yv); lengthsProd = sqrt(Xu*Xu+Yu*Yu) * sqrt(Xv*Xv+Yv*Yv); if (lengthsProd < epsilon) { printf("ERROR: drawtree - division by zero in angleBetVectors()!\n"); printf("Xu %f Yu %f Xv %f Yv %f\n", Xu, Yu, Xv, Yv); embExitBad(); } cosTheta = dotProd / lengthsProd; if (cosTheta > 1 || cosTheta < -1) { printf("ERROR: drawtree - acos of an invalid value in angleBetVectors()!\n"); embExitBad(); } theta = acos(cosTheta); if (theta < 0) { printf("ERROR: theta not supposed to be negative in angleBetVectors()!\n"); printf("theta = %f\n", theta); embExitBad(); } return theta; } /* angleBetVectors */ double signOfMoment(double xReferenceVector, double yReferenceVector, double xForce, double yForce) { /* it return the sign of the moment caused by the force, applied to the tip of the refereceVector; the root of the refereceVector is the pivot. */ double angleReference, angleForce, sign; angleReference = computeAngle((double)0, (double)0, xReferenceVector, yReferenceVector); angleForce = computeAngle((double)0, (double)0, xForce, yForce); angleForce = capedAngle(angleForce); angleReference = capedAngle(angleReference); /* reduce angleReference to 0 */ angleForce = angleForce - angleReference; angleForce = capedAngle(angleForce); if (angleForce > 0 && angleForce < pie) { /* positive sign - force pointing toward the left of the reference line/vector. */ sign = 1; } else { /* negative sign */ sign = -1; } return sign; } /* signOfMoment */ double vCounterClkwiseU(double Xu, double Yu, double Xv, double Yv) { /* Return 1 if vector v is counter clockwise from u */ /* signOfMoment() is doing just that! */ return signOfMoment(Xu, Yu, Xv, Yv); } /* vCounterClkwiseU */ double forcePerpendicularOnNode(node *pPivotSubNode, node *pToSubNode, double medianDistance) { /* Read comment for totalForceOnNode */ /* It supposed to return a positive value to indicate that it has a positive moment; and negative return value to indicate negative moment. force perpendicular at norminal distance 1 is taken to be 1. medianDistance is the median of Distances in this graph. */ /* / Force / | ToNode o > alpha | \ yDelta | \ theta = pie/2 + alpha | beta = vector (or angle) from Pivot to ToNode Pivot o----------- xDelta alpha = theta + beta */ double totalForce, forceAngle, xDelta, yDelta; double alpha, theta, forcePerpendicular, sinForceAngle, cosForceAngle; totalForce = (double)0; forceAngle = (double)0; totalForceOnNode(pPivotSubNode, pToSubNode, &totalForce, &forceAngle, medianDistance); xDelta = nodep[pToSubNode->index-1]->xcoord - nodep[pPivotSubNode->index-1]->xcoord; yDelta = nodep[pToSubNode->index-1]->ycoord - nodep[pPivotSubNode->index-1]->ycoord; /* Try to avoid the case where 2 nodes are on top of each other. */ /* if (xDelta < 0) tempx = -xDelta; else tempx = xDelta; if (yDelta < 0) tempy = -yDelta; else tempy = yDelta; if (tempx < epsilon && tempy < epsilon) { return; } */ sinForceAngle = sin(forceAngle); cosForceAngle = cos(forceAngle); theta = angleBetVectors(xDelta, yDelta, cosForceAngle, sinForceAngle); if (theta > pie/2) { alpha = theta - pie/2; } else { alpha = pie/2 - theta; } forcePerpendicular = totalForce * cos(alpha); if (forcePerpendicular < -epsilon) { printf("ERROR: drawtree - forcePerpendicular applied at an angle should" " not be less than zero (in forcePerpendicularOnNode()). 
\n"); printf("alpha = %f\n", alpha); embExitBad(); } /* correct the sign of the moment */ forcePerpendicular = signOfMoment(xDelta, yDelta, cosForceAngle, sinForceAngle) * forcePerpendicular; return forcePerpendicular; } /* forcePerpendicularOnNode */ void polarizeABranch(node *pStartingSubNode, double *xx, double *yy) { /* added - danieyek 990128 */ /* After calling tilttrav(), if you don't polarize all the nodes on the branch to convert the x-y coordinates to theta and radius, you won't get result on the plot! This function takes a subnode and branch out of all other subnode except the starting subnode (where the parent is), thus converting the x-y to polar coordinates for the branch only. xx and yy are purely "inherited" features of polarize(). They should have been passed as values not addresses. */ node *pSubNode; pSubNode = pStartingSubNode; /* convert the current node (note: not subnode) to polar coordinates. */ polarize( nodep[pStartingSubNode->index - 1], xx, yy); /* visit the rest of the branches of current node */ while (pSubNode->next != NULL && pSubNode->next != pStartingSubNode) { pSubNode = pSubNode->next; if ( pSubNode->tip != true ) polarizeABranch(pSubNode->back, xx, yy); } return; } /* polarizeABranch */ void pushNodeToStack(stackElemType **ppStackTop, node *pNode) { /* added - danieyek 990204 */ /* pStackTop must be the current top element of the stack, where we add another element on top of it. ppStackTop must be the location where we can find pStackTop. This function "returns" the revised top (element) of the stack through the output parameter, ppStackTop. The last element on the stack has the "back" (pStackElemBack) pointer set to NULL. So, when the last element is poped, ppStackTop will be automatically set to NULL. If popNodeFromStack() is called with ppStackTop = NULL, we assume that it is the error caused by over popping the stack. */ stackElemType *pStackElem; if (ppStackTop == NULL) { /* NULL can be stored in the location, but the location itself can't be NULL! */ printf("ERROR: drawtree - error using pushNodeToStack(); " "ppStackTop is NULL.\n"); embExitBad(); } pStackElem = (stackElemType*)Malloc( sizeof(stackElemType) ); pStackElem->pStackElemBack = *ppStackTop; /* push an element onto the stack */ pStackElem->pNode = pNode; *ppStackTop = pStackElem; return; } /* pushNodeToStack */ void popNodeFromStack(stackElemType **ppStackTop, node **ppNode) { /* added - danieyek 990205 */ /* pStackTop must be the current top element of the stack, where we pop an element from the top of it. ppStackTop must be the location where we can find pStackTop. This function "returns" the revised top (element) of the stack through the output parameter, ppStackTop. The last element on the stack has the "back" (pStackElemBack) pointer set to NULL. So, when the last element is poped, ppStackTop will be automatically set to NULL. If popNodeFromStack() is called with ppStackTop = NULL, we assume that it is the error caused by over popping the stack. */ stackElemType *pStackT; if (ppStackTop == NULL) { printf("ERROR: drawtree - a call to pop while the stack is empty.\n"); embExitBad(); } pStackT = *ppStackTop; *ppStackTop = pStackT->pStackElemBack; *ppNode = pStackT->pNode; free(pStackT); return; } /* popNodeFromStack */ double medianOfDistance(node *pRootSubNode, boolean firstRecursiveCallP) { /* added - danieyek 990208 */ /* Find the median of the distance; used to compute the angle to rotate in proportion to the size of the graph and forces. 
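     For example, with node-to-pivot distances of 4.0, 1.0 and 3.0 the linked
     list is sorted to put them in order and the middle value, 3.0, is returned.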
It is assumed that pRootSubNode is also the pivot (during the first call to this function) - the center, with respect to which node the distances are calculated. If there are only 3 element, element #2 is returned, ie. (2+1)/2. This function now finds the median of distances of all nodes, not only the leafs! */ node *pSubNode; double xDelta, yDelta, distance; long i, j; struct dblLinkNode { double value; /* Implement reverse Linked List */ struct dblLinkNode *pBack; } *pLink, *pBackElem, *pMidElem, *pFrontElem, junkLink; /* must use static to retain values over calls */ static node *pReferenceNode; static long count; static struct dblLinkNode *pFrontOfLinkedList; /* Remember the reference node so that it doesn't have to be passed arround in the function parameter. */ if (firstRecursiveCallP == true) { pReferenceNode = pRootSubNode; pFrontOfLinkedList = NULL; count = 0; } pSubNode = pRootSubNode; /* visit the rest of the branches of current node; the branch attaches to the current subNode may be visited in the code further down below. */ while (pSubNode->next != NULL && pSubNode->next != pRootSubNode) { pSubNode = pSubNode->next; if ( pSubNode->back != NULL) medianOfDistance(pSubNode->back, false); } /* visit this branch; You need to visit it for the first time - at root only! use pRootSubNode instead of pSubNode here because pSubNode stop short just before pRootSubNode (the entry node) */ if ( firstRecursiveCallP == true && pRootSubNode->back != NULL) medianOfDistance(pRootSubNode->back, false); /* Why only leafs count? Modifying it! */ xDelta = nodep[pSubNode->index-1]->xcoord - nodep[pReferenceNode->index-1]->xcoord; yDelta = nodep[pSubNode->index-1]->ycoord - nodep[pReferenceNode->index-1]->ycoord; distance = sqrt( xDelta*xDelta + yDelta*yDelta ); /* Similar to pushing onto the stack */ pLink = (struct dblLinkNode*) Malloc( sizeof(struct dblLinkNode) ); if (pLink == NULL) { printf("Fatal ERROR: drawtree - Insufficient Memory in" " medianOfDistance()!\n"); embExitBad(); } pLink->value = distance; pLink->pBack = pFrontOfLinkedList; pFrontOfLinkedList = pLink; count = count + 1; if (firstRecursiveCallP == true) { if (count == 0) { return (double)0; } else if (count == 1) { distance = pFrontOfLinkedList->value; free(pFrontOfLinkedList); return distance; } else if (count == 2) { distance = (pFrontOfLinkedList->value + pFrontOfLinkedList->pBack->value)/(double)2; free(pFrontOfLinkedList->pBack); free(pFrontOfLinkedList); return distance; } else { junkLink.pBack = pFrontOfLinkedList; /* SORT first - use bubble sort; we start with at least 3 elements here. */ /* We are matching backward when sorting the list and comparing MidElem and BackElem along the path; junkLink is there just to make a symmetric operation at the front end. */ for (j = 0; j < count - 1; j++) { pFrontElem = &junkLink; pMidElem = junkLink.pBack; pBackElem = junkLink.pBack->pBack; for (i = j; i < count - 1; i++) { if(pMidElem->value < pBackElem->value) { /* Swap - carry the smaller value to the root of the linked list. */ pMidElem->pBack = pBackElem->pBack; pBackElem->pBack = pMidElem; pFrontElem->pBack = pBackElem; /* Correct the order of pFrontElem, pMidElem, pBackElem and match one step */ pFrontElem = pBackElem; pBackElem = pMidElem->pBack; } else { pFrontElem = pMidElem; pMidElem = pBackElem; pBackElem = pBackElem->pBack; } } pFrontOfLinkedList = junkLink.pBack; } /* Sorted; now get the middle element. 
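     Because the sort leaves the largest distance at the front of the list,
     popping (count + 1)/2 - 1 entries leaves the ((count + 1)/2)-th largest
     distance there; for odd counts this is the median, and for even counts it
     is the larger of the two middle values.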
*/ for (i = 1; i < (count + 1)/(long) 2; i++) { /* Similar to Poping the stack */ pLink = pFrontOfLinkedList; pFrontOfLinkedList = pLink->pBack; free(pLink); } /* Get the return value!! - only the last return value is the valid one. */ distance = pFrontOfLinkedList->value; /* Continue from the same i value left off by the previous for loop! */ for (; i <= count; i++) { /* Similar to Poping the stack */ pLink = pFrontOfLinkedList; pFrontOfLinkedList = pLink->pBack; free(pLink); } } } return distance; } /* medianOfDistance */ void leftRightLimits(node *pToSubNode, double *pLeftLimit, double *pRightLimit) /* As usual, pToSubNode->back is the angle leftLimit is the max angle you can rotate on the left and rightLimit vice versa. *pLeftLimit and *pRightLimit must be initialized to 0; without initialization, it would introduce bitter bugs into the program; they are initialized in this routine. */ { /* pPivotNode is nodep[pToSubNode->back->index-1], not pPivotSubNode which is just pToSubNode->back! */ node *pLeftSubNode, *pRightSubNode, *pPivotNode, *pSubNode; double leftLimit, rightLimit, xToNodeVector, yToNodeVector, xLeftVector, yLeftVector, xRightVector, yRightVector, lengthsProd; *pLeftLimit = 0; *pRightLimit = 0; /* Make an assumption first - guess "pToSubNode->back->next" is the right and the opposite direction is the left! */ /* It shouldn't be pivoted at a left, but just checking. */ if (pToSubNode->back->tip == true) { /* Logically this should not happen. But we actually can return pi as the limit. */ printf("ERROR: In leftRightLimits() - Pivoted at a leaf! Unable to " "calculate left and right limit.\n"); embExitBad(); } else if (pToSubNode->back->next->next == pToSubNode->back) { printf("ERROR: leftRightLimits() - 2-branches-only case not handled!!"); embExitBad(); } /* Else, do this */ pPivotNode = nodep[pToSubNode->back->index-1]; /* 3 or more branches - the regular case. */ /* First, initialize the pRightSubNode - non-repeative portion of the code */ pRightSubNode = pToSubNode->back; pLeftSubNode = pToSubNode->back; xToNodeVector = nodep[pToSubNode->index-1]->xcoord - pPivotNode->xcoord; yToNodeVector = nodep[pToSubNode->index-1]->ycoord - pPivotNode->ycoord; /* If both x and y are 0, then the length must be 0; but this check is not enough yet, we need to check the product of length also. */ if ( fabs(xToNodeVector) < epsilon && fabs(yToNodeVector) < epsilon ) { /* If the branch to rotate is too short, don't rotate it. */ *pLeftLimit = 0; *pRightLimit = 0; return; } while( nodep[pRightSubNode->index-1]->tip != true ) { /* Repeative code */ pRightSubNode = pRightSubNode->next->back; xRightVector = nodep[pRightSubNode->index-1]->xcoord - pPivotNode->xcoord; yRightVector = nodep[pRightSubNode->index-1]->ycoord - pPivotNode->ycoord; lengthsProd = sqrt(xToNodeVector*xToNodeVector+yToNodeVector*yToNodeVector) * sqrt(xRightVector*xRightVector+yRightVector*yRightVector); if ( lengthsProd < epsilon ) { continue; } rightLimit = angleBetVectors(xToNodeVector, yToNodeVector, xRightVector, yRightVector); if ( (*pRightLimit) < rightLimit) *pRightLimit = rightLimit; } while( nodep[pLeftSubNode->index-1]->tip != true ) { /* First, let pSubNode be 1 subnode after rightSubNode. 
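     Walking forward until pSubNode->next points back at pLeftSubNode picks out
     the last subnode in the circular list at that node; its back pointer then
     becomes the next node on the left side of the subtree.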
*/ pSubNode = pLeftSubNode->next->next; /* Then, loop until the last subNode before getting back to the pivot */ while (pSubNode->next != pLeftSubNode) { pSubNode = pSubNode->next; } pLeftSubNode = pSubNode->back; xLeftVector = nodep[pLeftSubNode->index-1]->xcoord - pPivotNode->xcoord; yLeftVector = nodep[pLeftSubNode->index-1]->ycoord - pPivotNode->ycoord; lengthsProd = sqrt(xToNodeVector*xToNodeVector+yToNodeVector*yToNodeVector) * sqrt(xLeftVector*xLeftVector+yLeftVector*yLeftVector); if ( lengthsProd < epsilon ) { continue; } leftLimit = angleBetVectors(xToNodeVector, yToNodeVector, xLeftVector, yLeftVector); if ( (*pLeftLimit) < leftLimit) *pLeftLimit = leftLimit; } return; } /* leftRightLimits */ void branchLRHelper(node *pPivotSubNode, node *pCurSubNode, double *pBranchL, double *pBranchR) { /* added - danieyek 990226 */ /* Recursive helper function for branchLeftRightAngles(). pPivotSubNode->back is the pToNode, to which node you apply the forces! */ /* Abandoned as it is similar to day-light algorithm; the first part is done implementing but not tested, the second part yet to be implemented if necessary. */ double xCurNodeVector, yCurNodeVector, xPivotVector, yPivotVector; /* Base case : a leaf - return 0 & 0. */ if ( nodep[pCurSubNode->index-1]->tip == true ) { xPivotVector = nodep[pPivotSubNode->back->index-1]->xcoord - nodep[pPivotSubNode->index-1]->xcoord; yPivotVector = nodep[pPivotSubNode->back->index-1]->ycoord - nodep[pPivotSubNode->index-1]->ycoord; xCurNodeVector = nodep[pCurSubNode->index-1]->xcoord - nodep[pPivotSubNode->index-1]->xcoord; yCurNodeVector = nodep[pCurSubNode->index-1]->ycoord - nodep[pPivotSubNode->index-1]->ycoord; if ( vCounterClkwiseU(xPivotVector, yPivotVector, xCurNodeVector, yCurNodeVector) == 1) { /* Relevant to Left Angle */ *pBranchL = angleBetVectors(xPivotVector, yPivotVector, xCurNodeVector, yCurNodeVector); *pBranchR = (double)0; } else { /* Relevant to Right Angle */ *pBranchR = angleBetVectors(xPivotVector, yPivotVector, xCurNodeVector, yCurNodeVector); *pBranchL = (double)0; } return; } else { /* not a leaf */ } } /* branchLRHelper */ void improveNodeAngle(node *pToNode, double medianDistance) { /* added - danieyek 990204 */ /* Assume calling pToNode->back will bring me to the Pivot! 
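     The rotation applied below is the perpendicular force divided by the
     norminal (median-scaled) distance of the node from its pivot, clipped to
     limitFactor (4/5) of the left/right limits from leftRightLimits(), and
     then damped by a factor of 0.1 before tilttrav() rotates the branch and
     polarizeABranch() refreshes its polar coordinates.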
*/ double forcePerpendicular, distance, xDistance, yDistance, angleRotate, sinAngleRotate, cosAngleRotate, norminalDistance, leftLimit, rightLimit, limitFactor; node *pPivot; /* Limit factor determinte how close the rotation can approach the absolute limit before colliding with other branches */ limitFactor = (double)4 / (double)5; pPivot = pToNode->back; xDistance = nodep[pPivot->index-1]->xcoord - nodep[pToNode->index-1]->xcoord; yDistance = nodep[pPivot->index-1]->ycoord - nodep[pToNode->index-1]->ycoord; distance = sqrt( xDistance*xDistance + yDistance*yDistance ); /* convert distance to absolute value and test if it is zero */ if ( fabs(distance) < epsilon) { angleRotate = (double)0; } else { leftRightLimits(pToNode, &leftLimit, &rightLimit); norminalDistance = distance / medianDistance; forcePerpendicular = forcePerpendicularOnNode(pPivot, pToNode, medianDistance); angleRotate = forcePerpendicular / norminalDistance; /* Limiting the angle of rotation */ if ( angleRotate > 0 && angleRotate > limitFactor * leftLimit) { /* Left */ angleRotate = limitFactor * leftLimit; } else if ( -angleRotate > limitFactor * rightLimit ) /* angleRotate < 0 && */ { /* Right */ angleRotate = - limitFactor * rightLimit; } } angleRotate = (double).1 * angleRotate; sinAngleRotate = sin(angleRotate); cosAngleRotate = cos(angleRotate); tilttrav(pToNode, &(nodep[pPivot->index - 1]->xcoord), &(nodep[pPivot->index - 1]->ycoord), &sinAngleRotate, &cosAngleRotate); polarizeABranch(pToNode, &(nodep[pPivot->index - 1]->xcoord), &(nodep[pPivot->index - 1]->ycoord)); } /* improveNodeAngle */ void improvtravn(node *pStartingSubNode) { /* function modified - danieyek 990125 */ /* improvtrav for n-body. */ /* POPStack is the stack that is currently being used (popped); PUSHStack is the stack that is for the use of the next round (is pushed now) */ stackElemType *pPUSHStackTop, *pPOPStackTop, *pTempStack; node *pSubNode, *pBackStartNode, *pBackSubNode; double medianDistance; long noOfIteration; /* Stack starts with no element on it */ pPUSHStackTop = NULL; pPOPStackTop = NULL; /* Get the median to relate force to angle proportionally. */ medianDistance = medianOfDistance(root, true); /* Set max. number of iteration */ for ( noOfIteration = (long)0; noOfIteration < maxNumOfIter; noOfIteration++) { /* First, push all subNodes in the root node onto the stack-to-be-used to kick up the process */ pSubNode = pStartingSubNode; pushNodeToStack(&pPUSHStackTop, pSubNode); while(pSubNode->next != pStartingSubNode) { pSubNode = pSubNode->next; pushNodeToStack(&pPUSHStackTop, pSubNode); } while (true) { /* Finishes with the current POPStack; swap the function of the stacks if PUSHStack is not empty */ if (pPUSHStackTop == NULL) { /* Exit infinity loop here if empty. */ break; } else { /* swap */ pTempStack = pPUSHStackTop; pPUSHStackTop = pPOPStackTop; pPOPStackTop = pTempStack; } while (pPOPStackTop != NULL) { /* We always push the pivot subNode onto the stack! That's when we pop that pivot subNode, subNode.back is the node we apply the force to (ToNode). Also, when we pop a pivot subNode, always push all pivot subNodes in the same ToNode onto the stack. */ popNodeFromStack(&pPOPStackTop, &pSubNode); pBackStartNode = pSubNode->back; if (pBackStartNode->tip == true) { /* tip indicates if a node is a leaf */ improveNodeAngle(pSubNode->back, medianDistance); } else { /* Push all subNodes in this pSubNode->back onto the * stack-to-be-used, after poping a pivot subNode. If * pSubNode->back is a leaf, no push on stack. 
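         * Each pass pushes every branch of the tree onto the working stack
         * exactly once (the entry subnode of each node is skipped so the walk
         * never turns back toward the root), giving one improveNodeAngle()
         * call per branch per pass; the pass is repeated maxNumOfIter times.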
*/ pBackSubNode = pBackStartNode; /* Do not push this pBackStartNode onto the stack! Or the * process will never stop. */ while(pBackSubNode->next != pBackStartNode) { pBackSubNode = pBackSubNode->next; pushNodeToStack(&pPOPStackTop, pBackSubNode); } /* improve the node even if it is not a leaf */ improveNodeAngle(pSubNode->back, medianDistance); } } } } } /* improvtravn */ void coordimprov(double *xx, double *yy) { /* use angles calculation to improve node coordinate placement */ long i, its; if (nbody) { /* n-body algorithm */ /* modified - danieyek 990125 */ /* its = 5; */ its = 1; /* modified - danieyek 990125 */ /* why its = 5 ?? - to be modified as equal-daylight below - introduce a condition to stop */ /* for (i=1; i++; i<=its) improvtravn(root); */ for (i=1; i<=its; i++) improvtravn(root); } else { /* equal-daylight algorithm */ i = 0; do { maxchange = 0.0; improvtrav(root); i++; } while ((i < MAXITERATIONS) && (maxchange > MINIMUMCHANGE)); } } /* coordimprov */ void calculate() { /* compute coordinates for tree */ double xx, yy; long i; double nttot, fontheight, labangle=0, top, bot, rig, lef; for (i = 0; i < nextnode; i++) nodep[i]->width = 1.0; for (i = 0; i < nextnode; i++) nodep[i]->xcoord = 0.0; for (i = 0; i < nextnode; i++) nodep[i]->ycoord = 0.0; if (!uselengths) { for (i = 0; i < nextnode; i++) nodep[i]->length = 1.0; } else { for (i = 0; i < nextnode; i++) nodep[i]->length = fabs(nodep[i]->oldlen); } getwidth(root); nttot = root->width; for (i = 0; i < nextnode; i++) nodep[i]->width = nodep[i]->width * spp / nttot; plrtrans(root, treeangle, treeangle - ark / 2.0, treeangle + ark / 2.0); maxx = 0.0; minx = 0.0; maxy = 0.0; miny = 0.0; coordtrav(root, &xx,&yy); fontheight = heighttext(font,fontname); if (labeldirec == fixed) labangle = pi * labelrotation / 180.0; textlength = (double*) Malloc(nextnode*sizeof(double)); firstlet = (double*) Malloc(nextnode*sizeof(double)); for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { textlength[i] = lengthtext(nodep[i]->nayme, nodep[i]->naymlength, fontname,font); textlength[i] /= fontheight; firstlet[i] = lengthtext(nodep[i]->nayme,1L,fontname,font) / fontheight; } } if (spp > 1) labelheight = charht * (maxx - minx) / (spp - 1); else labelheight = charht * (maxx - minx); if (improve) { coordimprov(&xx,&yy); maxx = 0.0; minx = 0.0; maxy = 0.0; miny = 0.0; coordtrav(root, &xx,&yy); } topoflabels = 0.0; bottomoflabels = 0.0; rightoflabels = 0.0; leftoflabels = 0.0; for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { if (labeldirec == radial) labangle = nodep[i]->theta; else if (labeldirec == along) labangle = nodep[i]->oldtheta; else if (labeldirec == middle) labangle = 0.0; if (cos(labangle) < 0.0 && labeldirec != fixed) labangle -= pi; firstlet[i] = lengthtext(nodep[i]->nayme,1L,fontname,font) / fontheight; top = (nodep[i]->ycoord - maxy) / labelheight + sin(nodep[i]->oldtheta); rig = (nodep[i]->xcoord - maxx) / labelheight + cos(nodep[i]->oldtheta); bot = (miny - nodep[i]->ycoord) / labelheight - sin(nodep[i]->oldtheta); lef = (minx - nodep[i]->xcoord) / labelheight - cos(nodep[i]->oldtheta); if (cos(labangle) * cos(nodep[i]->oldtheta) + sin(labangle) * sin(nodep[i]->oldtheta) > 0.0) { if (sin(labangle) > 0.0) top += sin(labangle) * textlength[i]; top += sin(labangle - 1.25 * pi) * GAP * firstlet[i]; if (sin(labangle) < 0.0) bot -= sin(labangle) * textlength[i]; bot -= sin(labangle - 0.75 * pi) * GAP * firstlet[i]; if (sin(labangle) > 0.0) rig += cos(labangle - 0.75 * pi) * GAP * firstlet[i]; else rig += cos(labangle - 1.25 * pi) 
* GAP * firstlet[i]; rig += cos(labangle) * textlength[i]; if (sin(labangle) > 0.0) lef -= cos(labangle - 1.25 * pi) * GAP * firstlet[i]; else lef -= cos(labangle - 0.75 * pi) * GAP * firstlet[i]; } else { if (sin(labangle) < 0.0) top -= sin(labangle) * textlength[i]; top += sin(labangle + 0.25 * pi) * GAP * firstlet[i]; if (sin(labangle) > 0.0) bot += sin(labangle) * textlength[i]; bot -= sin(labangle - 0.25 * pi) * GAP * firstlet[i]; if (sin(labangle) > 0.0) rig += cos(labangle - 0.25 * pi) * GAP * firstlet[i]; else rig += cos(labangle + 0.25 * pi) * GAP * firstlet[i]; if (sin(labangle) < 0.0) rig += cos(labangle) * textlength[i]; if (sin(labangle) > 0.0) lef -= cos(labangle + 0.25 * pi) * GAP * firstlet[i]; else lef -= cos(labangle - 0.25 * pi) * GAP * firstlet[i]; lef += cos(labangle) * textlength[i]; } if (top > topoflabels) topoflabels = top; if (bot > bottomoflabels) bottomoflabels = bot; if (rig > rightoflabels) rightoflabels = rig; if (lef > leftoflabels) leftoflabels = lef; } } topoflabels *= labelheight; bottomoflabels *= labelheight; leftoflabels *= labelheight; rightoflabels *= labelheight; } /* calculate */ void rescale() { /* compute coordinates of tree for plot or preview device */ long i; double treeheight, treewidth, extrax, extray, temp; treeheight = maxy - miny + topoflabels + bottomoflabels; treewidth = maxx - minx + rightoflabels + leftoflabels; if (grows == vertical) { if (!rescaled) expand = bscale; else { expand = (xsize - 2 * xmargin) / treewidth; if ((ysize - 2 * ymargin) / treeheight < expand) expand = (ysize - 2 * ymargin) / treeheight; } extrax = (xsize - 2 * xmargin - treewidth * expand) / 2.0; extray = (ysize - 2 * ymargin - treeheight * expand) / 2.0; } else { if (!rescaled) expand = bscale; else { expand = (ysize - 2 * ymargin) / treewidth; if ((xsize - 2 * xmargin) / treeheight < expand) expand = (xsize - 2 * xmargin) / treeheight; } extrax = (xsize - 2 * xmargin - treeheight * expand) / 2.0; extray = (ysize - 2 * ymargin - treewidth * expand) / 2.0; } for (i = 0; i < (nextnode); i++) { nodep[i]->xcoord = expand * (nodep[i]->xcoord - minx + leftoflabels); nodep[i]->ycoord = expand * (nodep[i]->ycoord - miny + bottomoflabels); if (grows == horizontal) { temp = nodep[i]->ycoord; nodep[i]->ycoord = expand * treewidth - nodep[i]->xcoord; nodep[i]->xcoord = temp; } nodep[i]->xcoord += xmargin + extrax; nodep[i]->ycoord += ymargin + extray; } } /* rescale */ void plottree(node *p, node *q) { /* plot part or all of tree on the plotting device */ double x1, y1, x2, y2; node *pp; x2 = xscale * (xoffset + p->xcoord); y2 = yscale * (yoffset + p->ycoord); if (p != root) { x1 = xscale * (xoffset + q->xcoord); y1 = yscale * (yoffset + q->ycoord); plot(penup, x1, y1); plot(pendown, x2, y2); } if (p->tip) return; pp = p->next; do { plottree(pp->back, p); pp = pp->next; } while (((p == root) && (pp != p->next)) || ((p != root) && (pp != p))); } /* plottree */ void plotlabels(char *fontname) { long i; double compr, dx = 0, dy = 0, labangle, sino, coso, cosl, sinl, cosv, sinv, vec; boolean right; node *lp; compr = xunitspercm / yunitspercm; if (penchange == yes) changepen(labelpen); for (i = 0; i < (nextnode); i++) { if (nodep[i]->tip) { lp = nodep[i]; labangle = labelrotation * pi / 180.0; if (labeldirec == radial) labangle = nodep[i]->theta; else if (labeldirec == along) labangle = nodep[i]->oldtheta; else if (labeldirec == middle) labangle = 0.0; if (cos(labangle) < 0.0) labangle -= pi; sino = sin(nodep[i]->oldtheta); coso = cos(nodep[i]->oldtheta); cosl = cos(labangle); 
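/* (coso,sino) is the unit vector along the tip's branch direction and (cosl,sinl) the label's baseline direction; 'right' below tests whether their dot product is positive, i.e. whether the label runs in roughly the same direction as the branch */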
sinl = sin(labangle); right = ((coso*cosl+sino*sinl) > 0.0) || (labeldirec == middle); vec = sqrt(1.0+firstlet[i]*firstlet[i]); cosv = firstlet[i]/vec; sinv = 1.0/vec; if (labeldirec == middle) { if ((textlength[i]+1.0)*fabs(tan(nodep[i]->oldtheta)) > 2.0) { dx = -0.5 * textlength[i] * labelheight * expand; if (sino > 0.0) { dy = 0.5 * labelheight * expand; if (fabs(nodep[i]->oldtheta - pi/2.0) > 1000.0) dx += labelheight * expand / (2.0*tan(nodep[i]->oldtheta)); } else { dy = -1.5 * labelheight * expand; if (fabs(nodep[i]->oldtheta - pi/2.0) > 1000.0) dx += labelheight * expand / (2.0*tan(nodep[i]->oldtheta)); } } else { if (coso > 0.0) { dx = 0.5 * labelheight * expand; dy = (-0.5 + (0.5*textlength[i]+0.5)*tan(nodep[i]->oldtheta)) * labelheight * expand; } else { dx = -(textlength[i]+0.5) * labelheight * expand; dy = (-0.5 - (0.5*textlength[i]+0.5)*tan(nodep[i]->oldtheta)) * labelheight * expand; } } } else { if (right) { dx = labelheight * expand * coso; dy = labelheight * expand * sino; dx += labelheight * expand * 0.5 * vec * (-cosl*cosv+sinl*sinv); dy += labelheight * expand * 0.5 * vec * (-sinl*cosv-cosl*sinv); } else { dx = labelheight * expand * coso; dy = labelheight * expand * sino; dx += labelheight * expand * 0.5 * vec * (cosl*cosv+sinl*sinv); dy += labelheight * expand * 0.5 * vec * (sinl*cosv-cosl*sinv); dx -= textlength[i] * labelheight * expand * cosl; dy -= textlength[i] * labelheight * expand * sinl; } } plottext(lp->nayme, lp->naymlength, labelheight * expand * xscale / compr, compr, xscale * (lp->xcoord + dx + xoffset), yscale * (lp->ycoord + dy + yoffset), -180 * labangle / pi, font,fontname); } } if (penchange == yes) changepen(treepen); } /* plotlabels */ void user_loop() { /* loop to make preview window and decide what to do with it */ /* long loopcount;*/ /* char input_char;*/ while (!canbeplotted) { /* // loopcount = 0; // do { // input_char=showparms(); // firstscreens = false; // if ( input_char != 'Y') // getparms(input_char); // countup(&loopcount, 10); // } while (input_char != 'Y'); */ xscale = xunitspercm; yscale = yunitspercm; plotrparms(spp); numlines = dotmatrix ? ((long)floor(yunitspercm * ysize + 0.5) / strpdeep):1; calculate(); rescale(); canbeplotted = true; if (preview) { printf("Preview window displayed... press \"Change\" button to return to menu.\n"); canbeplotted=plotpreview(fontname,&xoffset,&yoffset,&scale,spp,root); } else { canbeplotted = true; } if ((previewer == winpreview || previewer == xpreview || previewer == mac) && (winaction == quitnow)) canbeplotted = true; } } /* user_loop */ void setup_environment(int argc, Char *argv[]) { /* Set up all kinds of fun stuff */ node *q, *r; char* treestr; char *pChar; double i; boolean firsttree; pointarray treenode = NULL; #ifdef MAC OSErr retcode; FInfo fndrinfo; macsetup("Drawtree","Preview"); #endif #ifdef TURBOC if ((registerbgidriver(EGAVGA_driver) <0) || (registerbgidriver(Herc_driver) <0) || (registerbgidriver(CGA_driver) <0)){ fprintf(stderr,"Graphics error: %s ",grapherrormsg(graphresult())); embExitBad();} #endif printf("DRAWTREE from PHYLIP version %s\n", VERSION); printf("Reading tree ... 
\n"); firsttree = true; treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); allocate_nodep(&nodep, treestr, &spp); treeread (&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdrawtreenode,true,-1); q = root; r = root; while (!(q->next == root)) q = q->next; q->next = root->next; root = q; chuck(&grbg, r); nodep[spp] = q; where = root; rotate = true; printf("Tree has been read.\n"); printf("Loading the font ... \n"); loadfont(font,argv[0]); printf("Font loaded.\n"); previewing = false; ansi = ANSICRT; ibmpc = IBMCRT; firstscreens = true; initialparms(); canbeplotted = false; if (argc > 1) { pChar = argv[1]; for (i = 0; i < strlen(pChar); i++) { if ( ! isdigit((int)*pChar) ) { /* set to default if the 2nd. parameter is not a number */ maxNumOfIter = 50; return; } else if ( isspace((int)*pChar) ) { printf("ERROR: Number of iteration should not contain space!\n"); embExitBad(); } } sscanf(argv[1], "%li", &maxNumOfIter); } else { /* 2nd. argument is not entered; use default. */ maxNumOfIter = 50; } return; } /* setup_environment */ int main(int argc, Char *argv[]) { long stripedepth; #ifdef MAC boolean wasplotted = false; char filename1[FNMLNGTH]; OSErr retcode; FInfo fndrinfo; #ifdef OSX_CARBON FSRef fileRef; FSSpec fileSpec; #endif #ifdef __MWERKS__ SIOUXSetTitle("\pPHYLIP: Drawtree"); #endif argv[0] = "Drawtree"; #endif #ifndef X_DISPLAY_MISSING nargc=argc; nargv=argv; #endif init(argc,argv); emboss_getoptions("fdrawtree",argc,argv); progname = argv[0]; grbg = NULL; setup_environment(argc, argv); user_loop(); if (dotmatrix) { stripedepth = allocstripe(stripe,(strpwide/8), ((long)(yunitspercm * ysize))); strpdeep = stripedepth; strpdiv = stripedepth; } if (!((previewer == winpreview || previewer == xpreview || previewer == mac) && (winaction == quitnow))) { previewing = false; initplotter(spp,fontname); numlines = dotmatrix ? ((long)floor(yunitspercm * ysize + 0.5)/strpdeep) : 1; if (plotter != ibm) printf("\nWriting plot file ...\n"); drawit(fontname,&xoffset,&yoffset,numlines,root); finishplotter(); #ifdef MAC wasplotted = true; #endif FClose(plotfile); printf("\nPlot written to file \"%s\"\n", pltfilename); } FClose(intree); printf("\nDone.\n\n"); #ifdef MAC if (plotter == pict && wasplotted){ #ifdef OSX_CARBON FSPathMakeRef((unsigned char *)pltfilename, &fileRef, NULL); FSGetCatalogInfo(&fileRef, kFSCatInfoNone, NULL, NULL, &fileSpec, NULL); FSpGetFInfo(&fileSpec, &fndrinfo); fndrinfo.fdType='PICT'; fndrinfo.fdCreator='MDRW'; FSpSetFInfo(&fileSpec, &fndrinfo); #else strcpy(filename1, pltfilename); retcode=GetFInfo(CtoPstr(filename1),0,&fndrinfo); fndrinfo.fdType='PICT'; fndrinfo.fdCreator='MDRW'; strcpy(filename1, pltfilename); retcode=SetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); #endif } if (plotter == lw && wasplotted){ #ifdef OSX_CARBON FSPathMakeRef((unsigned char *)pltfilename, &fileRef, NULL); FSGetCatalogInfo(&fileRef, kFSCatInfoNone, NULL, NULL, &fileSpec, NULL); FSpGetFInfo(&fileSpec, &fndrinfo); fndrinfo.fdType='TEXT'; FSpSetFInfo(&fileSpec, &fndrinfo); #else retcode=GetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); fndrinfo.fdType='TEXT'; retcode=SetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); #endif } #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/dnainvar.c0000664000175000017500000005323011305225544013027 00000000000000 #include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. 
Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define maxsp 4 /* maximum number of species -- must be 4 */ typedef enum { xx, yy, zz, ww } simbol; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void dnainvar_sitecombine(void); void makeweights(void); void doinput(void); void prntpatterns(void); void makesymmetries(void); void prntsymbol(simbol); void prntsymmetries(void); void tabulate(long,long,long,long,double *,double *,double *,double *); void dnainvar_writename(long); void writetree(long, long, long, long); void exacttest(long, long); void invariants(void); void makeinv(void); void reallocsites(void); /* function prototypes */ #endif extern sequence y; Char infilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; long sites, msets, ith; boolean weights, progress, prntpat, printinv, mulsets, firstset, justwts; steptr aliasweight; long f[(long)ww - (long)xx + 1][(long)ww - (long)xx + 1] [(long)ww - (long)xx + 1]; /* made global from being local to makeinv */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { printdata = false; weights = false; dotdiff = true; progress = true; prntpat = true; printinv = true; numwts = 0; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); prntpat = ajAcdGetBoolean("printpattern"); printinv = ajAcdGetBoolean("printinvariant"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); fprintf(outfile, "\nNucleic acid sequence Invariants "); fprintf(outfile, "method, version %s\n\n",VERSION); } /* emboss_getoptions */ void reallocsites(void) { long i; for (i=0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(sites*sizeof(Char)); } free(weight); free(alias); free(aliasweight); weight = (steptr)Malloc(sites * sizeof(long)); alias = (steptr)Malloc(sites * sizeof(long)); aliasweight = (steptr)Malloc(sites * sizeof(long)); } void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc(sites*sizeof(Char)); nayme = (naym *)Malloc(maxsp * sizeof(naym)); weight = (steptr)Malloc(sites * sizeof(long)); alias = (steptr)Malloc(sites * sizeof(long)); aliasweight = (steptr)Malloc(sites * sizeof(long)); } void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &sites, &nonodes, 1); if (spp > maxsp){ printf("TOO MANY SPECIES: only 4 allowed\n"); embExitBad();} if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); allocrest(); } /* doinit*/ void dnainvar_sitecombine() { /* combine sites that have identical patterns */ long i, j, k; boolean tied; i = 1; while (i < sites) { j = i + 1; tied = true; 
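/* compare site alias[i-1] against each later site alias[j-1]; when every species shows the same character at both, the later site's weight is folded into the earlier one */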
while (j <= sites && tied) { k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); k++; } if (tied && aliasweight[j - 1] > 0) { aliasweight[i - 1] += aliasweight[j - 1]; aliasweight[j - 1] = 0; } j++; } i = j - 1; } } /* dnainvar_sitecombine */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; aliasweight[i - 1] = weight[i - 1]; } sitesort(sites, aliasweight); dnainvar_sitecombine(); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { weight[i - 1] = aliasweight[i - 1]; if (weight[i - 1] > 0) endsite = i; } } /* makeweights */ void doinput() { /* reads the input data */ long i; if (justwts) { if (firstset) seq_inputdata(seqsets[ith-1], sites); for (i = 0; i < sites; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], sites, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, sites, weight, "Sites"); } else { if (!firstset){ samenumspseq(seqsets[ith-1], &sites, ith); reallocsites(); } seq_inputdata(seqsets[ith-1], sites); for (i = 0; i < sites; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[0], sites, weight, &weights); if (printdata) printweights(outfile, 0, sites, weight, "Sites"); } } makeweights(); } /* doinput */ void prntpatterns() { /* print out patterns */ long i, j; fprintf(outfile, "\n Pattern"); if (prntpat) fprintf(outfile, " Number of times"); fprintf(outfile, "\n\n"); for (i = 0; i < endsite; i++) { fprintf(outfile, " "); for (j = 0; j < spp; j++) putc(y[j][alias[i] - 1], outfile); if (prntpat) fprintf(outfile, " %8ld", weight[i]); putc('\n', outfile); } putc('\n', outfile); } /* prntpatterns */ void makesymmetries() { /* get frequencies of symmetrized patterns */ long i, j; boolean drop, usedz; Char ch, ch1, zchar; simbol s1, s2, s3; simbol t[maxsp - 1]; for (s1 = xx; (long)s1 <= (long)ww; s1 = (simbol)((long)s1 + 1)) { for (s2 = xx; (long)s2 <= (long)ww; s2 = (simbol)((long)s2 + 1)) { for (s3 = xx; (long)s3 <= (long)ww; s3 = (simbol)((long)s3 + 1)) f[(long)s1 - (long)xx][(long)s2 - (long)xx] [(long)s3 - (long)xx] = 0; } } for (i = 0; i < endsite; i++) { drop = false; for (j = 0; j < spp; j++) { ch = y[j][alias[i] - 1]; drop = (drop || (ch != 'A' && ch != 'C' && ch != 'G' && ch != 'T' && ch != 'U')); } ch1 = y[0][alias[i] - 1]; if (!drop) { usedz = false; zchar = ' '; for (j = 2; j <= spp; j++) { ch = y[j - 1][alias[i] - 1]; if (ch == ch1) t[j - 2] = xx; else if ((ch1 == 'A' && ch == 'G') || (ch1 == 'G' && ch == 'A') || (ch1 == 'C' && (ch == 'T' || ch == 'U')) || ((ch1 == 'T' || ch1 == 'U') && ch == 'C')) t[j - 2] = yy; else if (!usedz) { t[j - 2] = zz; usedz = true; zchar = ch; } else if (usedz && ch == zchar) t[j - 2] = zz; else if (usedz && ch != zchar) t[j - 2] = ww; } f[(long)t[0] - (long)xx][(long)t[1] - (long)xx] [(long)t[2] - (long)xx] += weight[i]; } } } /* makesymmetries */ void prntsymbol(simbol s) { /* print 1, 2, 3, 4 as appropriate */ switch (s) { case xx: putc('1', outfile); break; case yy: putc('2', outfile); break; case zz: putc('3', outfile); break; case ww: putc('4', outfile); break; } } /* prntsymbol */ void prntsymmetries() { /* print out symmetrized pattern numbers */ simbol s1, s2, s3; fprintf(outfile, "\nSymmetrized patterns (1, 2 = the two purines "); fprintf(outfile, "and 3, 4 = the two pyrimidines\n"); fprintf(outfile, " or 1, 2 
= the two pyrimidines "); fprintf(outfile, "and 3, 4 = the two purines)\n\n"); for (s1 = xx; (long)s1 <= (long)ww; s1 = (simbol)((long)s1 + 1)) { for (s2 = xx; (long)s2 <= (long)ww; s2 = (simbol)((long)s2 + 1)) { for (s3 = xx; (long)s3 <= (long)ww; s3 = (simbol)((long)s3 + 1)) { if (f[(long)s1 - (long)xx][(long)s2 - (long)xx] [(long)s3 - (long)xx] > 0) { fprintf(outfile, " 1"); prntsymbol(s1); prntsymbol(s2); prntsymbol(s3); if (prntpat) fprintf(outfile, " %7ld", f[(long)s1 - (long)xx][(long)s2 - (long)xx] [(long)s3 - (long)xx]); putc('\n', outfile); } } } } } /* prntsymmetries */ void tabulate(long mm, long nn, long pp, long qq, double *mr, double *nr, double *pr, double *qr) { /* make quadratic invariant, table, chi-square */ long total; double k, TEMP; fprintf(outfile, "\n Contingency Table\n\n"); fprintf(outfile, "%7ld%6ld\n", mm, nn); fprintf(outfile, "%7ld%6ld\n\n", pp, qq); *mr = (long)(mm); *nr = (long)(nn); *pr = (long)pp; *qr = (long)qq; total = mm + nn + pp + qq; if (printinv) fprintf(outfile, " Quadratic invariant = %15.1f\n\n", (*nr) * (*pr) - (*mr) * (*qr)); fprintf(outfile, " Chi-square = "); TEMP = (*mr) * (*qr) - (*nr) * (*pr); k = total * (TEMP * TEMP) / (((*mr) + (*nr)) * ((*mr) + (*pr)) * ((*nr) + (*qr)) * ((*pr) + (*qr))); fprintf(outfile, "%10.5f", k); if ((*mr) * (*qr) > (*nr) * (*pr) && k > 2.71) fprintf(outfile, " (P < 0.05)\n"); else fprintf(outfile, " (not significant)\n"); fprintf(outfile, "\n\n"); } /* tabulate */ void dnainvar_writename(long m) { /* write out a species name */ long i, n; n = nmlngth; while (nayme[m - 1][n - 1] == ' ') n--; if (n == 0) n = 1; for (i = 0; i < n; i++) putc(nayme[m - 1][i], outfile); } /* dnainvar_writename */ void writetree(long i, long j, long k, long l) { /* write out tree topology ((i,j),(k,l)) using names */ fprintf(outfile, "(("); dnainvar_writename(i); putc(',', outfile); dnainvar_writename(j); fprintf(outfile, "),("); dnainvar_writename(k); putc(',', outfile); dnainvar_writename(l); fprintf(outfile, "))\n"); } /* writetree */ void exacttest(long m, long n) { /* exact binomial test that m <= n */ long i; double p, sum; p = 1.0; for (i = 1; i <= m + n; i++) p /= 2.0; sum = p; for (i = 1; i <= n; i++) { p = p * (m + n - i + 1) / i; sum += p; } fprintf(outfile, " %7.4f", sum); if (sum <= 0.05) fprintf(outfile, " yes\n"); else fprintf(outfile, " no\n"); } /* exacttest */ void invariants() { /* compute invariants */ long m, n, p, q; double L1, L2, L3; double mr,nr,pr,qr; fprintf(outfile, "\nTree topologies (unrooted): \n\n"); fprintf(outfile, " I: "); writetree(1, 2, 3, 4); fprintf(outfile, " II: "); writetree(1, 3, 2, 4); fprintf(outfile, " III: "); writetree(1, 4, 2, 3); fprintf(outfile, "\n\nLake's linear invariants\n"); fprintf(outfile, " (these are expected to be zero for the two incorrect tree topologies.\n"); fprintf(outfile, " This is tested by testing the equality of the two parts\n"); fprintf(outfile, " of each expression using a one-sided exact binomial test.\n"); fprintf(outfile, " The null hypothesis is that the first part is no larger than the second.)\n\n"); fprintf(outfile, " Tree "); fprintf(outfile, " Exact test P value Significant?\n\n"); m = f[(long)yy - (long)xx][(long)zz - (long)xx] [(long)ww - (long)xx] + f[0][(long)zz - (long)xx] [(long)zz - (long)xx]; n = f[(long)yy - (long)xx][(long)zz - (long)xx] [(long)zz - (long)xx] + f[0][(long)zz - (long)xx] [(long)ww - (long)xx]; fprintf(outfile, " I %5ld - %5ld = %5ld", m, n, m - n); exacttest(m, n); m = f[(long)zz - (long)xx][(long)yy - (long)xx] [(long)ww - 
(long)xx] + f[(long)zz - (long)xx][0] [(long)zz - (long)xx]; n = f[(long)zz - (long)xx][(long)yy - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx][0] [(long)ww - (long)xx]; fprintf(outfile, " II %5ld - %5ld = %5ld", m, n, m - n); exacttest(m, n); m = f[(long)zz - (long)xx][(long)ww - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx][0]; n = f[(long)zz - (long)xx][(long)zz - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)ww - (long)xx][0]; fprintf(outfile, " III%5ld - %5ld = %5ld", m, n, m - n); exacttest(m, n); fprintf(outfile, "\n\nCavender's quadratic invariants (type L)"); fprintf(outfile, " using purines vs. pyrimidines\n"); fprintf(outfile, " (these are expected to be zero, and thus have a nonsignificant\n"); fprintf(outfile, " chi-square, for the correct tree topology)\n"); fprintf(outfile, "They will be misled if there are substantially\n"); fprintf(outfile, "different evolutionary rate between sites, or\n"); fprintf(outfile, "different purine:pyrimidine ratios from 1:1.\n\n"); fprintf(outfile, " Tree I:\n"); m = f[0][0][0] + f[0][(long)yy - (long)xx] [(long)yy - (long)xx] + f[0][(long)zz - (long)xx] [(long)zz - (long)xx]; n = f[0][0][(long)yy - (long)xx] + f[0][0] [(long)zz - (long)xx] + f[0][(long)yy - (long)xx][0] + f[0] [(long)yy - (long)xx][(long)zz - (long)xx] + f[0] [(long)zz - (long)xx][0] + f[0][(long)zz - (long)xx] [(long)yy - (long)xx] + f[0][(long)zz - (long)xx] [(long)ww - (long)xx]; p = f[(long)yy - (long)xx][0][0] + f[(long)yy - (long)xx] [(long)yy - (long)xx] [(long)yy - (long)xx] + f[(long)yy - (long)xx] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx][0] [0] + f[(long)zz - (long)xx][(long)yy - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)ww - (long)xx][(long)ww - (long)xx]; q = f[(long)yy - (long)xx][0][(long)yy - (long)xx] + f[(long)yy - (long)xx][0][(long)zz - (long)xx] + f[(long)yy - (long)xx][(long)yy - (long)xx][0] + f[(long)yy - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx][0] + f[(long)yy - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][0][(long)yy - (long)xx] + f[(long)zz - (long)xx][0][(long)zz - (long)xx] + f[(long)zz - (long)xx][0][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)yy - (long)xx][0] + f[(long)zz - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] + f[(long)zz - (long)xx][(long)yy - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)zz - (long)xx][0] + f[(long)zz - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx][0] + f[(long)zz - (long)xx][(long)ww - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx][(long)zz - (long)xx]; nr = n; pr = p; mr = m; qr = q; L1 = nr * pr - mr * qr; tabulate(m, n, p, q, &mr,&nr,&pr,&qr); fprintf(outfile, " Tree II:\n"); m = f[0][0][0] + f[(long)yy - (long)xx][0] [(long)yy - (long)xx] + f[(long)zz - (long)xx][0] [(long)zz - (long)xx]; n = f[0][0][(long)yy - (long)xx] + f[0][0] [(long)zz - (long)xx] + f[(long)yy - (long)xx][0] [0] + f[(long)yy - (long)xx][0] [(long)zz - (long)xx] + f[(long)zz - (long)xx][0] [0] + f[(long)zz - (long)xx][0] [(long)yy - (long)xx] + f[(long)zz - (long)xx][0] [(long)ww - (long)xx]; p = f[0][(long)yy - (long)xx][0] + 
f[(long)yy - (long)xx] [(long)yy - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)yy - (long)xx][(long)zz - (long)xx] + f[0] [(long)zz - (long)xx][0] + f[(long)yy - (long)xx] [(long)zz - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)ww - (long)xx][(long)zz - (long)xx]; q = f[0][(long)yy - (long)xx][(long)yy - (long)xx] + f[0] [(long)yy - (long)xx][(long)zz - (long)xx] + f[(long)yy - (long)xx][(long)yy - (long)xx][0] + f[(long)yy - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] + f[(long)zz - (long)xx][(long)yy - (long)xx][0] + f[(long)zz - (long)xx][(long)yy - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)yy - (long)xx][(long)ww - (long)xx] + f[0][(long)zz - (long)xx][(long)yy - (long)xx] + f[0] [(long)zz - (long)xx][(long)zz - (long)xx] + f[0] [(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx][0] + f[(long)yy - (long)xx][(long)zz - (long)xx][(long)zz - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)zz - (long)xx][0] + f[(long)zz - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx][0] + f[(long)zz - (long)xx][(long)ww - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx][(long)ww - (long)xx]; nr = n; pr = p; mr = m; qr = q; L2 = nr * pr - mr * qr; tabulate(m, n, p, q, &mr,&nr,&pr,&qr); fprintf(outfile, " Tree III:\n"); m = f[0][0][0] + f[(long)yy - (long)xx][(long)yy - (long)xx] [0] + f[(long)zz - (long)xx][(long)zz - (long)xx][0]; n = f[(long)yy - (long)xx][0][0] + f[(long)zz - (long)xx][0] [0] + f[0][(long)yy - (long)xx][0] + f[(long)zz - (long)xx] [(long)yy - (long)xx][0] + f[0][(long)zz - (long)xx] [0] + f[(long)yy - (long)xx][(long)zz - (long)xx] [0] + f[(long)zz - (long)xx][(long)ww - (long)xx][0]; p = f[0][0][(long)yy - (long)xx] + f[(long)yy - (long)xx] [(long)yy - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx][(long)yy - (long)xx] + f[0][0] [(long)zz - (long)xx] + f[(long)yy - (long)xx] [(long)yy - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)zz - (long)xx][(long)ww - (long)xx]; q = f[(long)yy - (long)xx][0][(long)yy - (long)xx] + f[(long)zz - (long)xx] [0][(long)yy - (long)xx] + f[0][(long)yy - (long)xx][(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)yy - (long)xx][(long)yy - (long)xx] + f[0][(long)zz - (long)xx] [(long)yy - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx] [(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx] [(long)yy - (long)xx] + f[(long)yy - (long)xx][0] [(long)zz - (long)xx] + f[(long)zz - (long)xx][0] [(long)zz - (long)xx] + f[0][(long)zz - (long)xx] [(long)ww - (long)xx] + f[0][(long)yy - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)yy - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)yy - (long)xx][(long)ww - (long)xx] + f[0] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)yy - (long)xx] [(long)zz - (long)xx] [(long)zz - (long)xx] + f[(long)zz - (long)xx] [(long)ww - (long)xx] [(long)ww - (long)xx] + f[(long)zz - (long)xx][0] [(long)ww - (long)xx] + f[(long)yy - (long)xx] [(long)zz - (long)xx][(long)ww - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx][(long)zz - (long)xx]; nr = n; pr = p; mr = m; qr = q; L3 
= nr * pr - mr * qr; tabulate(m, n, p, q, &mr,&nr,&pr,&qr); fprintf(outfile, "\n\nCavender's quadratic invariants (type K)"); fprintf(outfile, " using purines vs. pyrimidines\n"); fprintf(outfile, " (these are expected to be zero for the correct tree topology)\n"); fprintf(outfile, "They will be misled if there are substantially\n"); fprintf(outfile, "different evolutionary rate between sites, or\n"); fprintf(outfile, "different purine:pyrimidine ratios from 1:1.\n"); fprintf(outfile, "No statistical test is done on them here.\n\n"); fprintf(outfile, " Tree I: %15.1f\n", L2 - L3); fprintf(outfile, " Tree II: %15.1f\n", L3 - L1); fprintf(outfile, " Tree III: %15.1f\n\n", L1 - L2); } /* invariants */ void makeinv() { /* print out patterns and compute invariants */ prntpatterns(); makesymmetries(); prntsymmetries(); if (printinv) invariants(); } /* makeinv */ int main(int argc, Char *argv[]) { /* DNA Invariants */ #ifdef MAC argc = 1; /* macsetup("Dnainvar",""); */ argv[0] = "Dnainvar"; #endif init(argc,argv); emboss_getoptions("fdnainvar", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= msets; ith++) { doinput(); if (ith == 1) firstset = false; if (msets > 1 && !justwts) { if (progress) printf("\nData set # %ld:\n",ith); fprintf(outfile, "Data set # %ld:\n\n",ith); } makeinv(); } if (progress) { putchar('\n'); printf("Output written to output file \"%s\"\n", outfilename); putchar('\n'); } FClose(outfile); FClose(infile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* DNA Invariants */ PHYLIPNEW-3.69.650/src/seq.c0000664000175000017500000033347411605067345012036 00000000000000 #include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ long nonodes, endsite, outgrno, nextree, which; boolean interleaved, printdata, outgropt, treeprint, dotdiff, transvp; steptr weight, category, alias, location, ally; sequence y; void fix_x(node* p,long site, double maxx, long rcategs) { /* dnaml dnamlk */ long i,j; p->underflows[site] += log(maxx); for ( i = 0 ; i < rcategs ; i++ ) { for ( j = 0 ; j < ((long)T - (long)A + 1) ; j++) p->x[site][i][j] /= maxx; } } /* fix_x */ void fix_protx(node* p,long site, double maxx, long rcategs) { /* proml promlk */ long i,m; p->underflows[site] += log(maxx); for ( i = 0 ; i < rcategs ; i++ ) for (m = 0; m <= 19; m++) p->protx[site][i][m] /= maxx; } /* fix_protx */ void free_all_x_in_array (long nonodes, pointarray treenode) { /* used in dnaml & dnamlk */ long i, j, k; node *p; /* Zero thru spp are tips, */ for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->x[j]); free(treenode[i]->x); } /* The rest are rings (i.e. triads) */ for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]; do { for (k = 0; k < endsite; k++) free(p->x[k]); free(p->x); p = p->next; } while(p != treenode[i]); } } } /* free_all_x_in_array */ void free_all_x2_in_array (long nonodes, pointarray treenode) { /* used in restml */ long i, j; node *p; /* Zero thru spp are tips */ for (i = 0; i < spp; i++) free(treenode[i]->x2); /* The rest are rings (i.e. 
triads) */ for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { free(p->x2); p = p->next; } } } /* free_all_x2_in_array */ void alloctemp(node **temp, long *zeros, long endsite) { /*used in dnacomp and dnapenny */ *temp = (node *)Malloc(sizeof(node)); (*temp)->numsteps = (steptr)Malloc(endsite*sizeof(long)); (*temp)->base = (baseptr)Malloc(endsite*sizeof(long)); (*temp)->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); memcpy((*temp)->base, zeros, endsite*sizeof(long)); memcpy((*temp)->numsteps, zeros, endsite*sizeof(long)); zeronumnuc(*temp, endsite); } /* alloctemp */ void freetemp(node **temp) { /* used in dnacomp, dnapars, & dnapenny */ free((*temp)->numsteps); free((*temp)->base); free((*temp)->numnuc); free(*temp); } /* freetemp */ void freetree2 (pointarray treenode, long nonodes) { /* The natural complement to alloctree2. Free all elements of all the rings (normally triads) in treenode */ long i; node *p, *q; /* The first spp elements are just nodes, not rings */ for (i = 0; i < spp; i++) free (treenode[i]); /* The rest are rings */ for (i = spp; i < nonodes; i++) { p = treenode[i]->next; while (p != treenode[i]) { q = p->next; free (p); p = q; } /* p should now point to treenode[i], which has yet to be freed */ free (p); } free (treenode); } /* freetree2 */ void seq_inputdata(AjPSeqset seqset, long chars) { /* input the names and sequences for each species */ /* used by dnacomp, dnadist, dnainvar, dnaml, dnamlk, dnapars, & dnapenny */ long i, j, k, l; Char charstate; ajint ilen; if (printdata) headings(chars, "Sequences", "---------"); for(i=0;i chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (dotdiff && (j > 1 && y[j - 1][k - 1] == y[0][k - 1])) charstate = '.'; else charstate = y[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* inputdata */ void alloctree(pointarray *treenode, long nonodes, boolean usertree) { /* allocate treenode dynamically */ /* used in dnapars, dnacomp, dnapenny & dnamove */ long i, j; node *p, *q; *treenode = (pointarray)Malloc(nonodes*sizeof(node *)); for (i = 0; i < spp; i++) { (*treenode)[i] = (node *)Malloc(sizeof(node)); (*treenode)[i]->tip = true; (*treenode)[i]->index = i+1; (*treenode)[i]->iter = true; (*treenode)[i]->branchnum = 0; (*treenode)[i]->initialized = true; } if (!usertree) for (i = spp; i < nonodes; i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->tip = false; p->index = i+1; p->iter = true; p->branchnum = 0; p->initialized = false; p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree */ void allocx(long nonodes, long rcategs, pointarray treenode, boolean usertree) { /* allocate x dynamically */ /* used in dnaml & dnamlk */ long i, j, k; node *p; for (i = 0; i < spp; i++){ treenode[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike)); treenode[i]->underflows = (double *)Malloc(endsite * sizeof (double)); for (j = 0; j < endsite; j++) treenode[i]->x[j] = (ratelike)Malloc(rcategs*sizeof(sitelike)); } if (!usertree) { for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->underflows = (double *)Malloc (endsite * sizeof (double)); p->x = (phenotype)Malloc(endsite*sizeof(ratelike)); for (k = 0; k < endsite; k++) p->x[k] = (ratelike)Malloc(rcategs*sizeof(sitelike)); p = p->next; } } } } /* allocx */ void prot_allocx(long nonodes, long rcategs, pointarray treenode, boolean usertree) { /* allocate 
x dynamically */ /* used in proml */ long i, j, k; node *p; for (i = 0; i < spp; i++){ treenode[i]->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); treenode[i]->underflows = (double *)Malloc(endsite*sizeof(double)); for (j = 0; j < endsite; j++) treenode[i]->protx[j] = (pratelike)Malloc(rcategs*sizeof(psitelike)); } if (!usertree) { for (i = spp; i < nonodes; i++) { p = treenode[i]; for (j = 1; j <= 3; j++) { p->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); p->underflows = (double *)Malloc(endsite*sizeof(double)); for (k = 0; k < endsite; k++) p->protx[k] = (pratelike)Malloc(rcategs*sizeof(psitelike)); p = p->next; } } } } /* prot_allocx */ void setuptree(pointarray treenode, long nonodes, boolean usertree) { /* initialize treenodes */ long i; node *p; for (i = 1; i <= nonodes; i++) { if (i <= spp || !usertree) { treenode[i-1]->back = NULL; treenode[i-1]->tip = (i <= spp); treenode[i-1]->index = i; treenode[i-1]->numdesc = 0; treenode[i-1]->iter = true; treenode[i-1]->initialized = true; treenode[i-1]->tyme = 0.0; } } if (!usertree) { for (i = spp + 1; i <= nonodes; i++) { p = treenode[i-1]->next; while (p != treenode[i-1]) { p->back = NULL; p->tip = false; p->index = i; p->numdesc = 0; p->iter = true; p->initialized = false; p->tyme = 0.0; p = p->next; } } } } /* setuptree */ void setuptree2(tree *a) { /* initialize a tree */ /* used in dnaml, dnamlk, & restml */ a->likelihood = -999999.0; a->start = a->nodep[0]->back; a->root = NULL; } /* setuptree2 */ void alloctip(node *p, long *zeros) { /* allocate a tip node */ /* used by dnacomp, dnapars, & dnapenny */ p->numsteps = (steptr)Malloc(endsite*sizeof(long)); p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); p->base = (baseptr)Malloc(endsite*sizeof(long)); p->oldbase = (baseptr)Malloc(endsite*sizeof(long)); memcpy(p->base, zeros, endsite*sizeof(long)); memcpy(p->numsteps, zeros, endsite*sizeof(long)); memcpy(p->oldbase, zeros, endsite*sizeof(long)); memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); } /* alloctip */ void getbasefreqs(double freqa, double freqc, double freqg, double freqt, double *freqr, double *freqy, double *freqar, double *freqcy, double *freqgr, double *freqty, double *ttratio, double *xi, double *xv, double *fracchange, boolean freqsfrom, boolean printdata) { /* used by dnadist, dnaml, & dnamlk */ double aa, bb; if (printdata) { putc('\n', outfile); if (freqsfrom) fprintf(outfile, "Empirical "); fprintf(outfile, "Base Frequencies:\n\n"); fprintf(outfile, " A %10.5f\n", freqa); fprintf(outfile, " C %10.5f\n", freqc); fprintf(outfile, " G %10.5f\n", freqg); fprintf(outfile, " T(U) %10.5f\n", freqt); fprintf(outfile, "\n"); } *freqr = freqa + freqg; *freqy = freqc + freqt; *freqar = freqa / *freqr; *freqcy = freqc / *freqy; *freqgr = freqg / *freqr; *freqty = freqt / *freqy; aa = *ttratio * (*freqr) * (*freqy) - freqa * freqg - freqc * freqt; bb = freqa * (*freqgr) + freqc * (*freqty); *xi = aa / (aa + bb); *xv = 1.0 - *xi; if (*xi < 0.0) { printf("\n WARNING: This transition/transversion ratio\n"); printf(" is impossible with these base frequencies!\n"); *xi = 0.0; *xv = 1.0; (*ttratio) = (freqa*freqg+freqc*freqt)/((*freqr)*(*freqy)); printf(" Transition/transversion parameter reset\n"); printf(" so transition/transversion ratio is %10.6f\n\n", (*ttratio)); } if (freqa <= 0.0) freqa = 0.000001; if (freqc <= 0.0) freqc = 0.000001; if (freqg <= 0.0) freqg = 0.000001; if (freqt <= 0.0) freqt = 0.000001; *fracchange = (*xi) * (2 * freqa * (*freqgr) + 2 * freqc * (*freqty)) + (*xv) * (1.0 - freqa * 
freqa - freqc * freqc - freqg * freqg - freqt * freqt); } /* getbasefreqs */ void empiricalfreqs(double *freqa, double *freqc, double *freqg, double *freqt, steptr weight, pointarray treenode) { /* Get empirical base frequencies from the data */ /* used in dnaml & dnamlk */ long i, j, k; double sum, suma, sumc, sumg, sumt, w; *freqa = 0.25; *freqc = 0.25; *freqg = 0.25; *freqt = 0.25; for (k = 1; k <= 8; k++) { suma = 0.0; sumc = 0.0; sumg = 0.0; sumt = 0.0; for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) { w = weight[j]; sum = (*freqa) * treenode[i]->x[j][0][0]; sum += (*freqc) * treenode[i]->x[j][0][(long)C - (long)A]; sum += (*freqg) * treenode[i]->x[j][0][(long)G - (long)A]; sum += (*freqt) * treenode[i]->x[j][0][(long)T - (long)A]; suma += w * (*freqa) * treenode[i]->x[j][0][0] / sum; sumc += w * (*freqc) * treenode[i]->x[j][0][(long)C - (long)A] / sum; sumg += w * (*freqg) * treenode[i]->x[j][0][(long)G - (long)A] / sum; sumt += w * (*freqt) * treenode[i]->x[j][0][(long)T - (long)A] / sum; } } sum = suma + sumc + sumg + sumt; *freqa = suma / sum; *freqc = sumc / sum; *freqg = sumg / sum; *freqt = sumt / sum; } if (*freqa <= 0.0) *freqa = 0.000001; if (*freqc <= 0.0) *freqc = 0.000001; if (*freqg <= 0.0) *freqg = 0.000001; if (*freqt <= 0.0) *freqt = 0.000001; } /* empiricalfreqs */ void sitesort(long chars, steptr weight) { /* Shell sort keeping sites, weights in same order */ /* used in dnainvar, dnapars, dnacomp & dnapenny */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied; gap = chars / 2; while (gap > 0) { for (i = gap + 1; i <= chars; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = alias[j - 1]; jg = alias[j + gap - 1]; tied = true; k = 1; while (k <= spp && tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (!flip) break; itemp = alias[j - 1]; alias[j - 1] = alias[j + gap - 1]; alias[j + gap - 1] = itemp; itemp = weight[j - 1]; weight[j - 1] = weight[j + gap - 1]; weight[j + gap - 1] = itemp; j -= gap; } } gap /= 2; } } /* sitesort */ void sitecombine(long chars) { /* combine sites that have identical patterns */ /* used in dnapars, dnapenny, & dnacomp */ long i, j, k; boolean tied; i = 1; while (i < chars) { j = i + 1; tied = true; while (j <= chars && tied) { k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); k++; } if (tied) { weight[i - 1] += weight[j - 1]; weight[j - 1] = 0; ally[alias[j - 1] - 1] = alias[i - 1]; } j++; } i = j - 1; } } /* sitecombine */ void sitescrunch(long chars) { /* move so one representative of each pattern of sites comes first */ /* used in dnapars & dnacomp */ long i, j, itemp; boolean done, found; done = false; i = 1; j = 2; while (!done) { if (ally[alias[i - 1] - 1] != alias[i - 1]) { if (j <= i) j = i + 1; if (j <= chars) { do { found = (ally[alias[j - 1] - 1] == alias[j - 1]); j++; } while (!(found || j > chars)); if (found) { j--; itemp = alias[i - 1]; alias[i - 1] = alias[j - 1]; alias[j - 1] = itemp; itemp = weight[i - 1]; weight[i - 1] = weight[j - 1]; weight[j - 1] = itemp; } else done = true; } else done = true; } i++; done = (done || i >= chars); } } /* sitescrunch */ void sitesort2(long sites, steptr aliasweight) { /* Shell sort keeping sites, weights in same order */ /* used in dnaml & dnamnlk */ long gap, i, j, jj, jg, k, itemp; boolean flip, tied, samewt; gap = sites / 2; while (gap > 0) { for (i = gap + 1; i <= sites; i++) { j = i - gap; flip = true; while (j > 0 && flip) { jj = 
alias[j - 1]; jg = alias[j + gap - 1]; samewt = ((weight[jj - 1] != 0) && (weight[jg - 1] != 0)) || ((weight[jj - 1] == 0) && (weight[jg - 1] == 0)); tied = samewt && (category[jj - 1] == category[jg - 1]); flip = ((!samewt) && (weight[jj - 1] == 0)) || (samewt && (category[jj - 1] > category[jg - 1])); k = 1; while (k <= spp && tied) { flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); k++; } if (!flip) break; itemp = alias[j - 1]; alias[j - 1] = alias[j + gap - 1]; alias[j + gap - 1] = itemp; itemp = aliasweight[j - 1]; aliasweight[j - 1] = aliasweight[j + gap - 1]; aliasweight[j + gap - 1] = itemp; j -= gap; } } gap /= 2; } } /* sitesort2 */ void sitecombine2(long sites, steptr aliasweight) { /* combine sites that have identical patterns */ /* used in dnaml & dnamlk */ long i, j, k; boolean tied, samewt; i = 1; while (i < sites) { j = i + 1; tied = true; while (j <= sites && tied) { samewt = ((aliasweight[i - 1] != 0) && (aliasweight[j - 1] != 0)) || ((aliasweight[i - 1] == 0) && (aliasweight[j - 1] == 0)); tied = samewt && (category[alias[i - 1] - 1] == category[alias[j - 1] - 1]); k = 1; while (k <= spp && tied) { tied = (tied && y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); k++; } if (!tied) break; aliasweight[i - 1] += aliasweight[j - 1]; aliasweight[j - 1] = 0; ally[alias[j - 1] - 1] = alias[i - 1]; j++; } i = j; } } /* sitecombine2 */ void sitescrunch2(long sites, long i, long j, steptr aliasweight) { /* move so positively weighted sites come first */ /* used by dnainvar, dnaml, dnamlk, & restml */ long itemp; boolean done, found; done = false; while (!done) { if (aliasweight[i - 1] > 0) i++; else { if (j <= i) j = i + 1; if (j <= sites) { do { found = (aliasweight[j - 1] > 0); j++; } while (!(found || j > sites)); if (found) { j--; itemp = alias[i - 1]; alias[i - 1] = alias[j - 1]; alias[j - 1] = itemp; itemp = aliasweight[i - 1]; aliasweight[i - 1] = aliasweight[j - 1]; aliasweight[j - 1] = itemp; } else done = true; } else done = true; } done = (done || i >= sites); } } /* sitescrunch2 */ void makevalues(pointarray treenode, long *zeros, boolean usertree) { /* set up fractional likelihoods at tips */ /* used by dnacomp, dnapars, & dnapenny */ long i, j; char ns = 0; node *p; setuptree(treenode, nonodes, usertree); for (i = 0; i < spp; i++) alloctip(treenode[i], zeros); if (!usertree) { for (i = spp; i < nonodes; i++) { p = treenode[i]; do { allocnontip(p, zeros, endsite); p = p->next; } while (p != treenode[i]); } } for (j = 0; j < endsite; j++) { for (i = 0; i < spp; i++) { switch (y[i][alias[j] - 1]) { case 'A': ns = 1 << A; break; case 'C': ns = 1 << C; break; case 'G': ns = 1 << G; break; case 'U': ns = 1 << T; break; case 'T': ns = 1 << T; break; case 'M': ns = (1 << A) | (1 << C); break; case 'R': ns = (1 << A) | (1 << G); break; case 'W': ns = (1 << A) | (1 << T); break; case 'S': ns = (1 << C) | (1 << G); break; case 'Y': ns = (1 << C) | (1 << T); break; case 'K': ns = (1 << G) | (1 << T); break; case 'B': ns = (1 << C) | (1 << G) | (1 << T); break; case 'D': ns = (1 << A) | (1 << G) | (1 << T); break; case 'H': ns = (1 << A) | (1 << C) | (1 << T); break; case 'V': ns = (1 << A) | (1 << C) | (1 << G); break; case 'N': ns = (1 << A) | (1 << C) | (1 << G) | (1 << T); break; case 'X': ns = (1 << A) | (1 << C) | (1 << G) | (1 << T); break; case '?': ns = (1 << A) | (1 << C) | (1 << G) | (1 << T) | (1 << O); break; case 'O': ns = 1 << O; break; case '-': ns = 1 << O; break; } treenode[i]->base[j] = ns; 
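/* ns is a bit set over the bases (plus O for gap/unknown); an ambiguity code such as 'R' sets both the A and G bits */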
treenode[i]->numsteps[j] = 0; } } } /* makevalues */ void makevalues2(long categs, pointarray treenode, long endsite, long spp, sequence y, steptr alias) { /* set up fractional likelihoods at tips */ /* used by dnaml & dnamlk */ long i, j, k, l; bases b; for (k = 0; k < endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (l = 0; l < categs; l++) { for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 0.0; switch (y[i][j - 1]) { case 'A': treenode[i]->x[k][l][0] = 1.0; break; case 'C': treenode[i]->x[k][l][(long)C - (long)A] = 1.0; break; case 'G': treenode[i]->x[k][l][(long)G - (long)A] = 1.0; break; case 'T': treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'U': treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'M': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)C - (long)A] = 1.0; break; case 'R': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)G - (long)A] = 1.0; break; case 'W': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'S': treenode[i]->x[k][l][(long)C - (long)A] = 1.0; treenode[i]->x[k][l][(long)G - (long)A] = 1.0; break; case 'Y': treenode[i]->x[k][l][(long)C - (long)A] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'K': treenode[i]->x[k][l][(long)G - (long)A] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'B': treenode[i]->x[k][l][(long)C - (long)A] = 1.0; treenode[i]->x[k][l][(long)G - (long)A] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'D': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)G - (long)A] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'H': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)C - (long)A] = 1.0; treenode[i]->x[k][l][(long)T - (long)A] = 1.0; break; case 'V': treenode[i]->x[k][l][0] = 1.0; treenode[i]->x[k][l][(long)C - (long)A] = 1.0; treenode[i]->x[k][l][(long)G - (long)A] = 1.0; break; case 'N': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 1.0; break; case 'X': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 1.0; break; case '?': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 1.0; break; case 'O': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 1.0; break; case '-': for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) treenode[i]->x[k][l][(long)b - (long)A] = 1.0; break; } } } } } /* makevalues2 */ void fillin(node *p, node *left, node *rt) { /* sets up for each node in the tree the base sequence at that point and counts the changes. 
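This follows the usual Fitch-style rule: the node's base set is the intersection of the descendants' sets; if that intersection is empty, the union is kept instead and one weighted step is added (under transversion parsimony, transvp, a step is charged only when the union is not purely purines or purely pyrimidines).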
*/ long i, j, k, n, purset, pyrset; node *q; purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); if (!left) { memcpy(p->base, rt->base, endsite*sizeof(long)); memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); q = rt; } else if (!rt) { memcpy(p->base, left->base, endsite*sizeof(long)); memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); q = left; } else { for (i = 0; i < endsite; i++) { p->base[i] = left->base[i] & rt->base[i]; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (p->base[i] == 0) { p->base[i] = left->base[i] | rt->base[i]; if (transvp) { if (!((p->base[i] == purset) || (p->base[i] == pyrset))) p->numsteps[i] += weight[i]; } else p->numsteps[i] += weight[i]; } } q = rt; } if (left && rt) n = 2; else n = 1; for (i = 0; i < endsite; i++) for (j = (long)A; j <= (long)O; j++) p->numnuc[i][j] = 0; for (k = 1; k <= n; k++) { if (k == 2) q = left; for (i = 0; i < endsite; i++) { for (j = (long)A; j <= (long)O; j++) { if (q->base[i] & (1 << j)) p->numnuc[i][j]++; } } } } /* fillin */ long getlargest(long *numnuc) { /* find the largest in array numnuc */ long i, largest; largest = 0; for (i = (long)A; i <= (long)O; i++) if (numnuc[i] > largest) largest = numnuc[i]; return largest; } /* getlargest */ void multifillin(node *p, node *q, long dnumdesc) { /* sets up for each node in the tree the base sequence at that point and counts the changes according to the changes in q's base */ long i, j, b, largest, descsteps, purset, pyrset; memcpy(p->oldbase, p->base, endsite*sizeof(long)); memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); for (i = 0; i < endsite; i++) { descsteps = 0; for (j = (long)A; j <= (long)O; j++) { b = 1 << j; if ((descsteps == 0) && (p->base[i] & b)) descsteps = p->numsteps[i] - (p->numdesc - dnumdesc - p->numnuc[i][j]) * weight[i]; } if (dnumdesc == -1) descsteps -= q->oldnumsteps[i]; else if (dnumdesc == 0) descsteps += (q->numsteps[i] - q->oldnumsteps[i]); else descsteps += q->numsteps[i]; if (q->oldbase[i] != q->base[i]) { for (j = (long)A; j <= (long)O; j++) { b = 1 << j; if (transvp) { if (b & purset) b = purset; if (b & pyrset) b = pyrset; } if ((q->oldbase[i] & b) && !(q->base[i] & b)) p->numnuc[i][j]--; else if (!(q->oldbase[i] & b) && (q->base[i] & b)) p->numnuc[i][j]++; } } largest = getlargest(p->numnuc[i]); if (q->oldbase[i] != q->base[i]) { p->base[i] = 0; for (j = (long)A; j <= (long)O; j++) { if (p->numnuc[i][j] == largest) p->base[i] |= (1 << j); } } p->numsteps[i] = (p->numdesc - largest) * weight[i] + descsteps; } } /* multifillin */ void sumnsteps(node *p, node *left, node *rt, long a, long b) { /* sets up for each node in the tree the base sequence at that point and counts the changes. 
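Unlike fillin(), only sites a..b-1 are examined and no per-nucleotide counts are accumulated.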
*/ long i; long ns, rs, ls, purset, pyrset; if (!left) { memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); memcpy(p->base, rt->base, endsite*sizeof(long)); } else if (!rt) { memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); memcpy(p->base, left->base, endsite*sizeof(long)); } else { purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); for (i = a; i < b; i++) { ls = left->base[i]; rs = rt->base[i]; ns = ls & rs; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (ns == 0) { ns = ls | rs; if (transvp) { if (!((ns == purset) || (ns == pyrset))) p->numsteps[i] += weight[i]; } else p->numsteps[i] += weight[i]; } p->base[i] = ns; } } } /* sumnsteps */ void sumnsteps2(node *p,node *left,node *rt,long a,long b,long *threshwt) { /* counts the changes at each node. */ long i, steps; long ns, rs, ls, purset, pyrset; long term; if (a == 0) p->sumsteps = 0.0; if (!left) memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); else if (!rt) memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); else { purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); for (i = a; i < b; i++) { ls = left->base[i]; rs = rt->base[i]; ns = ls & rs; p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; if (ns == 0) { ns = ls | rs; if (transvp) { if (!((ns == purset) || (ns == pyrset))) p->numsteps[i] += weight[i]; } else p->numsteps[i] += weight[i]; } } } for (i = a; i < b; i++) { steps = p->numsteps[i]; if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; p->sumsteps += (double)term; } } /* sumnsteps2 */ void multisumnsteps(node *p, node *q, long a, long b, long *threshwt) { /* computes the number of steps between p and q */ long i, j, steps, largest, descsteps, purset, pyrset, b1; long term; if (a == 0) p->sumsteps = 0.0; purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); for (i = a; i < b; i++) { descsteps = 0; for (j = (long)A; j <= (long)O; j++) { if ((descsteps == 0) && (p->base[i] & (1 << j))) descsteps = p->numsteps[i] - (p->numdesc - 1 - p->numnuc[i][j]) * weight[i]; } descsteps += q->numsteps[i]; largest = 0; for (j = (long)A; j <= (long)O; j++) { b1 = (1 << j); if (transvp) { if (b1 & purset) b1 = purset; if (b1 & pyrset) b1 = pyrset; } if (q->base[i] & b1) p->numnuc[i][j]++; if (p->numnuc[i][j] > largest) largest = p->numnuc[i][j]; } steps = (p->numdesc - largest) * weight[i] + descsteps; if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; p->sumsteps += (double)term; } } /* multisumnsteps */ void multisumnsteps2(node *p) { /* counts the changes at each multi-way node. Sums up steps of all descendants */ long i, j, largest, purset, pyrset, b1; node *q; baseptr b; purset = (1 << (long)A) + (1 << (long)G); pyrset = (1 << (long)C) + (1 << (long)T); for (i = 0; i < endsite; i++) { p->numsteps[i] = 0; q = p->next; while (q != p) { if (q->back) { p->numsteps[i] += q->back->numsteps[i]; b = q->back->base; for (j = (long)A; j <= (long)O; j++) { b1 = (1 << j); if (transvp) { if (b1 & purset) b1 = purset; if (b1 & pyrset) b1 = pyrset; } if (b[i] & b1) p->numnuc[i][j]++; } } q = q->next; } largest = getlargest(p->numnuc[i]); p->base[i] = 0; for (j = (long)A; j <= (long)O; j++) { if (p->numnuc[i][j] == largest) p->base[i] |= (1 << j); } p->numsteps[i] += ((p->numdesc - largest) * weight[i]); } } /* multisumnsteps2 */ boolean alltips(node *forknode, node *p) { /* returns true if all descendants of forknode except p are tips; false otherwise. 
*/ node *q, *r; boolean tips; tips = true; r = forknode; q = forknode->next; do { if (q->back && q->back != p && !q->back->tip) tips = false; q = q->next; } while (tips && q != r); return tips; } /* alltips */ void gdispose(node *p, node **grbg, pointarray treenode) { /* go through tree throwing away nodes */ node *q, *r; p->back = NULL; if (p->tip) return; treenode[p->index - 1] = NULL; q = p->next; while (q != p) { gdispose(q->back, grbg, treenode); q->back = NULL; r = q; q = q->next; chuck(grbg, r); } chuck(grbg, q); } /* gdispose */ void preorder(node *p, node *r, node *root, node *removing, node *adding, node *changing, long dnumdesc) { /* recompute number of steps in preorder taking both ancestoral and descendent steps into account. removing points to a node being removed, if any */ node *q, *p1, *p2; if (p && !p->tip && p != adding) { q = p; do { if (p->back != r) { if (p->numdesc > 2) { if (changing) multifillin (p, r, dnumdesc); else multifillin (p, r, 0); } else { p1 = p->next; if (!removing) while (!p1->back) p1 = p1->next; else while (!p1->back || p1->back == removing) p1 = p1->next; p2 = p1->next; if (!removing) while (!p2->back) p2 = p2->next; else while (!p2->back || p2->back == removing) p2 = p2->next; p1 = p1->back; p2 = p2->back; if (p->back == p1) p1 = NULL; else if (p->back == p2) p2 = NULL; memcpy(p->oldbase, p->base, endsite*sizeof(long)); memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); fillin(p, p1, p2); } } p = p->next; } while (p != q); q = p; do { preorder(p->next->back, p->next, root, removing, adding, NULL, 0); p = p->next; } while (p->next != q); } } /* preorder */ void updatenumdesc(node *p, node *root, long n) { /* set p's numdesc to n. If p is the root, numdesc of p's descendants are set to n-1. */ node *q; q = p; if (p == root && n > 0) { p->numdesc = n; n--; q = q->next; } do { q->numdesc = n; q = q->next; } while (q != p); } /* updatenumdesc */ void add(node *below,node *newtip,node *newfork,node **root, boolean recompute,pointarray treenode,node **grbg,long *zeros) { /* inserts the nodes newfork and its left descendant, newtip, to the tree. below becomes newfork's right descendant. 
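below's former ancestor becomes newfork's ancestor.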
if newfork is NULL, newtip is added as below's sibling */ /* used in dnacomp & dnapars */ node *p; if (below != treenode[below->index - 1]) below = treenode[below->index - 1]; if (newfork) { if (below->back != NULL) below->back->back = newfork; newfork->back = below->back; below->back = newfork->next->next; newfork->next->next->back = below; newfork->next->back = newtip; newtip->back = newfork->next; if (*root == below) *root = newfork; updatenumdesc(newfork, *root, 2); } else { gnutreenode(grbg, &p, below->index, endsite, zeros); p->back = newtip; newtip->back = p; p->next = below->next; below->next = p; updatenumdesc(below, *root, below->numdesc + 1); } if (!newtip->tip) updatenumdesc(newtip, *root, newtip->numdesc); (*root)->back = NULL; if (!recompute) return; if (!newfork) { memcpy(newtip->back->base, below->base, endsite*sizeof(long)); memcpy(newtip->back->numsteps, below->numsteps, endsite*sizeof(long)); memcpy(newtip->back->numnuc, below->numnuc, endsite*sizeof(nucarray)); if (below != *root) { memcpy(below->back->oldbase, zeros, endsite*sizeof(long)); memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); multifillin(newtip->back, below->back, 1); } if (!newtip->tip) { memcpy(newtip->back->oldbase, zeros, endsite*sizeof(long)); memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(newtip, newtip->back, *root, NULL, NULL, below, 1); } memcpy(newtip->oldbase, zeros, endsite*sizeof(long)); memcpy(newtip->oldnumsteps, zeros, endsite*sizeof(long)); preorder(below, newtip, *root, NULL, newtip, below, 1); if (below != *root) preorder(below->back, below, *root, NULL, NULL, NULL, 0); } else { fillin(newtip->back, newtip->back->next->back, newtip->back->next->next->back); if (!newtip->tip) { memcpy(newtip->back->oldbase, zeros, endsite*sizeof(long)); memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(newtip, newtip->back, *root, NULL, NULL, newfork, 1); } if (newfork != *root) { memcpy(below->back->base, newfork->back->base, endsite*sizeof(long)); memcpy(below->back->numsteps, newfork->back->numsteps, endsite*sizeof(long)); preorder(newfork, newtip, *root, NULL, newtip, NULL, 0); } else { fillin(below->back, newtip, NULL); fillin(newfork, newtip, below); memcpy(below->back->oldbase, zeros, endsite*sizeof(long)); memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); preorder(below, below->back, *root, NULL, NULL, newfork, 1); } if (newfork != *root) { memcpy(newfork->oldbase, below->base, endsite*sizeof(long)); memcpy(newfork->oldnumsteps, below->numsteps, endsite*sizeof(long)); preorder(newfork->back, newfork, *root, NULL, NULL, NULL, 0); } } } /* add */ void findbelow(node **below, node *item, node *fork) { /* decide which of fork's binary children is below */ if (fork->next->back == item) *below = fork->next->next->back; else *below = fork->next->back; } /* findbelow */ void re_move(node *item, node **fork, node **root, boolean recompute, pointarray treenode, node **grbg, long *zeros) { /* removes nodes item and its ancestor, fork, from the tree. the new descendant of fork's ancestor is made to be fork's second descendant (other than item). Also returns pointers to the deleted nodes, item and fork. 
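When recompute is true, base sets and step counts on the remaining nodes are refreshed by preorder.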
If item belongs to a node with more than 2 descendants, fork will not be deleted */ /* used in dnacomp & dnapars */ node *p, *q, *other = NULL, *otherback = NULL; if (item->back == NULL) { *fork = NULL; return; } *fork = treenode[item->back->index - 1]; if ((*fork)->numdesc == 2) { updatenumdesc(*fork, *root, 0); findbelow(&other, item, *fork); otherback = other->back; if (*root == *fork) { *root = other; if (!other->tip) updatenumdesc(other, *root, other->numdesc); } p = item->back->next->back; q = item->back->next->next->back; if (p != NULL) p->back = q; if (q != NULL) q->back = p; (*fork)->back = NULL; p = (*fork)->next; while (p != *fork) { p->back = NULL; p = p->next; } } else { updatenumdesc(*fork, *root, (*fork)->numdesc - 1); p = *fork; while (p->next != item->back) p = p->next; p->next = item->back->next; } if (!item->tip) { updatenumdesc(item, item, item->numdesc); if (recompute) { memcpy(item->back->oldbase, item->back->base, endsite*sizeof(long)); memcpy(item->back->oldnumsteps, item->back->numsteps, endsite*sizeof(long)); memcpy(item->back->base, zeros, endsite*sizeof(long)); memcpy(item->back->numsteps, zeros, endsite*sizeof(long)); preorder(item, item->back, *root, item->back, NULL, item, -1); } } if ((*fork)->numdesc >= 2) chuck(grbg, item->back); item->back = NULL; if (!recompute) return; if ((*fork)->numdesc == 0) { memcpy(otherback->oldbase, otherback->base, endsite*sizeof(long)); memcpy(otherback->oldnumsteps, otherback->numsteps, endsite*sizeof(long)); if (other == *root) { memcpy(otherback->base, zeros, endsite*sizeof(long)); memcpy(otherback->numsteps, zeros, endsite*sizeof(long)); } else { memcpy(otherback->base, other->back->base, endsite*sizeof(long)); memcpy(otherback->numsteps, other->back->numsteps, endsite*sizeof(long)); } p = other->back; other->back = otherback; if (other == *root) preorder(other, otherback, *root, otherback, NULL, other, -1); else preorder(other, otherback, *root, NULL, NULL, NULL, 0); other->back = p; if (other != *root) { memcpy(other->oldbase,(*fork)->base, endsite*sizeof(long)); memcpy(other->oldnumsteps,(*fork)->numsteps, endsite*sizeof(long)); preorder(other->back, other, *root, NULL, NULL, NULL, 0); } } else { memcpy(item->oldbase, item->base, endsite*sizeof(long)); memcpy(item->oldnumsteps, item->numsteps, endsite*sizeof(long)); memcpy(item->base, zeros, endsite*sizeof(long)); memcpy(item->numsteps, zeros, endsite*sizeof(long)); preorder(*fork, item, *root, NULL, NULL, *fork, -1); if (*fork != *root) preorder((*fork)->back, *fork, *root, NULL, NULL, NULL, 0); memcpy(item->base, item->oldbase, endsite*sizeof(long)); memcpy(item->numsteps, item->oldnumsteps, endsite*sizeof(long)); } } /* remove */ void postorder(node *p) { /* traverses an n-ary tree, suming up steps at a node's descendants */ /* used in dnacomp, dnapars, & dnapenny */ node *q; if (p->tip) return; q = p->next; while (q != p) { postorder(q->back); q = q->next; } zeronumnuc(p, endsite); if (p->numdesc > 2) multisumnsteps2(p); else fillin(p, p->next->back, p->next->next->back); } /* postorder */ void getnufork(node **nufork,node **grbg,pointarray treenode,long *zeros) { /* find a fork not used currently */ long i; i = spp; while (treenode[i] && treenode[i]->numdesc > 0) i++; if (!treenode[i]) gnutreenode(grbg, &treenode[i], i, endsite, zeros); *nufork = treenode[i]; } /* getnufork */ void reroot(node *outgroup, node *root) { /* reorients tree, putting outgroup in desired position. used if the root is binary. 
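The root's two former children are joined to each other, and the root ring is spliced into the branch between the outgroup and its old neighbour.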
*/ /* used in dnacomp & dnapars */ node *p, *q; if (outgroup->back->index == root->index) return; p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = q; outgroup->back = p; } /* reroot */ void reroot2(node *outgroup, node *root) { /* reorients tree, putting outgroup in desired position. */ /* used in dnacomp & dnapars */ node *p; p = outgroup->back->next; while (p->next != outgroup->back) p = p->next; root->next = outgroup->back; p->next = root; } /* reroot2 */ void reroot3(node *outgroup, node *root, node *root2, node *lastdesc, node **grbg) { /* reorients tree, putting back outgroup in original position. */ /* used in dnacomp & dnapars */ node *p; p = root->next; while (p->next != root) p = p->next; chuck(grbg, root); p->next = outgroup->back; root2->next = lastdesc->next; lastdesc->next = root2; } /* reroot3 */ void savetraverse(node *p) { /* sets BOOLEANs that indicate which way is down */ node *q; p->bottom = true; if (p->tip) return; q = p->next; while (q != p) { q->bottom = false; savetraverse(q->back); q = q->next; } } /* savetraverse */ void newindex(long i, node *p) { /* assigns index i to node p */ while (p->index != i) { p->index = i; p = p->next; } } /* newindex */ void flipindexes(long nextnode, pointarray treenode) { /* flips index of nodes between nextnode and last node. */ long last; node *temp; last = nonodes; while (treenode[last - 1]->numdesc == 0) last--; if (last > nextnode) { temp = treenode[nextnode - 1]; treenode[nextnode - 1] = treenode[last - 1]; treenode[last - 1] = temp; newindex(nextnode, treenode[nextnode - 1]); newindex(last, treenode[last - 1]); } } /* flipindexes */ boolean parentinmulti(node *anode) { /* sees if anode's parent has more than 2 children */ node *p; while (!anode->bottom) anode = anode->next; p = anode->back; while (!p->bottom) p = p->next; return (p->numdesc > 2); } /* parentinmulti */ long sibsvisited(node *anode, long *place) { /* computes the number of nodes which are visited earlier than anode among its siblings */ node *p; long nvisited; while (!anode->bottom) anode = anode->next; p = anode->back->next; nvisited = 0; do { if (!p->bottom && place[p->back->index - 1] != 0) nvisited++; p = p->next; } while (p != anode->back); return nvisited; } /* sibsvisited */ long smallest(node *anode, long *place) { /* finds the smallest index of sibling of anode */ node *p; long min; while (!anode->bottom) anode = anode->next; p = anode->back->next; if (p->bottom) p = p->next; min = nonodes; do { if (p->back && place[p->back->index - 1] != 0) { if (p->back->index <= spp) { if (p->back->index < min) min = p->back->index; } else { if (place[p->back->index - 1] < min) min = place[p->back->index - 1]; } } p = p->next; if (p->bottom) p = p->next; } while (p != anode->back); return min; } /* smallest */ void bintomulti(node **root, node **binroot, node **grbg, long *zeros) { /* attaches root's left child to its right child and makes the right child new root */ node *left, *right, *newnode, *temp; right = (*root)->next->next->back; left = (*root)->next->back; if (right->tip) { (*root)->next = right->back; (*root)->next->next = left->back; temp = left; left = right; right = temp; right->back->next = *root; } gnutreenode(grbg, &newnode, right->index, endsite, zeros); newnode->next = right->next; newnode->back = left; left->back = newnode; right->next = newnode; (*root)->next->back = (*root)->next->next->back = NULL; *binroot = *root; (*binroot)->numdesc = 
0; *root = right; (*root)->numdesc++; (*root)->back = NULL; } /* bintomulti */ void backtobinary(node **root, node *binroot, node **grbg) { /* restores binary root */ node *p; binroot->next->back = (*root)->next->back; (*root)->next->back->back = binroot->next; p = (*root)->next; (*root)->next = p->next; binroot->next->next->back = *root; (*root)->back = binroot->next->next; chuck(grbg, p); (*root)->numdesc--; *root = binroot; (*root)->numdesc = 2; } /* backtobinary */ boolean outgrin(node *root, node *outgrnode) { /* checks if outgroup node is a child of root */ node *p; p = root->next; while (p != root) { if (p->back == outgrnode) return true; p = p->next; } return false; } /* outgrin */ void flipnodes(node *nodea, node *nodeb) { /* flip nodes */ node *backa, *backb; backa = nodea->back; backb = nodeb->back; backa->back = nodeb; backb->back = nodea; nodea->back = backb; nodeb->back = backa; } /* flipnodes */ void moveleft(node *root, node *outgrnode, node **flipback) { /* makes outgroup node to leftmost child of root */ node *p; boolean done; p = root->next; done = false; while (p != root && !done) { if (p->back == outgrnode) { *flipback = p; flipnodes(root->next->back, p->back); done = true; } p = p->next; } } /* moveleft */ void savetree(node *root, long *place, pointarray treenode, node **grbg, long *zeros) { /* record in place where each species has to be added to reconstruct this tree */ /* used by dnacomp & dnapars */ long i, j, nextnode, nvisited; node *p, *q, *r = NULL, *root2, *lastdesc, *outgrnode, *binroot, *flipback; boolean done, newfork; binroot = NULL; lastdesc = NULL; root2 = NULL; flipback = NULL; outgrnode = treenode[outgrno - 1]; if (root->numdesc == 2) bintomulti(&root, &binroot, grbg, zeros); if (outgrin(root, outgrnode)) { if (outgrnode != root->next->back) moveleft(root, outgrnode, &flipback); } else { root2 = root; lastdesc = root->next; while (lastdesc->next != root) lastdesc = lastdesc->next; lastdesc->next = root->next; gnutreenode(grbg, &root, outgrnode->back->index, endsite, zeros); root->numdesc = root2->numdesc; reroot2(outgrnode, root); } savetraverse(root); nextnode = spp + 1; for (i = nextnode; i <= nonodes; i++) if (treenode[i - 1]->numdesc == 0) flipindexes(i, treenode); for (i = 0; i < nonodes; i++) place[i] = 0; place[root->index - 1] = 1; for (i = 1; i <= spp; i++) { p = treenode[i - 1]; while (place[p->index - 1] == 0) { place[p->index - 1] = i; while (!p->bottom) p = p->next; r = p; p = p->back; } if (i > 1) { q = treenode[i - 1]; newfork = true; nvisited = sibsvisited(q, place); if (nvisited == 0) { if (parentinmulti(r)) { nvisited = sibsvisited(r, place); if (nvisited == 0) place[i - 1] = place[p->index - 1]; else if (nvisited == 1) place[i - 1] = smallest(r, place); else { place[i - 1] = -smallest(r, place); newfork = false; } } else place[i - 1] = place[p->index - 1]; } else if (nvisited == 1) { place[i - 1] = place[p->index - 1]; } else { place[i - 1] = -smallest(q, place); newfork = false; } if (newfork) { j = place[p->index - 1]; done = false; while (!done) { place[p->index - 1] = nextnode; while (!p->bottom) p = p->next; p = p->back; done = (p == NULL); if (!done) done = (place[p->index - 1] != j); if (done) { nextnode++; } } } } } if (flipback) flipnodes(outgrnode, flipback->back); else { if (root2) { reroot3(outgrnode, root, root2, lastdesc, grbg); root = root2; } } if (binroot) backtobinary(&root, binroot, grbg); } /* savetree */ void addnsave(node *p, node *item, node *nufork, node **root, node **grbg, boolean multf, pointarray 
treenode, long *place, long *zeros) { /* adds item to tree and save it. Then removes item. */ node *dummy; if (!multf) add(p, item, nufork, root, false, treenode, grbg, zeros); else add(p, item, NULL, root, false, treenode, grbg, zeros); savetree(*root, place, treenode, grbg, zeros); if (!multf) re_move(item, &nufork, root, false, treenode, grbg, zeros); else re_move(item, &dummy, root, false, treenode, grbg, zeros); } /* addnsave */ void addbestever(long *pos, long *nextree, long maxtrees, boolean collapse, long *place, bestelm *bestrees) { /* adds first best tree */ *pos = 1; *nextree = 1; initbestrees(bestrees, maxtrees, true); initbestrees(bestrees, maxtrees, false); addtree(*pos, nextree, collapse, place, bestrees); } /* addbestever */ void addtiedtree(long pos, long *nextree, long maxtrees, boolean collapse, long *place, bestelm *bestrees) { /* add tied tree */ if (*nextree <= maxtrees) addtree(pos, nextree, collapse, place, bestrees); } /* addtiedtree */ void clearcollapse(pointarray treenode) { /* clears collapse status at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->collapse = undefined; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->collapse = undefined; p = p->next; } } } } /* clearcollapse */ void clearbottom(pointarray treenode) { /* clears boolean bottom at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->bottom = false; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->bottom = false; p = p->next; } } } } /* clearbottom */ void collabranch(node *collapfrom, node *tempfrom, node *tempto) { /* collapse branch from collapfrom */ long i, j, b, largest, descsteps; boolean done; for (i = 0; i < endsite; i++) { descsteps = 0; for (j = (long)A; j <= (long)O; j++) { b = 1 << j; if ((descsteps == 0) && (collapfrom->base[i] & b)) descsteps = tempfrom->oldnumsteps[i] - (collapfrom->numdesc - collapfrom->numnuc[i][j]) * weight[i]; } done = false; for (j = (long)A; j <= (long)O; j++) { b = 1 << j; if (!done && (tempto->base[i] & b)) { descsteps += (tempto->numsteps[i] - (tempto->numdesc - collapfrom->numdesc - tempto->numnuc[i][j]) * weight[i]); done = true; } } for (j = (long)A; j <= (long)O; j++) tempto->numnuc[i][j] += collapfrom->numnuc[i][j]; largest = getlargest(tempto->numnuc[i]); tempto->base[i] = 0; for (j = (long)A; j <= (long)O; j++) { if (tempto->numnuc[i][j] == largest) tempto->base[i] |= (1 << j); } tempto->numsteps[i] = (tempto->numdesc - largest) * weight[i] + descsteps; } } /* collabranch */ boolean allcommonbases(node *a, node *b, boolean *allsame) { /* see if bases are common at all sites for nodes a and b */ long i; boolean allcommon; allcommon = true; *allsame = true; for (i = 0; i < endsite; i++) { if ((a->base[i] & b->base[i]) == 0) allcommon = false; else if (a->base[i] != b->base[i]) *allsame = false; } return allcommon; } /* allcommonbases */ void findbottom(node *p, node **bottom) { /* find a node with field bottom set at node p */ node *q; if (p->bottom) *bottom = p; else { q = p->next; while(!q->bottom && q != p) q = q->next; *bottom = q; } } /* findbottom */ boolean moresteps(node *a, node *b) { /* see if numsteps of node a exceeds those of node b */ long i; for (i = 0; i < endsite; i++) if (a->numsteps[i] > b->numsteps[i]) return true; return false; } /* moresteps */ boolean passdown(node *desc, node *parent, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf) { /* track down to node start to 
see if an ancestor branch can be collapsed */ node *temp; boolean done, allsame; done = (parent == start); while (!done) { desc = parent; findbottom(parent->back, &parent); if (multf && start == below && parent == below) parent = added; memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->oldbase, desc->base, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); memcpy(tempprt->base, parent->base, endsite*sizeof(long)); memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(tempprt->numnuc, parent->numnuc, endsite*sizeof(nucarray)); tempprt->numdesc = parent->numdesc; multifillin(tempprt, tempdsc, 0); if (!allcommonbases(tempprt, parent, &allsame)) return false; else if (moresteps(tempprt, parent)) return false; else if (allsame) return true; if (parent == added) parent = below; done = (parent == start); if (done && ((start == item) || (!multf && start == below))) { memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->oldbase, start->base, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); multifillin(added, tempdsc, 0); tempprt = added; } } temp = tempdsc; if (start == below || start == item) fillin(temp, tempprt, below->back); else fillin(temp, tempprt, added); return !moresteps(temp, total); } /* passdown */ boolean trycollapdesc(node *desc, node *parent, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf, long *zeros) { /* see if branch between nodes desc and parent can be collapsed */ boolean allsame; if (desc->numdesc == 1) return true; if (multf && start == below && parent == below) parent = added; memcpy(tempdsc->base, zeros, endsite*sizeof(long)); memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); memcpy(tempdsc->oldbase, desc->base, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); memcpy(tempprt->base, parent->base, endsite*sizeof(long)); memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(tempprt->numnuc, parent->numnuc, endsite*sizeof(nucarray)); tempprt->numdesc = parent->numdesc - 1; multifillin(tempprt, tempdsc, -1); tempprt->numdesc += desc->numdesc; collabranch(desc, tempdsc, tempprt); if (!allcommonbases(tempprt, parent, &allsame) || moresteps(tempprt, parent)) { if (parent != added) { desc->collapse = nocollap; parent->collapse = nocollap; } return false; } else if (allsame) { if (parent != added) { desc->collapse = tocollap; parent->collapse = tocollap; } return true; } if (parent == added) parent = below; if ((start == item && parent == item) || (!multf && start == below && parent == below)) { memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); memcpy(tempdsc->oldbase, start->base, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); memcpy(tempprt->base, added->base, endsite*sizeof(long)); memcpy(tempprt->numsteps, added->numsteps, endsite*sizeof(long)); memcpy(tempprt->numnuc, added->numnuc, endsite*sizeof(nucarray)); tempprt->numdesc = added->numdesc; multifillin(tempprt, tempdsc, 0); if (!allcommonbases(tempprt, added, &allsame)) return false; else if (moresteps(tempprt, added)) return false; else if (allsame) return true; } return passdown(desc, parent, 
start, below, item, added, total, tempdsc, tempprt, multf); } /* trycollapdesc */ void setbottom(node *p) { /* set field bottom at node p */ node *q; p->bottom = true; q = p->next; do { q->bottom = false; q = q->next; } while (q != p); } /* setbottom */ boolean zeroinsubtree(node *subtree, node *start, node *below, node *item, node *added, node *total, node *tempdsc, node *tempprt, boolean multf, node* root, long *zeros) { /* sees if subtree contains a zero length branch */ node *p; if (!subtree->tip) { setbottom(subtree); p = subtree->next; do { if (p->back && !p->back->tip && !((p->back->collapse == nocollap) && (subtree->collapse == nocollap)) && (subtree->numdesc != 1)) { if ((p->back->collapse == tocollap) && (subtree->collapse == tocollap) && multf && (subtree != below)) return true; /* when root->numdesc == 2 * there is no mandatory step at the root, * instead of checking at the root we check around it * we only need to check p because the first if * statement already gets rid of it for the subtree */ else if ((p->back->index != root->index || root->numdesc > 2) && trycollapdesc(p->back, subtree, start, below, item, added, total, tempdsc, tempprt, multf, zeros)) return true; else if ((p->back->index == root->index && root->numdesc == 2) && !(root->next->back->tip) && !(root->next->next->back->tip) && trycollapdesc(root->next->back, root->next->next->back, start, below, item,added, total, tempdsc, tempprt, multf, zeros)) return true; } p = p->next; } while (p != subtree); p = subtree->next; do { if (p->back && !p->back->tip) { if (zeroinsubtree(p->back, start, below, item, added, total, tempdsc, tempprt, multf, root, zeros)) return true; } p = p->next; } while (p != subtree); } return false; } /* zeroinsubtree */ boolean collapsible(node *item, node *below, node *temp, node *temp1, node *tempdsc, node *tempprt, node *added, node *total, boolean multf, node *root, long *zeros, pointarray treenode) { /* sees if any branch can be collapsed */ node *belowbk; boolean allsame; if (multf) { memcpy(tempdsc->base, item->base, endsite*sizeof(long)); memcpy(tempdsc->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempdsc->oldbase, zeros, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, zeros, endsite*sizeof(long)); memcpy(added->base, below->base, endsite*sizeof(long)); memcpy(added->numsteps, below->numsteps, endsite*sizeof(long)); memcpy(added->numnuc, below->numnuc, endsite*sizeof(nucarray)); added->numdesc = below->numdesc + 1; multifillin(added, tempdsc, 1); } else { fillin(added, item, below); added->numdesc = 2; } fillin(total, added, below->back); clearbottom(treenode); if (below->back) { if (zeroinsubtree(below->back, below->back, below, item, added, total, tempdsc, tempprt, multf, root, zeros)) return true; } if (multf) { if (zeroinsubtree(below, below, below, item, added, total, tempdsc, tempprt, multf, root, zeros)) return true; } else if (!below->tip) { if (zeroinsubtree(below, below, below, item, added, total, tempdsc, tempprt, multf, root, zeros)) return true; } if (!item->tip) { if (zeroinsubtree(item, item, below, item, added, total, tempdsc, tempprt, multf, root, zeros)) return true; } if (multf && below->back && !below->back->tip) { memcpy(tempdsc->base, zeros, endsite*sizeof(long)); memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); memcpy(tempdsc->oldbase, added->base, endsite*sizeof(long)); memcpy(tempdsc->oldnumsteps, added->numsteps, endsite*sizeof(long)); if (below->back == treenode[below->back->index - 1]) belowbk = below->back->next; else belowbk = 
treenode[below->back->index - 1]; memcpy(tempprt->base, belowbk->base, endsite*sizeof(long)); memcpy(tempprt->numsteps, belowbk->numsteps, endsite*sizeof(long)); memcpy(tempprt->numnuc, belowbk->numnuc, endsite*sizeof(nucarray)); tempprt->numdesc = belowbk->numdesc - 1; multifillin(tempprt, tempdsc, -1); tempprt->numdesc += added->numdesc; collabranch(added, tempdsc, tempprt); if (!allcommonbases(tempprt, belowbk, &allsame)) return false; else if (allsame && !moresteps(tempprt, belowbk)) return true; else if (belowbk->back) { fillin(temp, tempprt, belowbk->back); fillin(temp1, belowbk, belowbk->back); return !moresteps(temp, temp1); } } return false; } /* collapsible */ void replaceback(node **oldback, node *item, node *forknode, node **grbg, long *zeros) { /* replaces back node of item with another */ node *p; p = forknode; while (p->next->back != item) p = p->next; *oldback = p->next; gnutreenode(grbg, &p->next, forknode->index, endsite, zeros); p->next->next = (*oldback)->next; p->next->back = (*oldback)->back; p->next->back->back = p->next; (*oldback)->next = (*oldback)->back = NULL; } /* replaceback */ void putback(node *oldback, node *item, node *forknode, node **grbg) { /* restores node to back of item */ node *p, *q; p = forknode; while (p->next != item->back) p = p->next; q = p->next; oldback->next = p->next->next; p->next = oldback; oldback->back = item; item->back = oldback; oldback->index = forknode->index; chuck(grbg, q); } /* putback */ void savelocrearr(node *item, node *forknode, node *below, node *tmp, node *tmp1, node *tmp2, node *tmp3, node *tmprm, node *tmpadd, node **root, long maxtrees, long *nextree, boolean multf, boolean bestever, boolean *saved, long *place, bestelm *bestrees, pointarray treenode, node **grbg, long *zeros) { /* saves tied or better trees during local rearrangements by removing item from forknode and adding to below */ node *other, *otherback = NULL, *oldfork, *nufork, *oldback; long pos; boolean found, collapse; if (forknode->numdesc == 2) { findbelow(&other, item, forknode); otherback = other->back; oldback = NULL; } else { other = NULL; replaceback(&oldback, item, forknode, grbg, zeros); } re_move(item, &oldfork, root, false, treenode, grbg, zeros); if (!multf) getnufork(&nufork, grbg, treenode, zeros); else nufork = NULL; addnsave(below, item, nufork, root, grbg, multf, treenode, place, zeros); pos = 0; findtree(&found, &pos, *nextree, place, bestrees); if (other) { add(other, item, oldfork, root, false, treenode, grbg, zeros); if (otherback->back != other) flipnodes(item, other); } else add(forknode, item, NULL, root, false, treenode, grbg, zeros); *saved = false; if (found) { if (oldback) putback(oldback, item, forknode, grbg); } else { if (oldback) chuck(grbg, oldback); re_move(item, &oldfork, root, true, treenode, grbg, zeros); collapse = collapsible(item, below, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, multf, *root, zeros, treenode); if (!collapse) { if (bestever) addbestever(&pos, nextree, maxtrees, collapse, place, bestrees); else addtiedtree(pos, nextree, maxtrees, collapse, place, bestrees); } if (other) add(other, item, oldfork, root, true, treenode, grbg, zeros); else add(forknode, item, NULL, root, true, treenode, grbg, zeros); *saved = !collapse; } } /* savelocrearr */ void clearvisited(pointarray treenode) { /* clears boolean visited at a node */ long i; node *p; for (i = 0; i < nonodes; i++) { treenode[i]->visited = false; if (!treenode[i]->tip) { p = treenode[i]->next; while (p != treenode[i]) { p->visited = false; p = p->next; } } 
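/* interior forks: every node in the fork's ring carries its own visited flag, so each ring member is cleared as well */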
} } /* clearvisited */ void hyprint(long b1, long b2, struct LOC_hyptrav *htrav, pointarray treenode, Char *basechar) { /* print out states in sites b1 through b2 at node */ long i, j, k, n; boolean dot; bases b; if (htrav->bottom) { if (!outgropt) fprintf(outfile, " "); else fprintf(outfile, "root "); } else fprintf(outfile, "%4ld ", htrav->r->back->index - spp); if (htrav->r->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[htrav->r->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", htrav->r->index - spp); if (htrav->bottom) fprintf(outfile, " "); else if (htrav->nonzero) fprintf(outfile, " yes "); else if (htrav->maybe) fprintf(outfile, " maybe "); else fprintf(outfile, " no "); for (i = b1; i <= b2; i++) { j = location[ally[i - 1] - 1]; htrav->tempset = htrav->r->base[j - 1]; htrav->anc = htrav->hypset[j - 1]; if (!htrav->bottom) htrav->anc = treenode[htrav->r->back->index - 1]->base[j - 1]; dot = dotdiff && (htrav->tempset == htrav->anc && !htrav->bottom); if (dot) putc('.', outfile); else if (htrav->tempset == (1 << A)) putc('A', outfile); else if (htrav->tempset == (1 << C)) putc('C', outfile); else if (htrav->tempset == (1 << G)) putc('G', outfile); else if (htrav->tempset == (1 << T)) putc('T', outfile); else if (htrav->tempset == (1 << O)) putc('-', outfile); else { k = 1; n = 0; for (b = A; b <= O; b = b + 1) { if (((1 << b) & htrav->tempset) != 0) n += k; k += k; } putc(basechar[n - 1], outfile); } if (i % 10 == 0) putc(' ', outfile); } putc('\n', outfile); } /* hyprint */ void gnubase(gbases **p, gbases **garbage, long endsite) { /* this and the following are do-it-yourself garbage collectors. Make a new node or pull one off the garbage list */ if (*garbage != NULL) { *p = *garbage; *garbage = (*garbage)->next; } else { *p = (gbases *)Malloc(sizeof(gbases)); (*p)->base = (baseptr)Malloc(endsite*sizeof(long)); } (*p)->next = NULL; } /* gnubase */ void chuckbase(gbases *p, gbases **garbage) { /* collect garbage on p -- put it on front of garbage list */ p->next = *garbage; *garbage = p; } /* chuckbase */ void hyptrav(node *r_, long *hypset_, long b1, long b2, boolean bottom_, pointarray treenode, gbases **garbage, Char *basechar) { /* compute, print out states at one interior node */ struct LOC_hyptrav Vars; long i, j, k; long largest; gbases *ancset; nucarray *tempnuc; node *p, *q; Vars.bottom = bottom_; Vars.r = r_; Vars.hypset = hypset_; gnubase(&ancset, garbage, endsite); tempnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); Vars.maybe = false; Vars.nonzero = false; if (!Vars.r->tip) zeronumnuc(Vars.r, endsite); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; Vars.anc = Vars.hypset[j - 1]; if (!Vars.r->tip) { p = Vars.r->next; for (k = (long)A; k <= (long)O; k++) if (Vars.anc & (1 << k)) Vars.r->numnuc[j - 1][k]++; do { for (k = (long)A; k <= (long)O; k++) if (p->back->base[j - 1] & (1 << k)) Vars.r->numnuc[j - 1][k]++; p = p->next; } while (p != Vars.r); largest = getlargest(Vars.r->numnuc[j - 1]); Vars.tempset = 0; for (k = (long)A; k <= (long)O; k++) { if (Vars.r->numnuc[j - 1][k] == largest) Vars.tempset |= (1 << k); } Vars.r->base[j - 1] = Vars.tempset; } if (!Vars.bottom) Vars.anc = treenode[Vars.r->back->index - 1]->base[j - 1]; Vars.nonzero = (Vars.nonzero || (Vars.r->base[j - 1] & Vars.anc) == 0); Vars.maybe = (Vars.maybe || Vars.r->base[j - 1] != Vars.anc); } hyprint(b1, b2, &Vars, treenode, basechar); Vars.bottom = false; if (!Vars.r->tip) { memcpy(tempnuc, Vars.r->numnuc, endsite*sizeof(nucarray)); q = Vars.r->next; do { memcpy(Vars.r->numnuc, 
tempnuc, endsite*sizeof(nucarray)); for (i = b1 - 1; i < b2; i++) { j = location[ally[i] - 1]; for (k = (long)A; k <= (long)O; k++) if (q->back->base[j - 1] & (1 << k)) Vars.r->numnuc[j - 1][k]--; largest = getlargest(Vars.r->numnuc[j - 1]); ancset->base[j - 1] = 0; for (k = (long)A; k <= (long)O; k++) if (Vars.r->numnuc[j - 1][k] == largest) ancset->base[j - 1] |= (1 << k); if (!Vars.bottom) Vars.anc = ancset->base[j - 1]; } hyptrav(q->back, ancset->base, b1, b2, Vars.bottom, treenode, garbage, basechar); q = q->next; } while (q != Vars.r); } chuckbase(ancset, garbage); free(tempnuc); } /* hyptrav */ void hypstates(long chars, node *root, pointarray treenode, gbases **garbage, Char *basechar) { /* fill in and describe states at interior nodes */ /* used in dnacomp, dnapars, & dnapenny */ long i, n; baseptr nothing; fprintf(outfile, "\nFrom To Any Steps? State at upper node\n"); fprintf(outfile, " "); if (dotdiff) fprintf(outfile, " ( . means same as in the node below it on tree)\n"); nothing = (baseptr)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) nothing[i] = 0; for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { putc('\n', outfile); n = i * 40; if (n > chars) n = chars; hyptrav(root, nothing, i * 40 - 39, n, true, treenode, garbage, basechar); } free(nothing); } /* hypstates */ void initbranchlen(node *p) { node *q; p->v = 0.0; if (p->back) p->back->v = 0.0; if (p->tip) return; q = p->next; while (q != p) { initbranchlen(q->back); q = q->next; } q = p->next; while (q != p) { q->v = 0.0; q = q->next; } } /* initbranchlen */ void initmin(node *p, long sitei, boolean internal) { long i; if (internal) { for (i = (long)A; i <= (long)O; i++) { p->cumlengths[i] = 0; p->numreconst[i] = 1; } } else { for (i = (long)A; i <= (long)O; i++) { if (p->base[sitei - 1] & (1 << i)) { p->cumlengths[i] = 0; p->numreconst[i] = 1; } else { p->cumlengths[i] = -1; p->numreconst[i] = 0; } } } } /* initmin */ void initbase(node *p, long sitei) { /* traverse tree to initialize base at internal nodes */ node *q; long i, largest; if (p->tip) return; q = p->next; while (q != p) { if (q->back) { memcpy(q->numnuc, p->numnuc, endsite*sizeof(nucarray)); for (i = (long)A; i <= (long)O; i++) { if (q->back->base[sitei - 1] & (1 << i)) q->numnuc[sitei - 1][i]--; } if (p->back) { for (i = (long)A; i <= (long)O; i++) { if (p->back->base[sitei - 1] & (1 << i)) q->numnuc[sitei - 1][i]++; } } largest = getlargest(q->numnuc[sitei - 1]); q->base[sitei - 1] = 0; for (i = (long)A; i <= (long)O; i++) { if (q->numnuc[sitei - 1][i] == largest) q->base[sitei - 1] |= (1 << i); } } q = q->next; } q = p->next; while (q != p) { initbase(q->back, sitei); q = q->next; } } /* initbase */ void inittreetrav(node *p, long sitei) { /* traverse tree to clear boolean initialized and set up base */ node *q; if (p->tip) { initmin(p, sitei, false); p->initialized = true; return; } q = p->next; while (q != p) { inittreetrav(q->back, sitei); q = q->next; } initmin(p, sitei, true); p->initialized = false; q = p->next; while (q != p) { initmin(q, sitei, true); q->initialized = false; q = q->next; } } /* inittreetrav */ void compmin(node *p, node *desc) { /* computes minimum lengths up to p */ long i, j, minn, cost, desclen, descrecon=0, maxx; maxx = 10 * spp; for (i = (long)A; i <= (long)O; i++) { minn = maxx; for (j = (long)A; j <= (long)O; j++) { if (transvp) { if ( ( ((i == (long)A) || (i == (long)G)) && ((j == (long)A) || (j == (long)G)) ) || ( ((j == (long)C) || (j == (long)T)) && ((i == (long)C) || (i == (long)T)) ) ) cost = 0; else cost = 
1; } else { if (i == j) cost = 0; else cost = 1; } if (desc->cumlengths[j] == -1) { desclen = maxx; } else { desclen = desc->cumlengths[j]; } if (minn > cost + desclen) { minn = cost + desclen; descrecon = 0; } if (minn == cost + desclen) { descrecon += desc->numreconst[j]; } } p->cumlengths[i] += minn; p->numreconst[i] *= descrecon; } p->initialized = true; } /* compmin */ void minpostorder(node *p, pointarray treenode) { /* traverses an n-ary tree, computing minimum steps at each node */ node *q; if (p->tip) { return; } q = p->next; while (q != p) { if (q->back) minpostorder(q->back, treenode); q = q->next; } if (!p->initialized) { q = p->next; while (q != p) { if (q->back) compmin(p, q->back); q = q->next; } } } /* minpostorder */ void branchlength(node *subtr1, node *subtr2, double *brlen, pointarray treenode) { /* computes a branch length between two subtrees for a given site */ long i, j, minn, cost, nom, denom; node *temp; if (subtr1->tip) { temp = subtr1; subtr1 = subtr2; subtr2 = temp; } if (subtr1->index == outgrno) { temp = subtr1; subtr1 = subtr2; subtr2 = temp; } minpostorder(subtr1, treenode); minpostorder(subtr2, treenode); minn = 10 * spp; nom = 0; denom = 0; for (i = (long)A; i <= (long)O; i++) { for (j = (long)A; j <= (long)O; j++) { if (transvp) { if ( ( ((i == (long)A) || (i == (long)G)) && ((j == (long)A) || (j == (long)G)) ) || ( ((j == (long)C) || (j == (long)T)) && ((i == (long)C) || (i == (long)T)) ) ) cost = 0; else cost = 1; } else { if (i == j) cost = 0; else cost = 1; } if (subtr1->cumlengths[i] != -1 && (subtr2->cumlengths[j] != -1)) { if (subtr1->cumlengths[i] + cost + subtr2->cumlengths[j] < minn) { minn = subtr1->cumlengths[i] + cost + subtr2->cumlengths[j]; nom = 0; denom = 0; } if (subtr1->cumlengths[i] + cost + subtr2->cumlengths[j] == minn) { nom += subtr1->numreconst[i] * subtr2->numreconst[j] * cost; denom += subtr1->numreconst[i] * subtr2->numreconst[j]; } } } } *brlen = (double)nom/(double)denom; } /* branchlength */ void printbranchlengths(node *p) { node *q; long i; if (p->tip) return; q = p->next; do { fprintf(outfile, "%6ld ",q->index - spp); if (q->back->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[q->back->index - 1][i], outfile); } else fprintf(outfile, "%6ld ", q->back->index - spp); fprintf(outfile, " %f\n",q->v); if (q->back) printbranchlengths(q->back); q = q->next; } while (q != p); } /* printbranchlengths */ void branchlentrav(node *p, node *root, long sitei, long chars, double *brlen, pointarray treenode) { /* traverses the tree computing tree length at each branch */ node *q; if (p->tip) return; if (p->index == outgrno) p = p->back; q = p->next; do { if (q->back) { branchlength(q, q->back, brlen, treenode); q->v += ((weight[sitei - 1] / 10.0) * (*brlen)/chars); q->back->v += ((weight[sitei - 1] / 10.0) * (*brlen)/chars); if (!q->back->tip) branchlentrav(q->back, root, sitei, chars, brlen, treenode); } q = q->next; } while (q != p); } /* branchlentrav */ void treelength(node *root, long chars, pointarray treenode) { /* calls branchlentrav at each site */ long sitei; double trlen; initbranchlen(root); for (sitei = 1; sitei <= endsite; sitei++) { trlen = 0.0; initbase(root, sitei); inittreetrav(root, sitei); branchlentrav(root, root, sitei, chars, &trlen, treenode); } } /* treelength */ void coordinates(node *p, long *tipy, double f, long *fartemp) { /* establishes coordinates of nodes for display without lengths */ node *q, *first, *last; node *mid1 = NULL, *mid2 = NULL; long numbranches, numb2; if (p->tip) { p->xcoord = 0; p->ycoord 
= *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; return; } numbranches = 0; q = p->next; do { coordinates(q->back, tipy, f, fartemp); numbranches += 1; q = q->next; } while (p != q); first = p->next->back; q = p->next; while (q->next != p) q = q->next; last = q->back; numb2 = 1; q = p->next; while (q != p) { if (numb2 == (long)(numbranches + 1)/2) mid1 = q->back; if (numb2 == (long)(numbranches/2 + 1)) mid2 = q->back; numb2 += 1; q = q->next; } p->xcoord = (long)((double)(last->ymax - first->ymin) * f); p->ycoord = (long)((mid1->ycoord + mid2->ycoord) / 2); p->ymin = first->ymin; p->ymax = last->ymax; if (p->xcoord > *fartemp) *fartemp = p->xcoord; } /* coordinates */ void drawline(long i, double scale, node *root) { /* draws one row of the tree diagram by moving up tree */ node *p, *q, *r, *first =NULL, *last =NULL; long n, j; boolean extra, done, noplus; p = root; q = root; extra = false; noplus = false; if (i == (long)p->ycoord && p == root) { if (p->index - spp >= 10) fprintf(outfile, " %2ld", p->index - spp); else fprintf(outfile, " %ld", p->index - spp); extra = true; noplus = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)(scale * (p->xcoord - q->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if (noplus) { putc('-', outfile); noplus = false; } else putc('+', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; noplus = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && i != (long)p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } noplus = false; } else { for (j = 1; j <= n; j++) putc(' ', outfile); noplus = false; } if (p != q) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree(node *root, double f) { /* prints out diagram of the tree */ /* used in dnacomp, dnapars, & dnapenny */ long i, tipy, dummy; double scale; putc('\n', outfile); if (!treeprint) return; putc('\n', outfile); tipy = 1; dummy = 0; coordinates(root, &tipy, f, &dummy); scale = 1.5; putc('\n', outfile); for (i = 1; i <= (tipy - down); i++) drawline(i, scale, root); fprintf(outfile, "\n remember:"); if (outgropt) fprintf(outfile, " (although rooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n\n"); } /* printree */ void writesteps(long chars, boolean weights, steptr oldweight, node *root) { /* used in dnacomp, dnapars, & dnapenny */ long i, j, k, l; putc('\n', outfile); if (weights) fprintf(outfile, "weighted "); fprintf(outfile, "steps in each site:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%4ld", i); fprintf(outfile, "\n *------------------------------------"); fprintf(outfile, "-----\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld", i * 10); putc('|', outfile); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k == 0 || k > chars) fprintf(outfile, " "); else { 
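/* a real site: map k through ally/location to its alias pattern l, then print that pattern's steps rescaled by the site's original weight */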
l = location[ally[k - 1] - 1]; if (oldweight[k - 1] > 0) fprintf(outfile, "%4ld", oldweight[k - 1] * (root->numsteps[l - 1] / weight[l - 1])); else fprintf(outfile, " 0"); } } putc('\n', outfile); } } /* writesteps */ void treeout(node *p, long nextree, long *col, node *root) { /* write out file with representation of final tree */ /* used in dnacomp, dnamove, dnapars, & dnapenny */ node *q; long i, n; Char c; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; q = p->next; while (q != p) { treeout(q->back, nextree, col, root); q = q->next; if (q == p) break; putc(',', outtree); (*col)++; if (*col > 60) { putc('\n', outtree); *col = 0; } } putc(')', outtree); (*col)++; } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout */ void treeout3(node *p, long nextree, long *col, node *root) { /* write out file with representation of final tree */ /* used in dnapars -- writes branch lengths */ node *q; long i, n, w; double x; Char c; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index - 1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index - 1][i]; if (c == ' ') c = '_'; putc(c, outtree); } *col += n; } else { putc('(', outtree); (*col)++; q = p->next; while (q != p) { treeout3(q->back, nextree, col, root); q = q->next; if (q == p) break; putc(',', outtree); (*col)++; if (*col > 60) { putc('\n', outtree); *col = 0; } } putc(')', outtree); (*col)++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p != root) { fprintf(outtree, ":%*.5f", (int)(w + 7), x); *col += w + 8; } if (p != root) return; if (nextree > 2) fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); else fprintf(outtree, ";\n"); } /* treeout3 */ /* FIXME curtree should probably be passed by reference */ void drawline2(long i, double scale, tree curtree) { fdrawline2(outfile, i, scale, &curtree); } void fdrawline2(FILE *fp, long i, double scale, tree *curtree) { /* draws one row of the tree diagram by moving up tree */ /* used in dnaml & restml */ node *p, *q; long n, j; boolean extra; node *r, *first =NULL, *last =NULL; boolean done; p = curtree->start; q = curtree->start; extra = false; if (i == (long)p->ycoord && p == curtree->start) { if (p->index - spp >= 10) fprintf(fp, " %2ld", p->index - spp); else fprintf(fp, " %ld", p->index - spp); extra = true; } else fprintf(fp, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || (p != curtree->start && r == p) || (p == curtree->start && r == p->next))); first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; if (p == curtree->start) last = p->back; } done = (p->tip || p == q); n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)p->ycoord != (long)q->ycoord) putc('+', fp); else putc('-', fp); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', fp); if (q->index - spp >= 10) fprintf(fp, "%2ld", q->index - spp); else fprintf(fp, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', fp); } } else if 
(!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && (i != (long)p->ycoord || p == curtree->start)) { putc('|', fp); for (j = 1; j < n; j++) putc(' ', fp); } else { for (j = 1; j <= n; j++) putc(' ', fp); } } else { for (j = 1; j <= n; j++) putc(' ', fp); } if (q != p) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index-1][j], fp); } putc('\n', fp); } /* drawline2 */ void drawline3(long i, double scale, node *start) { /* draws one row of the tree diagram by moving up tree */ /* used in dnapars */ node *p, *q; long n, j; boolean extra; node *r, *first =NULL, *last =NULL; boolean done; p = start; q = start; extra = false; if (i == (long)p->ycoord) { if (p->index - spp >= 10) fprintf(outfile, " %2ld", p->index - spp); else fprintf(outfile, " %ld", p->index - spp); extra = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || (r == p))); first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; } done = (p->tip || p == q); n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if ((long)p->ycoord != (long)q->ycoord) putc('+', outfile); else putc('-', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if ((long)last->ycoord > i && (long)first->ycoord < i && (i != (long)p->ycoord || p == start)) { putc('|', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } } else { for (j = 1; j <= n; j++) putc(' ', outfile); } if (q != p) p = q; } while (!done); if ((long)p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index-1][j], outfile); } putc('\n', outfile); } /* drawline3 */ void copynode(node *c, node *d, long categs) { long i, j; for (i = 0; i < endsite; i++) for (j = 0; j < categs; j++) memcpy(d->x[i][j], c->x[i][j], sizeof(sitelike)); memcpy(d->underflows,c->underflows,sizeof(double) * endsite); d->tyme = c->tyme; d->v = c->v; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; d->iter = c->iter; /* iter used in dnaml only */ d->haslength = c->haslength; /* haslength used in dnamlk only */ d->initialized = c->initialized; /* initialized used in dnamlk only */ } /* copynode */ void prot_copynode(node *c, node *d, long categs) { /* a version of copynode for proml */ long i, j; for (i = 0; i < endsite; i++) for (j = 0; j < categs; j++) memcpy(d->protx[i][j], c->protx[i][j], sizeof(psitelike)); memcpy(d->underflows,c->underflows,sizeof(double) * endsite); d->tyme = c->tyme; d->v = c->v; d->xcoord = c->xcoord; d->ycoord = c->ycoord; d->ymin = c->ymin; d->ymax = c->ymax; d->iter = c->iter; /* iter used in dnaml only */ d->haslength = c->haslength; /* haslength used in dnamlk only */ d->initialized = c->initialized; /* initialized used in dnamlk only */ } /* prot_copynode */ void copy_(tree *a, tree *b, long nonodes, long categs) { /* used in dnamlk */ long i; node *p, *q, *r, *s, *t; for (i = 0; i < spp; i++) { copynode(a->nodep[i], b->nodep[i], categs); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index 
- 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { if (a->nodep[i]) { p = a->nodep[i]; q = b->nodep[i]; r = p; do { copynode(p, q, categs); if (p->back) { s = a->nodep[p->back->index - 1]; t = b->nodep[p->back->index - 1]; if (s->tip) { if(p->back == s) q->back = t; } else { do { if (p->back == s) q->back = t; s = s->next; t = t->next; } while (s != a->nodep[p->back->index - 1]); } } else q->back = NULL; p = p->next; q = q->next; } while (p != r); } } b->likelihood = a->likelihood; b->start = a->start; /* start used in dnaml only */ b->root = a->root; /* root used in dnamlk only */ } /* copy_ */ void prot_copy_(tree *a, tree *b, long nonodes, long categs) { /* used in promlk */ /* identical to copy_() except for calls to prot_copynode rather */ /* than copynode. */ long i; node *p, *q, *r, *s, *t; for (i = 0; i < spp; i++) { prot_copynode(a->nodep[i], b->nodep[i], categs); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next ) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { if (a->nodep[i]) { p = a->nodep[i]; q = b->nodep[i]; r = p; do { prot_copynode(p, q, categs); if (p->back) { s = a->nodep[p->back->index - 1]; t = b->nodep[p->back->index - 1]; if (s->tip) { if(p->back == s) q->back = t; } else { do { if (p->back == s) q->back = t; s = s->next; t = t->next; } while (s != a->nodep[p->back->index - 1]); } } else q->back = NULL; p = p->next; q = q->next; } while (p != r); } } b->likelihood = a->likelihood; b->start = a->start; /* start used in dnaml only */ b->root = a->root; /* root used in dnamlk only */ } /* prot_copy_ */ void standev(long chars, long numtrees, long minwhich, double minsteps, double *nsteps, long **fsteps, longer seed) { /* compute and write standard deviation of user trees */ /* used in dnapars & protpars */ long i, j, k; double wt, sumw, sum, sum2, sd; double temp; double **covar, *P, *f; #define SAMPLES 1000 /* ????? 
if numtrees too big for Shimo, truncate */ if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree Steps Diff Steps Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); which = 1; while (which <= numtrees) { fprintf(outfile, "%3ld%10.1f", which, nsteps[which - 1] / 10); if (minwhich == which) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (i = 0; i < chars; i++) { if (weight[i] > 0) { wt = weight[i] / 10.0; sumw += wt; temp = (fsteps[which - 1][i] - fsteps[minwhich - 1][i]) / 10.0; sum += temp; sum2 += temp * temp / wt; } } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, "%10.1f%12.4f", (nsteps[which - 1] - minsteps) / 10, sd); if ((sum > 0.0) && (sum > 1.95996 * sd)) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } which++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ if(numtrees > MAXSHIMOTREES){ fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" , MAXSHIMOTREES, numtrees); numtrees = MAXSHIMOTREES; } else { fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); } covar = (double **)Malloc(numtrees*sizeof(double *)); sumw = 0.0; for (i = 0; i < chars; i++) sumw += weight[i]; for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = nsteps[i]/(10.0*sumw); for (j = 0; j <=i; j++) { sum2 = nsteps[j]/(10.0*sumw); temp = 0.0; for (k = 0; k < chars; k++) { if (weight[k] > 0) { wt = weight[k]/10.0; temp = temp + wt*(fsteps[i][k]/(10.0*wt)-sum) *(fsteps[j][k]/(10.0*wt)-sum2); } } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-12) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; sum2 = nsteps[0]/10.0; /* sum2 will be smallest # of steps */ for (i = 1; i < numtrees; i++) if (sum2 > nsteps[i]/10.0) sum2 = nsteps[i]/10.0; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get min of vector */ if (f[j] < sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (nsteps[j]/10.0-sum2 <= f[j] - sum) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree Steps Diff Steps P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld%10.1f", i+1, nsteps[i]/10); if ((minwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %9.1f %10.3f", nsteps[i]/10.0-sum2, P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < numtrees; i++) free(covar[i]); free(covar); } } /* standev */ void standev2(long numtrees, long maxwhich, long a, 
long b, double maxlogl, double *l0gl, double **l0gf, steptr aliasweight, longer seed) { /* compute and write standard deviation of user trees */ /* used in dnaml, dnamlk, proml, promlk, and restml */ double **covar, *P, *f; long i, j, k; double wt, sumw, sum, sum2, sd; double temp; #define SAMPLES 1000 /* ????? if numtrees too big for Shimo, truncate */ if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree logL Diff logL Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); which = 1; while (which <= numtrees) { fprintf(outfile, "%3ld %9.1f", which, l0gl[which - 1]); if (maxwhich == which) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (i = a; i <= b; i++) { if (aliasweight[i] > 0) { wt = aliasweight[i]; sumw += wt; temp = l0gf[which - 1][i] - l0gf[maxwhich - 1][i]; sum += temp; sum2 += temp * temp / wt; } } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, "%10.1f %11.4f", (l0gl[which - 1])-maxlogl, sd); if ((sum < 0.0) && ((-sum) > 1.95996 * sd)) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } which++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ if(numtrees > MAXSHIMOTREES){ fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" , MAXSHIMOTREES, numtrees); numtrees = MAXSHIMOTREES; } else { fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); } covar = (double **)Malloc(numtrees*sizeof(double *)); sumw = 0.0; for (i = a; i <= b; i++) sumw += aliasweight[i]; for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = l0gl[i]/sumw; for (j = 0; j <=i; j++) { sum2 = l0gl[j]/sumw; temp = 0.0; for (k = a; k <= b ; k++) { if (aliasweight[k] > 0) { wt = aliasweight[k]; temp = temp + wt*(l0gf[i][k]/(10.0*wt)-sum) *(l0gf[j][k]/(10.0*wt)-sum2); } } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-12) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get max of vector */ if (f[j] > sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (maxlogl-l0gl[j] <= sum-f[j]) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree logL Diff logL P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld%10.1f", i+1, l0gl[i]); if ((maxwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %9.1f %10.3f", l0gl[i]-maxlogl, P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < 
numtrees; i++) free(covar[i]); free(covar); } } /* standev */ void freetip(node *anode) { /* used in dnacomp, dnapars, & dnapenny */ free(anode->numsteps); free(anode->oldnumsteps); free(anode->base); free(anode->oldbase); } /* freetip */ void freenontip(node *anode) { /* used in dnacomp, dnapars, & dnapenny */ free(anode->numsteps); free(anode->oldnumsteps); free(anode->base); free(anode->oldbase); free(anode->numnuc); } /* freenontip */ void freenodes(long nonodes, pointarray treenode) { /* used in dnacomp, dnapars, & dnapenny */ long i; node *p; for (i = 0; i < spp; i++) freetip(treenode[i]); for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]->next; do { freenontip(p); p = p->next; } while (p != treenode[i]); freenontip(p); } } } /* freenodes */ void freenode(node **anode) { /* used in dnacomp, dnapars, & dnapenny */ freenontip(*anode); free(*anode); } /* freenode */ void freetree(long nonodes, pointarray treenode) { /* used in dnacomp, dnapars, & dnapenny */ long i; node *p, *q; for (i = 0; i < spp; i++) free(treenode[i]); for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]->next; do { q = p->next; free(p); p = q; } while (p != treenode[i]); free(p); } } free(treenode); } /* freetree */ void prot_freex_notip(long nonodes, pointarray treenode) { /* used in proml */ long i, j; node *p; for (i = spp; i < nonodes; i++) { p = treenode[i]; do { for (j = 0; j < endsite; j++){ free(p->protx[j]); p->protx[j] = NULL; } free(p->underflows); p->underflows = NULL; free(p->protx); p->protx = NULL; p = p->next; } while (p != treenode[i]); } } /* prot_freex_notip */ void prot_freex(long nonodes, pointarray treenode) { /* used in proml */ long i, j; node *p; for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->protx[j]); free(treenode[i]->protx); free(treenode[i]->underflows); } for (i = spp; i < nonodes; i++) { p = treenode[i]; do { for (j = 0; j < endsite; j++) free(p->protx[j]); free(p->protx); free(p->underflows); p = p->next; } while (p != treenode[i]); } } /* prot_freex */ void freex_notip(long nonodes, pointarray treenode) { /* used in dnaml & dnamlk */ long i, j; node *p; for (i = spp; i < nonodes; i++) { p = treenode[i]; do { for (j = 0; j < endsite; j++) free(p->x[j]); free(p->underflows); free(p->x); p = p->next; } while (p != treenode[i]); } } /* freex_notip */ void freex(long nonodes, pointarray treenode) { /* used in dnaml & dnamlk */ long i, j; node *p; for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->x[j]); free(treenode[i]->x); free(treenode[i]->underflows); } for (i = spp; i < nonodes; i++) { if(treenode[i]){ p = treenode[i]; do { for (j = 0; j < endsite; j++) free(p->x[j]); free(p->x); free(p->underflows); p = p->next; } while (p != treenode[i]); } } } /* freex */ void freegarbage(gbases **garbage) { /* used in dnacomp, dnapars, & dnapenny */ gbases *p; while (*garbage) { p = *garbage; *garbage = (*garbage)->next; free(p->base); free(p); } } /*freegarbage */ void freegrbg(node **grbg) { /* used in dnacomp, dnapars, & dnapenny */ node *p; while (*grbg) { p = *grbg; *grbg = (*grbg)->next; freenontip(p); free(p); } } /*freegrbg */ void collapsetree(node *p, node *root, node **grbg, pointarray treenode, long *zeros) { /* Recurse through tree searching for zero length brances between */ /* nodes (not to tips). If one exists, collapse the nodes together, */ /* removing the branch. 
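Editorial note, not in the original source: when an interior branch with
q->v == 0 is found (and q->back is not a tip), the two fork rings are
spliced into one ring (x1 and y1 are the predecessors of q and q->back in
their rings), q and q->back are returned to the grbg free list with chuck(),
every node of the merged ring is given q's index, and updatenumdesc() fixes
the descendant count.  treenode[] is then compacted: entries above the
vacated slot are shifted down one place and renumbered, and a fresh empty
three-node fork ring is built in treenode[nonodes-1] so a spare fork is
still available there.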
*/ node *q, *x1, *y1, *x2, *y2; long i, /*j,*/ index, index2, numd; if (p->tip) return; q = p->next; do { if (!q->back->tip && q->v == 0.000000) { /* merge the two nodes. */ x1 = y2 = q->next; x2 = y1 = q->back->next; while(x1->next != q) x1 = x1-> next; while(y1->next != q->back) y1 = y1-> next; x1->next = x2; y1->next = y2; index = q->index; index2 = q->back->index; numd = treenode[index-1]->numdesc + q->back->numdesc -1; chuck(grbg, q->back); chuck(grbg, q); q = x2; /* update the indices around the node circle */ do{ if(q->index != index){ q->index = index; } q = q-> next; }while(x2 != q); updatenumdesc(treenode[index-1], root, numd); /* Alter treenode to point to real nodes, and update indices */ /* accordingly. */ /*j = 0;*/ i=0; for(i = (index2-1); i < nonodes-1 && treenode[i+1]; i++){ treenode[i]=treenode[i+1]; treenode[i+1] = NULL; x1=x2=treenode[i]; do{ x1->index = i+1; x1 = x1 -> next; } while(x1 != x2); } /* Create a new empty fork in the blank spot of treenode */ x1=NULL; for(i=1; i <=3 ; i++){ gnutreenode(grbg, &x2, index2, endsite, zeros); x2->next = x1; x1 = x2; } x2->next->next->next = x2; treenode[nonodes-1]=x2; if (q->back) collapsetree(q->back, root, grbg, treenode, zeros); } else { if (q->back) collapsetree(q->back, root, grbg, treenode, zeros); q = q->next; } } while (q != p); } /* collapsetree */ void collapsebestrees(node **root, node **grbg, pointarray treenode, bestelm *bestrees, long *place, long *zeros, long chars, boolean recompute, boolean progress) { /* Goes through all best trees, collapsing trees where possible, and */ /* deleting trees that are not unique. */ long i,j, k, pos, nextnode, oldnextree; boolean found; node *dummy; oldnextree = nextree; for(i = 0 ; i < (oldnextree - 1) ; i++){ bestrees[i].collapse = true; } if(progress) printf("Collapsing best trees\n "); k = 0; for(i = 0 ; i < (oldnextree - 1) ; i++){ if(progress){ if(i % (((oldnextree-1) / 72) + 1) == 0) putchar('.'); fflush(stdout); } while(!bestrees[k].collapse) k++; /* Reconstruct tree. 
*/ *root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], root, recompute, treenode, grbg, zeros); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[k].btree[j - 1] > 0) add(treenode[bestrees[k].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], root, recompute, treenode, grbg, zeros); else add(treenode[treenode[-bestrees[k].btree[j - 1]-1]->back->index-1], treenode[j - 1], NULL, root, recompute, treenode, grbg, zeros); } reroot(treenode[outgrno - 1], *root); treelength(*root, chars, treenode); collapsetree(*root, *root, grbg, treenode, zeros); savetree(*root, place, treenode, grbg, zeros); /* move everything down in the bestree list */ for(j = k ; j < (nextree - 2) ; j++){ memcpy(bestrees[j].btree, bestrees[j + 1].btree, spp * sizeof(long)); bestrees[j].gloreange = bestrees[j + 1].gloreange; bestrees[j + 1].gloreange = false; bestrees[j].locreange = bestrees[j + 1].locreange; bestrees[j + 1].locreange = false; bestrees[j].collapse = bestrees[j + 1].collapse; } pos=0; findtree(&found, &pos, nextree-1, place, bestrees); /* put the new tree at the end of the list if it wasn't found */ nextree--; if(!found) addtree(pos, &nextree, false, place, bestrees); /* Deconstruct the tree */ for (j = 1; j < spp; j++){ re_move(treenode[j], &dummy, root, recompute, treenode, grbg, zeros); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } PHYLIPNEW-3.69.650/src/clique.c0000664000175000017500000010461111305225544012507 00000000000000#include "phylip.h" #include "disc.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Jerry Shurman, Hisashi Horino, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define FormWide 80 /* width of outfile page */ AjPPhyloState* phylostates; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylofact = NULL; AjPPhyloProp phyloweights = NULL; typedef boolean *aPtr; typedef long *SpPtr, *ChPtr; typedef struct vecrec { aPtr vec; struct vecrec *next; } vecrec; typedef vecrec **aDataPtr; typedef vecrec **Matrix; #ifndef OLDC /* function prototypes */ void clique_gnu(vecrec **); void clique_chuck(vecrec *); void nunode(node **); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void clique_setuptree(void); void allocrest(void); void doinit(void); void clique_inputancestors(void); void clique_printancestors(void); void clique_inputfactors(void); void inputoptions(void); void clique_inputdata(void); boolean Compatible(long, long); void SetUp(vecrec **); void Intersect(boolean *, boolean *, boolean *); long CountStates(boolean *); void Gen1(long , long, boolean *, boolean *, boolean *); boolean Ingroupstate(long ); void makeset(void); void Init(long *, long *, long *, aPtr); void ChSort(long *, long *, long); void PrintClique(boolean *); void bigsubset(long *, long); void recontraverse(node **, long *, long, long); void reconstruct(long, long); void reroot(node *); void clique_coordinates(node *, long *, long); void clique_drawline(long); void clique_printree(void); void DoAll(boolean *, boolean *, boolean *, long); void Gen2(long, long, boolean *, boolean *, boolean *); void GetMaxCliques(vecrec **); void reallocchars(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; long ActualChars, Cliqmin, outgrno, col, ith, msets, setsz; boolean ancvar, Clmin, Factors, outgropt, trout, weights, noroot, justwts, printcomp, progress, treeprint, mulsets, firstset; long nodes; aPtr ancone; Char *Factor; long *ActChar, *oldweight; aDataPtr Data; Matrix Comp; /* the character compatibility matrix */ node *root; long **grouping; pointptr treenode = NULL; /* pointers to all nodes in tree */ vecrec *garbage; /* these variables are to DoAll in the pascal Version. */ aPtr aChars; boolean *Rarer; long n, MaxChars; SpPtr SpOrder; ChPtr ChOrder; /* variables for GetMaxCliques: */ vecrec **Comp2; long tcount; aPtr Temp, Processed, Rarer2; void clique_gnu(vecrec **p) { /* this and the following are do-it-yourself garbage collectors. 
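Editorial note, not in the original source: clique_chuck() pushes a vecrec
onto the shared 'garbage' list instead of freeing it, and clique_gnu()
reuses records from that list when one is available, so the per-character
boolean vectors are Malloc'ed only once and recycled across the recursive
clique searches in Gen1() and Gen2().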
Make a new node or pull one off the garbage list */ if (garbage != NULL) { *p = garbage; garbage = garbage->next; } else { *p = (vecrec *)Malloc((long)sizeof(vecrec)); (*p)->vec = (aPtr)Malloc((long)chars*sizeof(boolean)); } (*p)->next = NULL; } /* clique_gnu */ void clique_chuck(vecrec *p) { /* collect garbage on p -- put it on front of garbage list */ p->next = garbage; garbage = p; } /* clique_chuck */ void nunode(node **p) { /* replacement for NEW */ *p = (node *)Malloc((long)sizeof(node)); (*p)->next = NULL; (*p)->tip = false; } /* nunode */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; ancvar = false; Clmin = false; Factors = false; outgrno = 1; outgropt = false; trout = true; weights = false; justwts = false; printdata = false; printcomp = false; progress = true; treeprint = true; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; phylofact = ajAcdGetProperties("factorfile"); if(phylofact) Factors = true; Cliqmin = ajAcdGetInt("cliqmin"); if(Cliqmin != 0) Clmin = true; outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); printcomp = ajAcdGetBoolean("printcomp"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nLargest clique program, version %s\n\n",VERSION); } /* emboss_getoptions */ void clique_setuptree(void) { /* initialization of tree pointers, variables */ long i; if(!treenode) treenode = (pointptr)Malloc((long)spp*sizeof(node *)); for (i = 0; i < spp; i++) { if(!treenode[i]) treenode[i] = (node *)Malloc((long)sizeof(node)); treenode[i]->next = NULL; treenode[i]->back = NULL; treenode[i]->index = i + 1; treenode[i]->tip = false; } } /* clique_setuptree */ void reallocchars(void) { long i; Comp = (Matrix)Malloc((long)chars*sizeof(vecrec *)); for (i = 0; i < (chars); i++) clique_gnu(&Comp[i]); ancone = (aPtr)Malloc((long)chars*sizeof(boolean)); Factor = (Char *)Malloc((long)chars*sizeof(Char)); ActChar = (long *)Malloc((long)chars*sizeof(long)); oldweight = (long *)Malloc((long)chars*sizeof(long)); weight = (long *)Malloc((long)chars*sizeof(long)); ActualChars = chars; for (i = 1; i <= (chars); i++) ActChar[i - 1] = i; } void allocrest(void) { long i; Data = (aDataPtr)Malloc((long)spp*sizeof(vecrec *)); for (i = 0; i < (spp); i++) clique_gnu(&Data[i]); Comp = (Matrix)Malloc((long)chars*sizeof(vecrec *)); for (i = 0; i < (chars); i++) clique_gnu(&Comp[i]); setsz = (long)ceil(((double)spp+1.0)/(double)SETBITS); ancone = (aPtr)Malloc((long)chars*sizeof(boolean)); Factor = (Char *)Malloc((long)chars*sizeof(Char)); ActChar = (long *)Malloc((long)chars*sizeof(long)); oldweight = (long *)Malloc((long)chars*sizeof(long)); weight = (long *)Malloc((long)chars*sizeof(long)); nayme = (naym *)Malloc((long)spp*sizeof(naym)); } /* allocrest */ 
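/* Editorial note, not part of the original PHYLIP source: a short sketch of
   the compatibility criterion this program searches with.  Compatible()
   below tests a pair of binary characters by recording which of the four
   state combinations (0,0) (0,1) (1,0) (1,1) occur among the species (plus
   the ancestor, if ancestral states were supplied); the pair is compatible
   unless all four occur.  For example, with four species scored

       character j:  0 0 1 1
       character k:  0 1 0 1

   every combination appears, so j and k are incompatible: no tree allows
   both to evolve with a single change each.  SetUp() fills the matrix Comp
   with these pairwise results, and Gen1() and Gen2() then search it for the
   largest clique of mutually compatible characters; for two-state characters,
   pairwise compatibility of a set implies joint compatibility, so makeset()
   and reconstruct() can build a tree directly from the species subsets
   defined by the clique's characters. */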
void doinit(void) { /* initializes variables */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld characters\n", spp, chars); clique_setuptree(); allocrest(); } /* doinit */ void clique_inputancestors(void) { /* reads the ancestral states for each character */ long i; Char ch = ' '; for (i = 0; i < (chars); i++) { ch = ajStrGetCharPos(phyloanc->Str[0], i); switch (ch) { case '1': ancone[i] = true; break; case '0': ancone[i] = false; break; default: printf("BAD ANCESTOR STATE: %c AT CHARACTER %4ld\n", ch, i + 1); ajExitBad(); } } } /* clique_inputancestors */ void clique_printancestors(void) { /* print out list of ancestral states */ long i; fprintf(outfile, "Ancestral states:\n"); for (i = 1; i <= nmlngth + 2; i++) putc(' ', outfile); for (i = 1; i <= (chars); i++) { newline(outfile, i, 55, (long)nmlngth + 1); if (ancone[i - 1]) putc('1', outfile); else putc('0', outfile); if (i % 5 == 0) putc(' ', outfile); } fprintf(outfile, "\n\n"); } /* clique_printancestors */ void clique_inputfactors(void) { /* reads the factor symbols */ long i; ActualChars = 1; for (i = 1; i <= (chars); i++) { Factor[i - 1] = ajStrGetCharPos(phylofact->Str[0], i-1); if (i > 1) { if (Factor[i - 1] != Factor[i - 2]) ActualChars++; } ActChar[i - 1] = ActualChars; } } /* clique_inputfactors */ void inputoptions(void) { /* reads the species names and character data */ long i; if(justwts){ if(!firstset) samenumspstate(phylostates[ith-1], &chars, ith); if(firstset){ ActualChars = chars; for (i = 1; i <= (chars); i++) ActChar[i - 1] = i; } else reallocchars(); for (i = 0; i < (chars); i++) oldweight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], chars, oldweight, &weights); if(firstset && ancvar) clique_inputancestors(); if(firstset && Factors) clique_inputfactors(); if (printdata) printweights(outfile, 0, ActualChars, oldweight, "Characters"); if (Factors) printfactors(outfile, chars, Factor, ""); if (firstset && ancvar && printdata) clique_printancestors(); noroot = !(outgropt || ancvar); } else { ActualChars = chars; for (i = 1; i <= (chars); i++) ActChar[i - 1] = i; for (i = 0; i < (chars); i++) oldweight[i] = 1; if(weights) inputweightsstr(phyloweights->Str[0], chars, oldweight, &weights); if(ancvar) clique_inputancestors(); if(Factors) clique_inputfactors(); if (weights && printdata) printweights(outfile, 0, ActualChars, oldweight, "Characters"); if (Factors) printfactors(outfile, chars, Factor, ""); if (ancvar && printdata) clique_printancestors(); noroot = !(outgropt || ancvar); } } /* inputoptions */ void clique_inputdata(void) { long i, j; Char ch; j = chars / 2 + (chars / 5 - 1) / 2 - 5; if (j < 0) j = 0; if (j > 27) j = 27; if (printdata) { fprintf(outfile, "Species "); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Character states\n"); fprintf(outfile, "------- "); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "--------- ------\n\n"); } for (i = 0; i < (spp); i++) { initnamestate(phylostates[ith-1],i); if (printdata) for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); if (printdata) fprintf(outfile, " "); for (j = 1; j <= (chars); j++) { ch = ajStrGetCharPos(phylostates[ith-1]->Str[i],j-1); if (printdata) { putc(ch, outfile); newline(outfile, j, 55, (long)nmlngth + 1); if (j % 5 == 0) putc(' ', outfile); } if (ch != '0' && ch != '1') { printf("\n\nERROR: Bad character state: %c (not 0 or 1)", ch); printf(" at character %ld of species %ld\n\n", j, i + 1); exxit(-1); } Data[i]->vec[j - 1] = (ch == '1'); } if 
(printdata) putc('\n', outfile); } putc('\n', outfile); for (i = 0; i < (chars); i++) { if (i + 1 == 1 || !Factors) weight[i] = oldweight[i]; else if (Factor[i] != Factor[i - 1]) weight[ActChar[i] - 1] = oldweight[i]; } } /* clique_inputdata */ boolean Compatible(long ch1, long ch2) { /* TRUE if two characters ch1 < ch2 are compatible */ long i, j, k; boolean Compt, Done1, Done2; boolean Info[4]; Compt = true; j = 1; while (ch1 > ActChar[j - 1]) j++; Done1 = (ch1 != ActChar[j - 1]); while (!Done1) { k = j; while (ch2 > ActChar[k - 1]) k++; Done2 = (ch2 != ActChar[k - 1]); while (!Done2) { for (i = 0; i <= 3; i++) Info[i] = false; if (ancvar) { if (ancone[j - 1] && ancone[k - 1]) Info[0] = true; else if (ancone[j - 1] && !ancone[k - 1]) Info[1] = true; else if (!ancone[j - 1] && ancone[k - 1]) Info[2] = true; else Info[3] = true; } for (i = 0; i < (spp); i++) { if (Data[i]->vec[j - 1] && Data[i]->vec[k - 1]) Info[0] = true; else if (Data[i]->vec[j - 1] && !Data[i]->vec[k - 1]) Info[1] = true; else if (!Data[i]->vec[j - 1] && Data[i]->vec[k - 1]) Info[2] = true; else Info[3] = true; } Compt = (Compt && !(Info[0] && Info[1] && Info[2] && Info[3])); k++; Done2 = (k > chars); if (!Done2) Done2 = (ch2 != ActChar[k - 1]); } j++; Done1 = (j > chars); if (!Done1) Done1 = (ch1 != ActChar[j - 1]); } return Compt; } /* Compatible */ void SetUp(vecrec **Comp) { /* sets up the compatibility matrix */ long i, j; if (printcomp) { if (Factors) fprintf(outfile, " (For original multistate characters)\n"); fprintf(outfile, "Character Compatibility Matrix (1 if compatible)\n"); fprintf(outfile, "--------- ------------- ------ -- -- -----------\n\n"); } for (i = 0; i < (ActualChars); i++) { if (printcomp) { for (j = 1; j <= ((48 - ActualChars) / 2); j++) putc(' ', outfile); for (j = 1; j < i + 1; j++) { if (Comp[i]->vec[j - 1]) putc('1', outfile); else putc('.', outfile); newline(outfile, j, 70, (long)nmlngth + 1); } } Comp[i]->vec[i] = true; if (printcomp) putc('1', outfile); for (j = i + 1; j < (ActualChars); j++) { Comp[i]->vec[j] = Compatible(i + 1, j + 1); if (printcomp) { if (Comp[i]->vec[j]) putc('1', outfile); else putc('.', outfile); } Comp[j]->vec[i] = Comp[i]->vec[j]; } if (printcomp) putc('\n', outfile); } putc('\n', outfile); } /* SetUp */ void Intersect(boolean *V1, boolean *V2, boolean *V3) { /* takes the logical intersection V1 AND V2 */ long i; for (i = 0; i < (ActualChars); i++) V3[i] = (V1[i] && V2[i]); } /* Intersect */ long CountStates(boolean *V) { /* counts the 1's in V */ long i, TempCount; TempCount = 0; for (i = 0; i < (ActualChars); i++) { if (V[i]) TempCount += weight[i]; } return TempCount; } /* CountStates */ void Gen1(long i, long CurSize, boolean *aChars, boolean *Candidates, boolean *Excluded) { /* finds largest size cliques and prints them out */ long CurSize2, j, k, Actual, Possible; boolean Futile; vecrec *Chars2, *Cands2, *Excl2, *Cprime, *Exprime; clique_gnu(&Chars2); clique_gnu(&Cands2); clique_gnu(&Excl2); clique_gnu(&Cprime); clique_gnu(&Exprime); CurSize2 = CurSize; memcpy(Chars2->vec, aChars, chars*sizeof(boolean)); memcpy(Cands2->vec, Candidates, chars*sizeof(boolean)); memcpy(Excl2->vec, Excluded, chars*sizeof(boolean)); j = i; while (j <= ActualChars) { if (Cands2->vec[j - 1]) { Chars2->vec[j - 1] = true; Cands2->vec[j - 1] = false; CurSize2 += weight[j - 1]; Possible = CountStates(Cands2->vec); Intersect(Cands2->vec, Comp2[j - 1]->vec, Cprime->vec); Actual = CountStates(Cprime->vec); Intersect(Excl2->vec, Comp2[j - 1]->vec, Exprime->vec); Futile = false; for (k = 
0; k <= j - 2; k++) { if (Exprime->vec[k] && !Futile) { Intersect(Cprime->vec, Comp2[k]->vec, Temp); Futile = (CountStates(Temp) == Actual); } } if (CurSize2 + Actual >= Cliqmin && !Futile) { if (Actual > 0) Gen1(j + 1,CurSize2,Chars2->vec,Cprime->vec,Exprime->vec); else if (CurSize2 > Cliqmin) { Cliqmin = CurSize2; if (tcount >= 0) tcount = 1; } else if (CurSize2 == Cliqmin) tcount++; } if (Possible > Actual) { Chars2->vec[j - 1] = false; Excl2->vec[j - 1] = true; CurSize2 -= weight[j - 1]; } else j = ActualChars; } j++; } clique_chuck(Chars2); clique_chuck(Cands2); clique_chuck(Excl2); clique_chuck(Cprime); clique_chuck(Exprime); } /* Gen1 */ boolean Ingroupstate(long i) { /* the ingroup state for the i-th character */ boolean outstate; if (noroot) { outstate = Data[0]->vec[i - 1]; return (!outstate); } if (ancvar) outstate = ancone[i - 1]; else outstate = Data[outgrno - 1]->vec[i - 1]; return (!outstate); } /* Ingroupstate */ void makeset(void) { /* make up set of species for given set of characters */ long i, j, k, m; boolean instate; long *st; st = (long *)Malloc(setsz*sizeof(long)); n = 0; for (i = 0; i < (MaxChars); i++) { for (j = 0; j < setsz; j++) st[j] = 0; instate = Ingroupstate(ChOrder[i]); for (j = 0; j < (spp); j++) { if (Data[SpOrder[j] - 1]->vec[ChOrder[i] - 1] == instate) { m = (long)(SpOrder[j]/SETBITS); st[m] = ((long)st[m]) | (1L << (SpOrder[j] % SETBITS)); } } memcpy(grouping[++n - 1], st, setsz*sizeof(long)); } for (i = 0; i < (spp); i++) { k = (long)(SpOrder[i]/SETBITS); grouping[++n - 1][k] = 1L << (SpOrder[i] % SETBITS); } free(st); } /* makeset */ void Init(long *ChOrder, long *Count, long *MaxChars, aPtr aChars) { /* initialize vectors and character count */ long i, j, temp; boolean instate; *MaxChars = 0; for (i = 1; i <= (chars); i++) { if (aChars[ActChar[i - 1] - 1]) { (*MaxChars)++; ChOrder[*MaxChars - 1] = i; instate = Ingroupstate(i); temp = 0; for (j = 0; j < (spp); j++) { if (Data[j]->vec[i - 1] == instate) temp++; } Count[i - 1] = temp; } } } /*Init */ void ChSort(long *ChOrder, long *Count, long MaxChars) { /* sorts the characters by number of ingroup states */ long j, temp; boolean ordered; ordered = false; while (!ordered) { ordered = true; for (j = 1; j < MaxChars; j++) { if (Count[ChOrder[j - 1] - 1] < Count[ChOrder[j] - 1]) { ordered = false; temp = ChOrder[j - 1]; ChOrder[j - 1] = ChOrder[j]; ChOrder[j] = temp; } } } } /* ChSort */ void PrintClique(boolean *aChars) { /* prints the characters in a clique */ long i, j; fprintf(outfile, "\n\n"); if (Factors) { fprintf(outfile, "Actual Characters: ("); j = 0; for (i = 1; i <= (ActualChars); i++) { if (aChars[i - 1]) { fprintf(outfile, "%3ld", i); j++; newline(outfile, j, (long)((FormWide - 22) / 3), (long)nmlngth + 1); } } fprintf(outfile, ")\n"); } if (Factors) fprintf(outfile, "Binary "); fprintf(outfile, "Characters: ("); j = 0; for (i = 1; i <= (chars); i++) { if (aChars[ActChar[i - 1] - 1]) { fprintf(outfile, "%3ld", i); j++; if (Factors) newline(outfile, j, (long)((FormWide - 22) / 3), (long)nmlngth + 1); else newline(outfile, j, (long)((FormWide - 15) / 3), (long)nmlngth + 1); } } fprintf(outfile, ")\n\n"); } /* PrintClique */ void bigsubset(long *st, long n) { /* find a maximal subset of st among the groupings */ long i, j; long *su; boolean max, same; su = (long *)Malloc(setsz*sizeof(long)); for (i = 0; i < setsz; i++) su[i] = 0; for (i = 0; i < n; i++) { max = true; for (j = 0; j < setsz; j++) if ((grouping[i][j] & ~st[j]) != 0) max = false; if (max) { same = true; for (j = 0; j < setsz; 
j++) if (grouping[i][j] != st[j]) same = false; if (!same) { for (j = 0; j < setsz; j++) if ((su[j] & ~grouping[i][j]) != 0) max = false; if (max) { same = true; for (j = 0; j < setsz; j++) if (grouping[i][j] != su[j]) same = false; if (!same) memcpy(su, grouping[i], setsz*sizeof(long)); } } } } memcpy(st, su, setsz*sizeof(long)); free(su); } /* bigsubset */ void recontraverse(node **p, long *st, long n, long MaxChars) { /* traverse to reconstruct the tree from the characters */ long i, j, k, maxpos; long *tempset, *st2; boolean found, zero, zero2, same; node *q; j = k = 0; for (i = 1; i <= (spp); i++) { if (((1L << (i % SETBITS)) & st[(long)(i / SETBITS)]) != 0) { k++; j = i; } } if (k == 1) { *p = treenode[j - 1]; (*p)->tip = true; (*p)->index = j; return; } nunode(p); (*p)->index = 0; tempset = (long*)Malloc(setsz*sizeof(long)); memcpy(tempset, st, setsz*sizeof(long)); q = *p; zero = true; for (i = 0; i < setsz; i++) if (tempset[i] != 0) zero = false; if (!zero) bigsubset(tempset, n); zero = true; zero2 = true; for (i = 0; i < setsz; i++) if (st[i] != 0) zero = false; if (!zero) { for (i = 0; i < setsz; i++) if (tempset[i] != 0) zero2 = false; } st2 = (long *)Malloc(setsz*sizeof(long)); memcpy(st2, st, setsz*sizeof(long)); while (!zero2) { nunode(&q->next); q = q->next; recontraverse(&q->back, tempset, n,MaxChars); i = 1; maxpos = 0; while (i <= MaxChars) { same = true; for (j = 0; j < setsz; j++) if (grouping[i - 1][j] != tempset[j]) same = false; if (same) maxpos = i; i++; } q->back->maxpos = maxpos; q->back->back = q; for (j = 0; j < setsz; j++) st2[j] &= ~tempset[j]; memcpy(tempset, st2, setsz*sizeof(long)); found = false; i = 1; while (!found && i <= n) { same = true; for (j = 0; j < setsz; j++) if (grouping[i - 1][j] != tempset[j]) same = false; if (same) found = true; else i++; } zero = true; for (j = 0; j < setsz; j++) if (tempset[j] != 0) zero = false; if (!zero && !found) bigsubset(tempset, n); zero = true; zero2 = true; for (j = 0; j < setsz; j++) if (st2[j] != 0) zero = false; if (!zero) for (j = 0; j < setsz; j++) if (tempset[j] != 0) zero2 = false; } q->next = *p; free(tempset); free(st2); } /* recontraverse */ void reconstruct(long n, long MaxChars) { /* reconstruct tree from the subsets */ long i; long *s; s = (long *)Malloc(setsz*sizeof(long)); for (i = 0; i < setsz; i++) { if (i+1 == setsz) { s[i] = 1L << ((spp % SETBITS) + 1); if (setsz > 1) s[i] -= 1; else s[i] -= 1L << 1; } else if (i == 0) { if (setsz > 1) s[i] = ~0L - 1; } else { if (setsz > 2) s[i] = ~0L; } } recontraverse(&root,s,n,MaxChars); free(s); } /* reconstruct */ void reroot(node *outgroup) { /* reorients tree, putting outgroup in desired position. 
*/ long i; boolean nroot; node *p, *q; nroot = false; p = root->next; while (p != root) { if (outgroup->back == p) { nroot = true; p = root; } else p = p->next; } if (nroot) return; p = root; i = 0; while (p->next != root) { p = p->next; i++; } if (i == 2) { root->next->back->back = p->back; p->back->back = root->next->back; q = root->next; } else { p->next = root->next; nunode(&root->next); q = root->next; nunode(&q->next); p = q->next; p->next = root; q->tip = false; p->tip = false; } q->back = outgroup; p->back = outgroup->back; outgroup->back->back = p; outgroup->back = q; } /* reroot */ void clique_coordinates(node *p, long *tipy, long MaxChars) { /* establishes coordinates of nodes */ node *q, *first, *last; long maxx; if (p->tip) { p->xcoord = 0; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; return; } q = p->next; maxx = 0; while (q != p) { clique_coordinates(q->back, tipy, MaxChars); if (!q->back->tip) { if (q->back->xcoord > maxx) maxx = q->back->xcoord; } q = q->next; } first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (MaxChars - p->maxpos) * 3 - 2; if (p == root) p->xcoord += 2; p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* clique_coordinates */ void clique_drawline(long i) { /* draws one row of the tree diagram by moving up tree */ node *p, *q; long n, m, j, k, l, sumlocpos, size, locpos, branchpos; long *poslist; boolean extra, done, plus, found, same; node *r, *first = NULL, *last = NULL; poslist = (long *)Malloc((long)(spp + MaxChars)*sizeof(long)); branchpos = 0; p = root; q = root; fprintf(outfile, " "); extra = false; plus = false; do { if (!p->tip) { found = false; r = p->next; while (r != p && !found) { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; found = true; } else r = r->next; } first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; } done = (p->tip || p == q); n = p->xcoord - q->xcoord; m = n; if (extra) { n--; extra = false; } if ((long)q->ycoord == i && !done) { if (!q->tip) { putc('+', outfile); plus = true; j = 1; for (k = 1; k <= (q->maxpos); k++) { same = true; for (l = 0; l < setsz; l++) if (grouping[k - 1][l] != grouping[q->maxpos - 1][l]) same = false; if (same) { poslist[j - 1] = k; j++; } } size = j - 1; if (size == 0) { for (k = 1; k < n; k++) putc('-', outfile); sumlocpos = n; } else { sumlocpos = 0; j = 1; while (j <= size) { locpos = poslist[j - 1] * 3; if (j != 1) locpos -= poslist[j - 2] * 3; else locpos -= branchpos; for (k = 1; k < locpos; k++) putc('-', outfile); if (Rarer[ChOrder[poslist[j - 1] - 1] - 1]) putc('1', outfile); else putc('0', outfile); sumlocpos += locpos; j++; } for (j = sumlocpos + 1; j < n; j++) putc('-', outfile); putc('+', outfile); if (m > 0) branchpos += m; extra = true; } } else { if (!plus) { putc('+', outfile); plus = false; } else n++; j = 1; for (k = 1; k <= (q->maxpos); k++) { same = true; for (l = 0; l < setsz; l++) if (grouping[k - 1][l] != grouping[q->maxpos - 1][l]) same = false; if (same) { poslist[j - 1] = k; j++; } } size = j - 1; if (size == 0) { for (k = 1; k <= n; k++) putc('-', outfile); sumlocpos = n; } else { sumlocpos = 0; j = 1; while (j <= size) { locpos = poslist[j - 1] * 3; if (j != 1) locpos -= poslist[j - 2] * 3; else locpos -= branchpos; for (k = 1; k < locpos; k++) putc('-', outfile); if (Rarer[ChOrder[poslist[j - 1] - 1] - 1]) putc('1', outfile); else putc('0', outfile); sumlocpos += locpos; j++; } for (j = sumlocpos + 1; j <= n; j++) 
putc('-', outfile); if (m > 0) branchpos += m; } putc('-', outfile); } } else if (!p->tip && (long)last->ycoord > i && (long)first->ycoord < i && (i != (long)p->ycoord || p == root)) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); plus = false; if (m > 0) branchpos += m; } else { for (j = 1; j <= n; j++) putc(' ', outfile); plus = false; if (m > 0) branchpos += m; } if (q != p) p = q; } while (!done); if (p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); free(poslist); } /* clique_drawline */ void clique_printree(void) { /* prints out diagram of the tree */ long tipy, i; if (!treeprint) return; tipy = 1; clique_coordinates(root, &tipy, MaxChars); fprintf(outfile, "\n Tree and"); if (Factors) fprintf(outfile, " binary"); fprintf(outfile, " characters:\n\n"); fprintf(outfile, " "); for (i = 0; i < (MaxChars); i++) fprintf(outfile, "%3ld", ChOrder[i]); fprintf(outfile, "\n "); for (i = 0; i < (MaxChars); i++) { if (Rarer[ChOrder[i] - 1]) fprintf(outfile, "%3c", '1'); else fprintf(outfile, "%3c", '0'); } fprintf(outfile, "\n\n"); for (i = 1; i <= (tipy - down); i++) clique_drawline(i); fprintf(outfile, "\nremember: this is an unrooted tree!\n\n"); } /* clique_printree */ void DoAll(boolean *Chars_,boolean *Processed,boolean *Rarer_,long tcount) { /* print out a clique and its tree */ long i, j; ChPtr Count; aChars = (aPtr)Malloc((long)chars*sizeof(boolean)); SpOrder = (SpPtr)Malloc((long)spp*sizeof(long)); ChOrder = (ChPtr)Malloc((long)chars*sizeof(long)); Count = (ChPtr)Malloc((long)chars*sizeof(long)); memcpy(aChars, Chars_, chars*sizeof(boolean)); Rarer = Rarer_; Init(ChOrder, Count, &MaxChars, aChars); ChSort(ChOrder, Count, MaxChars); for (i = 1; i <= (spp); i++) SpOrder[i - 1] = i; for (i = 1; i <= (chars); i++) { if (aChars[ActChar[i - 1] - 1]) { if (!Processed[ActChar[i - 1] - 1]) { Rarer[i - 1] = Ingroupstate(i); Processed[ActChar[i - 1] - 1] = true; } } } PrintClique(aChars); grouping = (long **)Malloc((long)(spp + MaxChars)*sizeof(long *)); for (i = 0; i < spp + MaxChars; i++) { grouping[i] = (long *)Malloc(setsz*sizeof(long)); for (j = 0; j < setsz; j++) grouping[i][j] = 0; } makeset(); clique_setuptree(); reconstruct(n,MaxChars); if (noroot) reroot(treenode[outgrno - 1]); clique_printree(); if (trout) { col = 0; treeout(root, tcount+1, &col, root); } free(SpOrder); free(ChOrder); free(Count); for (i = 0; i < spp + MaxChars; i++) free(grouping[i]); free(grouping); } /* DoAll */ void Gen2(long i, long CurSize, boolean *aChars, boolean *Candidates, boolean *Excluded) { /* finds largest size cliques and prints them out */ long CurSize2, j, k, Actual, Possible; boolean Futile; vecrec *Chars2, *Cands2, *Excl2, *Cprime, *Exprime; clique_gnu(&Chars2); clique_gnu(&Cands2); clique_gnu(&Excl2); clique_gnu(&Cprime); clique_gnu(&Exprime); CurSize2 = CurSize; memcpy(Chars2->vec, aChars, chars*sizeof(boolean)); memcpy(Cands2->vec, Candidates, chars*sizeof(boolean)); memcpy(Excl2->vec, Excluded, chars*sizeof(boolean)); j = i; while (j <= ActualChars) { if (Cands2->vec[j - 1]) { Chars2->vec[j - 1] = true; Cands2->vec[j - 1] = false; CurSize2 += weight[j - 1]; Possible = CountStates(Cands2->vec); Intersect(Cands2->vec, Comp2[j - 1]->vec, Cprime->vec); Actual = CountStates(Cprime->vec); Intersect(Excl2->vec, Comp2[j - 1]->vec, Exprime->vec); Futile = false; for (k = 0; k <= j - 2; k++) { if (Exprime->vec[k] && !Futile) { Intersect(Cprime->vec, Comp2[k]->vec, Temp); Futile = (CountStates(Temp) == Actual); } } if 
(CurSize2 + Actual >= Cliqmin && !Futile) { if (Actual > 0) Gen2(j + 1,CurSize2,Chars2->vec,Cprime->vec,Exprime->vec); else DoAll(Chars2->vec,Processed,Rarer2,tcount); } if (Possible > Actual) { Chars2->vec[j - 1] = false; Excl2->vec[j - 1] = true; CurSize2 -= weight[j - 1]; } else j = ActualChars; } j++; } clique_chuck(Chars2); clique_chuck(Cands2); clique_chuck(Excl2); clique_chuck(Cprime); clique_chuck(Exprime); } /* Gen2 */ void GetMaxCliques(vecrec **Comp_) { /* recursively generates the largest cliques */ long i; aPtr aChars, Candidates, Excluded; Temp = (aPtr)Malloc((long)chars*sizeof(boolean)); Processed = (aPtr)Malloc((long)chars*sizeof(boolean)); Rarer2 = (aPtr)Malloc((long)chars*sizeof(boolean)); aChars = (aPtr)Malloc((long)chars*sizeof(boolean)); Candidates = (aPtr)Malloc((long)chars*sizeof(boolean)); Excluded = (aPtr)Malloc((long)chars*sizeof(boolean)); Comp2 = Comp_; putc('\n', outfile); if (Clmin) { fprintf(outfile, "Cliques with at least%3ld characters\n", Cliqmin); fprintf(outfile, "------- ---- -- ----- -- ----------\n"); } else { Cliqmin = 0; fprintf(outfile, "Largest Cliques\n"); fprintf(outfile, "------- -------\n"); for (i = 0; i < (ActualChars); i++) { aChars[i] = false; Excluded[i] = false; Candidates[i] = true; } tcount = 0; Gen1(1, 0, aChars, Candidates, Excluded); } for (i = 0; i < (ActualChars); i++) { aChars[i] = false; Candidates[i] = true; Processed[i] = false; Excluded[i] = false; } Gen2(1, 0, aChars, Candidates, Excluded); putc('\n', outfile); free(Temp); free(Processed); free(Rarer2); free(aChars); free(Candidates); free(Excluded); } /* GetMaxCliques */ int main(int argc, Char *argv[]) { /* Main Program */ #ifdef MAC argc = 1; /* macsetup("Clique","Clique"); */ argv[0] = "Clique"; #endif long i; init(argc, argv); emboss_getoptions("fclique",argc,argv); ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= (msets); ith++) { inputoptions(); if(!justwts || firstset) clique_inputdata(); firstset = false; SetUp(Comp); if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } if (justwts){ fprintf(outfile, "Weights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } GetMaxCliques(Comp); if (progress) { printf("\nOutput written to file \"%s\"\n",outfilename); if (trout) printf("\nTree"); if (tcount > 1) printf("s"); printf(" written on file \"%s\"\n\n", outtreename); } } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif printf("Done.\n\n"); if(treenode) { for(i = 0; i < spp; i++) free(treenode[i]); free(treenode); } ajPhyloStateDelarray(&phylostates); embExit(); return 0; } PHYLIPNEW-3.69.650/src/protdist.c0000664000175000017500000016555511616234204013111 00000000000000 #include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define nmlngth 10 /* number of characters in species name */ #define protepsilon .00001 typedef long *steparray; typedef enum { universal, ciliate, mito, vertmito, flymito, yeastmito } codetype; typedef enum { chemical, hall, george } cattype; typedef double matrix[20][20]; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ void protdist_uppercase(Char *); void protdist_inputnumbers(AjPSeqset); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void transition(void); void doinit(void); void printcategories(void); void inputoptions(void); void protdist_inputdata(AjPSeqset); void doinput(void); void code(void); void protdist_cats(void); void maketrans(void); void givens(matrix, long, long, long, double, double, boolean); void coeffs(double, double, double *, double *, double); void tridiag(matrix, long, double); void shiftqr(matrix, long, double); void qreigen(matrix, long); void pmbeigen(void); void pameigen(void); void jtteigen(void); void predict(long, long, long); void makedists(void); void reallocchars(void); /* function prototypes */ #endif long chars, datasets, ith, ctgry, categs; /* spp = number of species chars = number of positions in actual sequences */ double freqa, freqc, freqg, freqt, cvi, invarfrac, ttratio, xi, xv, ease, fracchange; boolean weights, justwts, progress, mulsets, gama, invar; boolean basesequal, usepmb, usejtt, usepam, kimura, similarity, firstset; codetype whichcode; cattype whichcat; steptr oldweight; double rate[maxcategs]; aas **gnode; aas trans[4][4][4]; double pie[20]; long cat[(long)ser - (long)ala + 1], numaa[(long)ser - (long)ala + 1]; double eig[20]; matrix prob, eigvecs; double **d; char infilename[100], catfilename[100], weightfilename[100]; const char* outfilename; AjPFile embossoutfile; /* Local variables for makedists, propagated globally for c version: */ double tt, p, dp, d2p, q, elambdat; /* this jtt matrix decomposition due to Elisabeth Tillier */ static double jtteigs[] = {+0.00000000000000,-1.81721720738768,-1.87965834528616,-1.61403121885431, -1.53896608443751,-1.40486966367848,-1.30995061286931,-1.24668414819041, -1.17179756521289,-0.31033320987464,-0.34602837857034,-1.06031718484613, -0.99900602987105,-0.45576774888948,-0.86014403434677,-0.54569432735296, -0.76866956571861,-0.60593589295327,-0.65119724379348,-0.70249806480753}; static double jttprobs[20][20] = {{+0.07686196156903,+0.05105697447152,+0.04254597872702,+0.05126897436552, +0.02027898986051,+0.04106097946952,+0.06181996909002,+0.07471396264303, +0.02298298850851,+0.05256897371552,+0.09111095444453,+0.05949797025102, +0.02341398829301,+0.04052997973502,+0.05053197473402,+0.06822496588753, +0.05851797074102,+0.01433599283201,+0.03230298384851,+0.06637396681302}, {-0.04445795120462,-0.01557336502860,-0.09314817363516,+0.04411372100382, -0.00511178725134,+0.00188472427522,-0.02176250428454,-0.01330231089224, +0.01004072641973,+0.02707838224285,-0.00785039050721,+0.02238829876349, +0.00257470703483,-0.00510311699563,-0.01727154263346,+0.20074235330882, -0.07236268502973,-0.00012690116016,-0.00215974664431,-0.01059243778174}, {+0.09480046389131,+0.00082658405814,+0.01530023104155,-0.00639909042723, +0.00160605602061,+0.00035896642912,+0.00199161318384,-0.00220482855717, -0.00112601328033,+0.14840201765438,-0.00344295714983,-0.00123976286718, -0.00439399942758,+0.00032478785709,-0.00104270266394,-0.02596605592109, 
-0.05645800566901,+0.00022319903170,-0.00022792271829,-0.16133258048606}, {-0.06924141195400,-0.01816245289173,-0.08104005811201,+0.08985697111009, +0.00279659017898,+0.01083740322821,-0.06449599336038,+0.01794514261221, +0.01036809141699,+0.04283504450449,+0.00634472273784,+0.02339134834111, -0.01748667848380,+0.00161859106290,+0.00622486432503,-0.05854130195643, +0.15083728660504,+0.00030733757661,-0.00143739522173,-0.05295810171941}, {-0.14637948915627,+0.02029296323583,+0.02615316895036,-0.10311538564943, -0.00183412744544,-0.02589124656591,+0.11073673851935,+0.00848581728407, +0.00106057791901,+0.05530240732939,-0.00031533506946,-0.03124002869407, -0.01533984125301,-0.00288717337278,+0.00272787410643,+0.06300929916280, +0.07920438311152,-0.00041335282410,-0.00011648873397,-0.03944076085434}, {-0.05558229086909,+0.08935293782491,+0.04869509588770,+0.04856877988810, -0.00253836047720,+0.07651693957635,-0.06342453535092,-0.00777376246014, -0.08570270266807,+0.01943016473512,-0.00599516526932,-0.09157595008575, -0.00397735155663,-0.00440093863690,-0.00232998056918,+0.02979967701162, -0.00477299485901,-0.00144011795333,+0.01795114942404,-0.00080059359232}, {+0.05807741644682,+0.14654292420341,-0.06724975334073,+0.02159062346633, -0.00339085518294,-0.06829036785575,+0.03520631903157,-0.02766062718318, +0.03485632707432,-0.02436836692465,-0.00397566003573,-0.10095488644404, +0.02456887654357,+0.00381764117077,-0.00906261340247,-0.01043058066362, +0.01651199513994,-0.00210417220821,-0.00872508520963,-0.01495915462580}, {+0.02564617106907,+0.02960554611436,-0.00052356748770,+0.00989267817318, -0.00044034172141,-0.02279910634723,-0.00363768356471,-0.01086345665971, +0.01229721799572,+0.02633650142592,+0.06282966783922,-0.00734486499924, -0.13863936313277,-0.00993891943390,-0.00655309682350,-0.00245191788287, -0.02431633805559,-0.00068554031525,-0.00121383858869,+0.06280025239509}, {+0.11362428251792,-0.02080375718488,-0.08802750967213,-0.06531316372189, -0.00166626058292,+0.06846081717224,+0.07007301248407,-0.01713112936632, -0.05900588794853,-0.04497159138485,+0.04222484636983,+0.00129043178508, -0.01550337251561,-0.01553102163852,-0.04363429852047,+0.01600063777880, +0.05787328925647,-0.00008265841118,+0.02870014572813,-0.02657681214523}, {+0.01840541226842,+0.00610159018805,+0.01368080422265,+0.02383751807012, -0.00923516894192,+0.01209943150832,+0.02906782189141,+0.01992384905334, +0.00197323568330,+0.00017531415423,-0.01796698381949,+0.01887083962858, -0.00063335886734,-0.02365277334702,+0.01209445088200,+0.01308086447947, +0.01286727242301,-0.11420358975688,-0.01886991700613,+0.00238338728588}, {-0.01100105031759,-0.04250695864938,-0.02554356700969,-0.05473632078607, +0.00725906469946,-0.03003724918191,-0.07051526125013,-0.06939439879112, -0.00285883056088,+0.05334304124753,+0.12839241846919,-0.05883473754222, +0.02424304967487,+0.09134510778469,-0.00226003347193,-0.01280041778462, -0.00207988305627,-0.02957493909199,+0.05290385686789,+0.05465710875015}, {-0.01421274522011,+0.02074863337778,-0.01006411985628,+0.03319995456446, -0.00005371699269,-0.12266046460835,+0.02419847062899,-0.00441168706583, -0.08299118738167,-0.00323230913482,+0.02954035119881,+0.09212856795583, +0.00718635627257,-0.02706936115539,+0.04473173279913,-0.01274357634785, -0.01395862740618,-0.00071538848681,+0.04767640012830,-0.00729728326990}, {-0.03797680968123,+0.01280286509478,-0.08614616553187,-0.01781049963160, +0.00674319990083,+0.04208667754694,+0.05991325707583,+0.03581015660092, 
-0.01529816709967,+0.06885987924922,-0.11719120476535,-0.00014333663810, +0.00074336784254,+0.02893416406249,+0.07466151360134,-0.08182016471377, -0.06581536577662,-0.00018195976501,+0.00167443595008,+0.09015415667825}, {+0.03577726799591,-0.02139253448219,-0.01137813538175,-0.01954939202830, -0.04028242801611,-0.01777500032351,-0.02106862264440,+0.00465199658293, -0.02824805812709,+0.06618860061778,+0.08437791757537,-0.02533125946051, +0.02806344654855,-0.06970805797879,+0.02328376968627,+0.00692992333282, +0.02751392122018,+0.01148722812804,-0.11130404325078,+0.07776346000559}, {-0.06014297925310,-0.00711674355952,-0.02424493472566,+0.00032464353156, +0.00321221847573,+0.03257969053884,+0.01072805771161,+0.06892027923996, +0.03326534127710,-0.01558838623875,+0.13794237677194,-0.04292623056646, +0.01375763233229,-0.11125153774789,+0.03510076081639,-0.04531670712549, -0.06170413486351,-0.00182023682123,+0.05979891871679,-0.02551802851059}, {-0.03515069991501,+0.02310847227710,+0.00474493548551,+0.02787717003457, -0.12038329679812,+0.03178473522077,+0.04445111601130,-0.05334957493090, +0.01290386678474,-0.00376064171612,+0.03996642737967,+0.04777677295520, +0.00233689200639,+0.03917715404594,-0.01755598277531,-0.03389088626433, -0.02180780263389,+0.00473402043911,+0.01964539477020,-0.01260807237680}, {-0.04120428254254,+0.00062717164978,-0.01688703578637,+0.01685776910152, +0.02102702093943,+0.01295781834163,+0.03541815979495,+0.03968150445315, -0.02073122710938,-0.06932247350110,+0.11696314241296,-0.00322523765776, -0.01280515661402,+0.08717664266126,+0.06297225078802,-0.01290501780488, -0.04693925076877,-0.00177653675449,-0.08407812137852,-0.08380714022487}, {+0.03138655228534,-0.09052573757196,+0.00874202219428,+0.06060593729292, -0.03426076652151,-0.04832468257386,+0.04735628794421,+0.14504653737383, -0.01709111334001,-0.00278794215381,-0.03513813820550,-0.11690294831883, -0.00836264902624,+0.03270980973180,-0.02587764129811,+0.01638786059073, +0.00485499822497,+0.00305477087025,+0.02295754527195,+0.00616929722958}, {-0.04898722042023,-0.01460879656586,+0.00508708857036,+0.07730497806331, +0.04252420017435,+0.00484232580349,+0.09861807969412,-0.05169447907187, -0.00917820907880,+0.03679081047330,+0.04998537112655,+0.00769330211980, +0.01805447683564,-0.00498723245027,-0.14148416183376,-0.05170281760262, -0.03230723310784,-0.00032890672639,-0.02363523071957,+0.03801365471627}, {-0.02047562162108,+0.06933781779590,-0.02101117884731,-0.06841945874842, -0.00860967572716,-0.00886650271590,-0.07185241332269,+0.16703684361030, -0.00635847581692,+0.00811478913823,+0.01847205842216,+0.06700967948643, +0.00596607376199,+0.02318239240593,-0.10552958537847,-0.01980199747773, -0.02003785382406,-0.00593392430159,-0.00965391033612,+0.00743094349652}}; /* PMB matrix decomposition courtesy of Elisabeth Tillier */ static double pmbeigs[] = {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; static double pmbprobs[20][20] = {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, 
0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, {0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, -0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, -0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, 
-0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, -0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, 0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, 0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, 
-0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} ; /* dcmut version of PAM model from http://www.ebi.ac.uk/goldman-srv/dayhoff/ */ static double pameigs[] = {0,-1.93321786301018,-2.20904642493621,-1.74835983874903, -1.64854548332072,-1.54505559488222,-1.33859384676989,-1.29786201193594, -0.235548517495575,-0.266951066089808,-0.28965813670665,-1.10505826965282, -1.04323310568532,-0.430423720979904,-0.541719761016713,-0.879636093986914, -0.711249353378695,-0.725050487280602,-0.776855937389452,-0.808735559461343}; static double pamprobs[20][20] ={ {0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 0.08048095976, 0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764}, {0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875}, {-0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643}, {0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618}, {0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, -0.1063484263}, {-0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, -0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976}, {-0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261}, {-0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, 
-0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439}, {0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151}, {-0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656}, {-0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605}, {0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363}, {0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355}, {0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309}, {0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361}, {0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116}, {0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, -0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802}, {0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243}, {-0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, 
-0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609}, {-0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, -0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059}}; void protdist_uppercase(Char *ch) { (*ch) = (isupper((int)*ch) ? (*ch) : toupper((int)*ch)); } /* protdist_uppercase */ void protdist_inputnumbers(AjPSeqset seqset) { /* input the numbers of species and of characters */ long i; spp = seqset->Size; chars = seqset->Len; if (printdata) fprintf(outfile, "%2ld species, %3ld positions\n\n", spp, chars); gnode = (aas **)Malloc(spp * sizeof(aas *)); if (firstset) { for (i = 0; i < spp; i++) gnode[i] = (aas *)Malloc(chars * sizeof(aas )); } weight = (steparray)Malloc(chars*sizeof(long)); oldweight = (steparray)Malloc(chars*sizeof(long)); category = (steparray)Malloc(chars*sizeof(long)); d = (double **)Malloc(spp*sizeof(double *)); nayme = (naym *)Malloc(spp*sizeof(naym)); for (i = 0; i < spp; ++i) d[i] = (double *)Malloc(spp*sizeof(double)); } /* protdist_inputnumbers */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr model = NULL; AjPStr gammamethod = NULL; AjPFloat basefreq; AjPFloat arrayval; AjPStr whichcodestr = NULL; AjBool catmodel = false; weights = false; printdata = false; progress = true; interleaved = true; similarity = false; ttratio = 2.0; whichcode = universal; whichcat = george; basesequal = true; freqa = 0.25; freqc = 0.25; freqg = 0.25; freqt = 0.25; usejtt = false; usepmb = false; usepam = false; kimura = false; gama = false; invar = false; invarfrac = 0.0; cvi = 0.0; ease = 0.457; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } categs = ajAcdGetInt("ncategories"); if (categs > 1) { ctgry = true; arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } phyloratecat = ajAcdGetProperties("categories"); model = ajAcdGetListSingle("method"); if(ajStrMatchC(model, "j")) usejtt = true; else if(ajStrMatchC(model, "h")) usepmb = true; else if(ajStrMatchC(model, "d")) usepam = true; else if(ajStrMatchC(model, "k")) kimura = true; else if(ajStrMatchC(model, "s")) similarity = true; else if(ajStrMatchC(model, "c")) catmodel = true; if(catmodel) { whichcodestr = ajAcdGetListSingle("whichcode"); if(ajStrMatchCaseC(whichcodestr, "u")) whichcode = universal; else if (ajStrMatchCaseC(whichcodestr, "c")) whichcode = ciliate; else if (ajStrMatchCaseC(whichcodestr, "m")) whichcode = mito; else if (ajStrMatchCaseC(whichcodestr,"v")) whichcode = vertmito; else if (ajStrMatchCaseC(whichcodestr,"f")) whichcode = flymito; else if (ajStrMatchCaseC(whichcodestr,"y")) whichcode = yeastmito; ease = ajAcdGetFloat("ease"); ttratio = ajAcdGetFloat("ttratio"); basefreq = ajAcdGetArray("basefreq"); freqa = ajFloatGet(basefreq, 0); freqc = ajFloatGet(basefreq, 1); freqg = ajFloatGet(basefreq, 2); freqt = ajFloatGet(basefreq, 3); } if(!kimura && !similarity) { gammamethod = 
ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "g")) { gama = true; cvi = ajAcdGetFloat("gammacoefficient"); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; cvi = ajAcdGetFloat("invarcoefficient"); } cvi = 1.0 / (cvi * cvi); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); /* fprintf(outfile, "\nProtein distance algorithm, version %s\n\n",VERSION);*/ /* printf("\n weights: %s",(weights ? "true" : "false")); printf("\n progress: %s",(progress ? "true" : "false")); printf("\n similarity: %s",(similarity ? "true" : "false")); printf("\n ttratio: %f",(ttratio)); printf("\n freqa: %f",(freqa)) ; printf("\n freqc: %f",(freqc)); printf("\n freqg: %f",(freqg)); printf("\n freqt: %f",(freqt)); printf("\n usejtt: %s",(usejtt ? "true" : "false")); printf("\n usepmb: %s",(usepmb ? "true" : "false")); printf("\n usepam: %s",(usepam ? "true" : "false")); printf("\n kimura: %s",(kimura ? "true" : "false")); printf("\n catmodel: %s",(catmodel ? "true" : "false")); printf("\n gama: %s",(gama ? "true" : "false")); printf("\n invar: %s",(invarfrac ? "true" : "false")); printf("\n cvi: %f",(cvi)); printf("\n invar: %f",(ease)); printf("\n mulsets: %s",(mulsets ? "true" : "false")); printf("\n datasets: %ld",(datasets)); printf("\n printdata: %s",(printdata ? "true" : "false")); */ } /* emboss_getoptions */ void transition() { /* calculations related to transition-transversion ratio */ double aa, bb, freqr, freqy, freqgr, freqty; freqr = freqa + freqg; freqy = freqc + freqt; freqgr = freqg / freqr; freqty = freqt / freqy; aa = ttratio * freqr * freqy - freqa * freqg - freqc * freqt; bb = freqa * freqgr + freqc * freqty; xi = aa / (aa + bb); xv = 1.0 - xi; if (xi <= 0.0 && xi >= -epsilon) xi = 0.0; if (xi < 0.0){ printf("THIS TRANSITION-TRANSVERSION RATIO IS IMPOSSIBLE WITH"); printf(" THESE BASE FREQUENCIES\n"); embExitBad();} } /* transition */ void doinit() { /* initializes variables */ protdist_inputnumbers(seqsets[0]); transition(); } /* doinit*/ void printcategories() { /* print out list of categories of positions */ long i, j; fprintf(outfile, "Rate categories\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= chars; i++) { fprintf(outfile, "%ld", category[i - 1]); if (i % 60 == 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } else if (i % 10 == 0) putc(' ', outfile); } fprintf(outfile, "\n\n"); } /* printcategories */ void reallocchars(void) { int i; free(weight); free(oldweight); free(category); for (i = 0; i < spp; i++) { free(gnode[i]); gnode[i] = (aas *)Malloc(chars * sizeof(aas )); } weight = (steparray)Malloc(chars*sizeof(long)); oldweight = (steparray)Malloc(chars*sizeof(long)); category = (steparray)Malloc(chars*sizeof(long)); } void inputoptions() { /* input the information on the options */ long i; if (!firstset && !justwts) { samenumspseq(seqsets[ith-1],&chars, ith); reallocchars(); } if (firstset || !justwts) { for (i = 0; i < chars; i++) { category[i] = 1; oldweight[i] = 1; weight[i] = 1; } } /* if (!justwts && weights) {*/ if (justwts || weights) inputweightsstr(phyloweights->Str[ith-1], chars, oldweight, &weights); if (printdata) putc('\n', outfile); if (usejtt && printdata) fprintf(outfile, " Jones-Taylor-Thornton model distance\n"); if (usepmb && printdata) fprintf(outfile, " Henikoff/Tillier PMB model distance\n"); if (usepam && printdata) fprintf(outfile, " Dayhoff PAM 
model distance\n"); if (kimura && printdata) fprintf(outfile, " Kimura protein distance\n"); if (!(usejtt || usepmb || usepam || kimura || similarity) && printdata) fprintf(outfile, " Categories model distance\n"); if (similarity) fprintf(outfile, " \n Table of similarity between sequences\n"); if ((ctgry && categs > 1) && (firstset || !justwts)) { inputcategsstr(phyloratecat->Str[0], 0, chars, category, categs, "ProtDist"); if (printdata) printcategs(outfile, chars, category, "Position categories"); } else if (printdata && (categs > 1)) { fprintf(outfile, "\nPosition category Rate of change\n\n"); for (i = 1; i <= categs; i++) fprintf(outfile, "%15ld%13.3f\n", i, rate[i - 1]); putc('\n', outfile); printcategories(); } if (weights && printdata) printweights(outfile, 0, chars, oldweight, "Positions"); } /* inputoptions */ void protdist_inputdata(AjPSeqset seqset) { /* input the names and sequences for each species */ long i=0, j, k, l /*, aasread=0*/; Char charstate; boolean allread, done; aas aa=0; /* temporary amino acid for input */ const AjPStr str; if (progress) putchar('\n'); j = nmlngth + (chars + (chars - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nName"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Sequences\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "---------\n\n"); } /*aasread = 0;*/ allread = false; while (!(allread)) { i = 1; while (i <= spp) { initnameseq(seqset, i-1); str = ajSeqGetSeqS(ajSeqsetGetseqSeq(seqset, i-1)); j = 0; done = false; while (!done) { while (j < chars) { charstate = ajStrGetCharPos(str, j); protdist_uppercase(&charstate); if ((!isalpha((int)charstate) && charstate != '.' && charstate != '?' && charstate != '-' && charstate != '*') || charstate == 'J' || charstate == 'O' || charstate == 'U' || charstate == '.') { printf("ERROR -- bad amino acid: %c at position %ld of species %3ld\n", charstate, j, i); if (charstate == '.') { printf(" Periods (.) 
may not be used as gap characters.\n"); printf(" The correct gap character is (-)\n"); } embExitBad(); } j++; switch (charstate) { case 'A': aa = ala; break; case 'B': aa = asx; break; case 'C': aa = cys; break; case 'D': aa = asp; break; case 'E': aa = glu; break; case 'F': aa = phe; break; case 'G': aa = gly; break; case 'H': aa = his; break; case 'I': aa = ileu; break; case 'K': aa = lys; break; case 'L': aa = leu; break; case 'M': aa = met; break; case 'N': aa = asn; break; case 'P': aa = pro; break; case 'Q': aa = gln; break; case 'R': aa = arg; break; case 'S': aa = ser; break; case 'T': aa = thr; break; case 'V': aa = val; break; case 'W': aa = trp; break; case 'X': aa = unk; break; case 'Y': aa = tyr; break; case 'Z': aa = glx; break; case '*': aa = stop; break; case '?': aa = quest; break; case '-': aa = del; break; } gnode[i - 1][j - 1] = aa; } if (j == chars) done = true; } i++; } allread = (i > spp); } if ( printdata) { for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { for (j = 1; j <= spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j - 1][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && gnode[j - 1][k - 1] == gnode[0][k - 1]) charstate = '.'; else { switch (gnode[j - 1][k - 1]) { case ala: charstate = 'A'; break; case asx: charstate = 'B'; break; case cys: charstate = 'C'; break; case asp: charstate = 'D'; break; case glu: charstate = 'E'; break; case phe: charstate = 'F'; break; case gly: charstate = 'G'; break; case his: charstate = 'H'; break; case ileu: charstate = 'I'; break; case lys: charstate = 'K'; break; case leu: charstate = 'L'; break; case met: charstate = 'M'; break; case asn: charstate = 'N'; break; case pro: charstate = 'P'; break; case gln: charstate = 'Q'; break; case arg: charstate = 'R'; break; case ser: charstate = 'S'; break; case thr: charstate = 'T'; break; case val: charstate = 'V'; break; case trp: charstate = 'W'; break; case tyr: charstate = 'Y'; break; case glx: charstate = 'Z'; break; case del: charstate = '-'; break; case stop: charstate = '*'; break; case unk: charstate = 'X'; break; case quest: charstate = '?'; break; default: /*cases ser1 and ser2 cannot occur*/ break; } } putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } if (printdata) putc('\n', outfile); } /* protdist_inputdata */ void doinput() { /* reads the input data */ long i; double sumrates, weightsum; inputoptions(); if(!justwts || firstset) protdist_inputdata(seqsets[ith-1]); if (!ctgry) { categs = 1; rate[0] = 1.0; } weightsum = 0; for (i = 0; i < chars; i++) weightsum += oldweight[i]; sumrates = 0.0; for (i = 0; i < chars; i++) sumrates += oldweight[i] * rate[category[i] - 1]; for (i = 0; i < categs; i++) rate[i] *= weightsum / sumrates; } /* doinput */ void code() { /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */ long n; aas b; trans[0][0][0] = phe; trans[0][0][1] = phe; trans[0][0][2] = leu; trans[0][0][3] = leu; trans[0][1][0] = ser; trans[0][1][1] = ser; trans[0][1][2] = ser; trans[0][1][3] = ser; trans[0][2][0] = tyr; trans[0][2][1] = tyr; trans[0][2][2] = stop; trans[0][2][3] = stop; trans[0][3][0] = cys; trans[0][3][1] = cys; trans[0][3][2] = stop; trans[0][3][3] = trp; trans[1][0][0] = leu; trans[1][0][1] = leu; trans[1][0][2] = leu; trans[1][0][3] = leu; trans[1][1][0] = pro; trans[1][1][1] = pro; trans[1][1][2] = pro; trans[1][1][3] = pro; trans[1][2][0] = his; trans[1][2][1] = 
his; trans[1][2][2] = gln; trans[1][2][3] = gln; trans[1][3][0] = arg; trans[1][3][1] = arg; trans[1][3][2] = arg; trans[1][3][3] = arg; trans[2][0][0] = ileu; trans[2][0][1] = ileu; trans[2][0][2] = ileu; trans[2][0][3] = met; trans[2][1][0] = thr; trans[2][1][1] = thr; trans[2][1][2] = thr; trans[2][1][3] = thr; trans[2][2][0] = asn; trans[2][2][1] = asn; trans[2][2][2] = lys; trans[2][2][3] = lys; trans[2][3][0] = ser; trans[2][3][1] = ser; trans[2][3][2] = arg; trans[2][3][3] = arg; trans[3][0][0] = val; trans[3][0][1] = val; trans[3][0][2] = val; trans[3][0][3] = val; trans[3][1][0] = ala; trans[3][1][1] = ala; trans[3][1][2] = ala; trans[3][1][3] = ala; trans[3][2][0] = asp; trans[3][2][1] = asp; trans[3][2][2] = glu; trans[3][2][3] = glu; trans[3][3][0] = gly; trans[3][3][1] = gly; trans[3][3][2] = gly; trans[3][3][3] = gly; if (whichcode == mito) trans[0][3][2] = trp; if (whichcode == vertmito) { trans[0][3][2] = trp; trans[2][3][2] = stop; trans[2][3][3] = stop; trans[2][0][2] = met; } if (whichcode == flymito) { trans[0][3][2] = trp; trans[2][0][2] = met; trans[2][3][2] = ser; } if (whichcode == yeastmito) { trans[0][3][2] = trp; trans[1][0][2] = thr; trans[2][0][2] = met; } n = 0; for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { if (b != ser2) { n++; numaa[(long)b - (long)ala] = n; } } numaa[(long)ser - (long)ala] = (long)ser1 - (long)(ala) + 1; } /* code */ void protdist_cats() { /* define categories of amino acids */ aas b; /* fundamental subgroups */ cat[0] = 1; /* for alanine */ cat[(long)cys - (long)ala] = 1; cat[(long)met - (long)ala] = 2; cat[(long)val - (long)ala] = 3; cat[(long)leu - (long)ala] = 3; cat[(long)ileu - (long)ala] = 3; cat[(long)gly - (long)ala] = 4; cat[0] = 4; cat[(long)ser - (long)ala] = 4; cat[(long)thr - (long)ala] = 4; cat[(long)pro - (long)ala] = 5; cat[(long)phe - (long)ala] = 6; cat[(long)tyr - (long)ala] = 6; cat[(long)trp - (long)ala] = 6; cat[(long)glu - (long)ala] = 7; cat[(long)gln - (long)ala] = 7; cat[(long)asp - (long)ala] = 7; cat[(long)asn - (long)ala] = 7; cat[(long)lys - (long)ala] = 8; cat[(long)arg - (long)ala] = 8; cat[(long)his - (long)ala] = 8; if (whichcat == george) { /* George, Hunt and Barker: sulfhydryl, small hydrophobic, small hydrophilic, aromatic, acid/acid-amide/hydrophilic, basic */ for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { if (cat[(long)b - (long)ala] == 3) cat[(long)b - (long)ala] = 2; if (cat[(long)b - (long)ala] == 5) cat[(long)b - (long)ala] = 4; } } if (whichcat == chemical) { /* Conn and Stumpf: monoamino, aliphatic, heterocyclic, aromatic, dicarboxylic, basic */ for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { if (cat[(long)b - (long)ala] == 2) cat[(long)b - (long)ala] = 1; if (cat[(long)b - (long)ala] == 4) cat[(long)b - (long)ala] = 3; } } /* Ben Hall's personal opinion */ if (whichcat != hall) return; for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { if (cat[(long)b - (long)ala] == 3) cat[(long)b - (long)ala] = 2; } } /* protdist_cats */ void maketrans() { /* Make up transition probability matrix from code and category tables */ long i, j, k, m, n, s, nb1, nb2; double x, sum; long sub[3], newsub[3]; double f[4], g[4]; aas b1, b2; double TEMP, TEMP1, TEMP2, TEMP3; for (i = 0; i <= 19; i++) { pie[i] = 0.0; for (j = 0; j <= 19; j++) prob[i][j] = 0.0; } f[0] = freqt; f[1] = freqc; f[2] = freqa; f[3] = freqg; g[0] = freqc + freqt; g[1] = freqc + freqt; g[2] = freqa + freqg; g[3] = freqa + freqg; TEMP = f[0]; TEMP1 = f[1]; TEMP2 = f[2]; TEMP3 = f[3]; 
fracchange = xi * (2 * f[0] * f[1] / g[0] + 2 * f[2] * f[3] / g[2]) + xv * (1 - TEMP * TEMP - TEMP1 * TEMP1 - TEMP2 * TEMP2 - TEMP3 * TEMP3); sum = 0.0; for (i = 0; i <= 3; i++) { for (j = 0; j <= 3; j++) { for (k = 0; k <= 3; k++) { if (trans[i][j][k] != stop) sum += f[i] * f[j] * f[k]; } } } for (i = 0; i <= 3; i++) { sub[0] = i + 1; for (j = 0; j <= 3; j++) { sub[1] = j + 1; for (k = 0; k <= 3; k++) { sub[2] = k + 1; b1 = trans[i][j][k]; for (m = 0; m <= 2; m++) { s = sub[m]; for (n = 1; n <= 4; n++) { memcpy(newsub, sub, sizeof(long) * 3L); newsub[m] = n; x = f[i] * f[j] * f[k] / (3.0 * sum); if (((s == 1 || s == 2) && (n == 3 || n == 4)) || ((n == 1 || n == 2) && (s == 3 || s == 4))) x *= xv * f[n - 1]; else x *= xi * f[n - 1] / g[n - 1] + xv * f[n - 1]; b2 = trans[newsub[0] - 1][newsub[1] - 1][newsub[2] - 1]; if (b1 != stop) { nb1 = numaa[(long)b1 - (long)ala]; pie[nb1 - 1] += x; if (b2 != stop) { nb2 = numaa[(long)b2 - (long)ala]; if (cat[(long)b1 - (long)ala] != cat[(long)b2 - (long)ala]) { prob[nb1 - 1][nb2 - 1] += x * ease; prob[nb1 - 1][nb1 - 1] += x * (1.0 - ease); } else prob[nb1 - 1][nb2 - 1] += x; } else prob[nb1 - 1][nb1 - 1] += x; } } } } } } for (i = 0; i <= 19; i++) prob[i][i] -= pie[i]; for (i = 0; i <= 19; i++) { for (j = 0; j <= 19; j++) prob[i][j] /= sqrt(pie[i] * pie[j]); } /* computes pi^(1/2)*B*pi^(-1/2) */ } /* maketrans */ void givens(double (*a)[20], long i, long j, long n, double ctheta, double stheta, boolean left) { /* Givens transform at i,j for 1..n with angle theta */ long k; double d; for (k = 0; k < n; k++) { if (left) { d = ctheta * a[i - 1][k] + stheta * a[j - 1][k]; a[j - 1][k] = ctheta * a[j - 1][k] - stheta * a[i - 1][k]; a[i - 1][k] = d; } else { d = ctheta * a[k][i - 1] + stheta * a[k][j - 1]; a[k][j - 1] = ctheta * a[k][j - 1] - stheta * a[k][i - 1]; a[k][i - 1] = d; } } } /* givens */ void coeffs(double x, double y, double *c, double *s, double accuracy) { /* compute cosine and sine of theta */ double root; root = sqrt(x * x + y * y); if (root < accuracy) { *c = 1.0; *s = 0.0; } else { *c = x / root; *s = y / root; } } /* coeffs */ void tridiag(double (*a)[20], long n, double accuracy) { /* Givens tridiagonalization */ long i, j; double s, c; for (i = 2; i < n; i++) { for (j = i + 1; j <= n; j++) { coeffs(a[i - 2][i - 1], a[i - 2][j - 1], &c, &s,accuracy); givens(a, i, j, n, c, s, true); givens(a, i, j, n, c, s, false); givens(eigvecs, i, j, n, c, s, true); } } } /* tridiag */ void shiftqr(double (*a)[20], long n, double accuracy) { /* QR eigenvalue-finder */ long i, j; double approx, s, c, d, TEMP, TEMP1; for (i = n; i >= 2; i--) { do { TEMP = a[i - 2][i - 2] - a[i - 1][i - 1]; TEMP1 = a[i - 1][i - 2]; d = sqrt(TEMP * TEMP + TEMP1 * TEMP1); approx = a[i - 2][i - 2] + a[i - 1][i - 1]; if (a[i - 1][i - 1] < a[i - 2][i - 2]) approx = (approx - d) / 2.0; else approx = (approx + d) / 2.0; for (j = 0; j < i; j++) a[j][j] -= approx; for (j = 1; j < i; j++) { coeffs(a[j - 1][j - 1], a[j][j - 1], &c, &s, accuracy); givens(a, j, j + 1, i, c, s, true); givens(a, j, j + 1, i, c, s, false); givens(eigvecs, j, j + 1, n, c, s, true); } for (j = 0; j < i; j++) a[j][j] += approx; } while (fabs(a[i - 1][i - 2]) > accuracy); } } /* shiftqr */ void qreigen(double (*prob)[20], long n) { /* QR eigenvector/eigenvalue method for symmetric matrix */ double accuracy; long i, j; accuracy = 1.0e-6; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) eigvecs[i][j] = 0.0; eigvecs[i][i] = 1.0; } tridiag(prob, n, accuracy); shiftqr(prob, n, accuracy); for (i = 0; i < n; i++) 
eig[i] = prob[i][i]; for (i = 0; i <= 19; i++) { for (j = 0; j <= 19; j++) prob[i][j] = sqrt(pie[j]) * eigvecs[i][j]; } /* prob[i][j] is the value of U' times pi^(1/2) */ } /* qreigen */ void jtteigen() { /* eigenanalysis for JTT matrix, precomputed */ memcpy(prob,jttprobs,sizeof(jttprobs)); memcpy(eig,jtteigs,sizeof(jtteigs)); fracchange = 1.0; /** changed from 0.01 **/ } /* jtteigen */ void pmbeigen() { /* eigenanalysis for PMB matrix, precomputed */ memcpy(prob,pmbprobs,sizeof(pmbprobs)); memcpy(eig,pmbeigs,sizeof(pmbeigs)); fracchange = 1.0; } /* pmbeigen */ void pameigen() { /* eigenanalysis for PAM matrix, precomputed */ memcpy(prob,pamprobs,sizeof(pamprobs)); memcpy(eig,pameigs,sizeof(pameigs)); fracchange = 1.0; /** changed from 0.01 **/ } /* pameigen */ void predict(long nb1, long nb2, long cat) { /* make contribution to prediction of this aa pair */ long m; double TEMP; for (m = 0; m <= 19; m++) { if (gama || invar) elambdat = exp(-cvi*log(1.0-rate[cat-1]*tt*(eig[m]/(1.0-invarfrac))/cvi)); else elambdat = exp(rate[cat-1]*tt * eig[m]); q = prob[m][nb1 - 1] * prob[m][nb2 - 1] * elambdat; p += q; if (!gama && !invar) dp += rate[cat-1]*eig[m] * q; else dp += (rate[cat-1]*eig[m]/(1.0-rate[cat-1]*tt*(eig[m]/(1.0-invarfrac))/cvi)) * q; TEMP = eig[m]; if (!gama && !invar) d2p += TEMP * TEMP * q; else d2p += (rate[cat-1]*rate[cat-1]*eig[m]*eig[m]*(1.0+1.0/cvi)/ ((1.0-rate[cat-1]*tt*eig[m]/cvi) *(1.0-rate[cat-1]*tt*eig[m]/cvi))) * q; } if (nb1 == nb2) { p *= (1.0 - invarfrac); p += invarfrac; } dp *= (1.0 - invarfrac); d2p *= (1.0 - invarfrac); } /* predict */ void makedists() { /* compute the distances */ long i, j, k, m, n, itterations, nb1, nb2, cat; double delta, lnlike, slope, curv; boolean neginfinity, inf, overlap; aas b1, b2; if (!(printdata || similarity)) fprintf(outfile, "%5ld\n", spp); if (progress) printf("Computing distances:\n"); for (i = 1; i <= spp; i++) { if (progress) printf(" "); if (progress) { for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); } if (progress) { printf(" "); fflush(stdout); } if (similarity) d[i-1][i-1] = 1.0; else d[i-1][i-1] = 0.0; for (j = 0; j <= i - 2; j++) { if (!(kimura || similarity)) { if (usejtt || usepmb || usepam) tt = 0.1/fracchange; else tt = 1.0; delta = tt / 2.0; itterations = 0; inf = false; do { lnlike = 0.0; slope = 0.0; curv = 0.0; neginfinity = false; overlap = false; for (k = 0; k < chars; k++) { if (oldweight[k] > 0) { cat = category[k]; b1 = gnode[i - 1][k]; b2 = gnode[j][k]; if (b1 != stop && b1 != del && b1 != quest && b1 != unk && b2 != stop && b2 != del && b2 != quest && b2 != unk) { overlap = true; p = 0.0; dp = 0.0; d2p = 0.0; nb1 = numaa[(long)b1 - (long)ala]; nb2 = numaa[(long)b2 - (long)ala]; if (b1 != asx && b1 != glx && b2 != asx && b2 != glx) predict(nb1, nb2, cat); else { if (b1 == asx) { if (b2 == asx) { predict(3L, 3L, cat); predict(3L, 4L, cat); predict(4L, 3L, cat); predict(4L, 4L, cat); } else { if (b2 == glx) { predict(3L, 6L, cat); predict(3L, 7L, cat); predict(4L, 6L, cat); predict(4L, 7L, cat); } else { predict(3L, nb2, cat); predict(4L, nb2, cat); } } } else { if (b1 == glx) { if (b2 == asx) { predict(6L, 3L, cat); predict(6L, 4L, cat); predict(7L, 3L, cat); predict(7L, 4L, cat); } else { if (b2 == glx) { predict(6L, 6L, cat); predict(6L, 7L, cat); predict(7L, 6L, cat); predict(7L, 7L, cat); } else { predict(6L, nb2, cat); predict(7L, nb2, cat); } } } else { if (b2 == asx) { predict(nb1, 3L, cat); predict(nb1, 4L, cat); predict(nb1, 3L, cat); predict(nb1, 4L, cat); } else if (b2 == glx) { predict(nb1, 
6L, cat); predict(nb1, 7L, cat); predict(nb1, 6L, cat); predict(nb1, 7L, cat); } } } } if (p <= 0.0) neginfinity = true; else { lnlike += oldweight[k]*log(p); slope += oldweight[k]*dp / p; curv += oldweight[k]*(d2p / p - dp * dp / (p * p)); } } } } itterations++; if (!overlap){ printf("\nWARNING: NO OVERLAP BETWEEN SEQUENCES %ld AND %ld; -1.0 WAS WRITTEN\n", i, j+1); tt = -1.0/fracchange; itterations = 20; inf = true; } else if (!neginfinity) { if (curv < 0.0) { tt -= slope / curv; if (tt > 10000.0) { printf("\nWARNING: INFINITE DISTANCE BETWEEN SPECIES %ld AND %ld; -1.0 WAS WRITTEN\n", i, j+1); tt = -1.0/fracchange; inf = true; itterations = 20; } } else { if ((slope > 0.0 && delta < 0.0) || (slope < 0.0 && delta > 0.0)) delta /= -2; tt += delta; } } else { delta /= -2; tt += delta; } if (tt < protepsilon && !inf) tt = protepsilon; } while (itterations != 20); } else { m = 0; n = 0; for (k = 0; k < chars; k++) { b1 = gnode[i - 1][k]; b2 = gnode[j][k]; if ((((long)b1 <= (long)val) || ((long)b1 == (long)ser)) && (((long)b2 <= (long)val) || ((long)b2 == (long)ser))) { if (b1 == b2) m++; n++; } } p = 1 - (double)m / n; if (kimura) { dp = 1.0 - p - 0.2 * p * p; if (dp < 0.0) { printf( "\nDISTANCE BETWEEN SEQUENCES %3ld AND %3ld IS TOO LARGE FOR KIMURA FORMULA\n", i, j + 1); tt = -1.0; } else tt = -log(dp); } else { /* if similarity */ tt = 1.0 - p; } } d[i - 1][j] = fracchange * tt; d[j][i - 1] = d[i - 1][j]; if (progress) { putchar('.'); fflush(stdout); } } if (progress) { putchar('\n'); fflush(stdout); } } if (!similarity) { for (i = 0; i < spp; i++) { for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); k = spp; for (j = 1; j <= k; j++) { if (d[i][j-1] < 100.0) fprintf(outfile, "%10.6f", d[i][j-1]); else if (d[i][j-1] < 1000.0) fprintf(outfile, " %10.6f", d[i][j-1]); else fprintf(outfile, " %11.6f", d[i][j-1]); if ((j + 1) % 7 == 0 && j < k) putc('\n', outfile); } putc('\n', outfile); } } else { for (i = 0; i < spp; i += 6) { if ((i+6) < spp) n = i+6; else n = spp; fprintf(outfile, " "); for (j = i; j < n ; j++) { for (k = 0; k < (nmlngth-2); k++) putc(nayme[j][k], outfile); putc(' ', outfile); putc(' ', outfile); } putc('\n', outfile); for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); if ((i+6) < spp) n = i+6; else n = spp; for (k = i; k < n ; k++) if (d[j][k] < 100.0) fprintf(outfile, "%10.6f", d[j][k]); else if (d[j][k] < 1000.0) fprintf(outfile, " %10.6f", d[j][k]); else fprintf(outfile, " %11.6f", d[j][k]); putc('\n', outfile); } putc('\n', outfile); } } if (progress) printf("\nOutput written to file \"%s\"\n\n", outfilename); } /* makedists */ int main(int argc, Char *argv[]) { /* ML Protein distances by PMB, JTT, PAM or categories model */ #ifdef MAC argc = 1; /* macsetup("Protdist",""); */ argv[0] = "Protdist"; #endif init(argc, argv); emboss_getoptions("fprotdist", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); if (!(kimura || similarity)) code(); if (!(usejtt || usepmb || usepam || kimura || similarity)) { protdist_cats(); maketrans(); qreigen(prob, 20L); } else { if (kimura || similarity) fracchange = 1.0; else { if (usejtt) jtteigen(); else { if (usepmb) pmbeigen(); else pameigen(); } } } for (ith = 1; ith <= datasets; ith++) { doinput(); if (ith == 1) firstset = false; if ((datasets > 1) && progress) printf("\nData set # %ld:\n\n", ith); makedists(); } FClose(outfile); FClose(infile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); 
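/* Descriptive note (added): embExit() performs the usual EMBOSS cleanup and reporting
   before terminating, so the return statement below is normally not reached. */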
return 0; } /* Protein distances */ PHYLIPNEW-3.69.650/src/restboot.c0000664000175000017500000006730311616234204013072 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, and Doug Buxton. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ typedef enum { seqs, morphology, restsites, genefreqs } datatype; typedef enum { dna, rna, protein } seqtype; AjPPhyloState* phylorest = NULL; AjPPhyloProp phyloweights = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void seqboot_inputnumbersrest(AjPPhyloState); void inputoptions(void); void seqboot_inputdatarest(AjPPhyloState); void allocrest(void); void allocnew(void); void doinput(int argc, Char *argv[]); void bootweights(void); void sppermute(long); void charpermute(long, long); void writedata(void); void writeweights(void); void writecategories(void); void writeauxdata(steptr, FILE*); void writefactors(void); void bootwrite(void); void seqboot_inputaux(steptr, FILE*); void seqboot_inputfactors(AjPPhyloProp fact); /* function prototypes */ #endif FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; Char infilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH], mixfilename[FNMLNGTH], ancfilename[FNMLNGTH], factfilename[FNMLNGTH]; const char* outfilename; AjPFile embossoutfile; const char* outweightfilename; AjPFile embossoutweightfile; const char* outmixfilename; AjPFile embossoutmixfile; const char* outancfilename; AjPFile embossoutancfile; const char* outcatfilename; AjPFile embossoutcatfile; const char* outfactfilename; AjPFile embossoutfactfile; long sites, loci, maxalleles, groups, newsites, newersites, newgroups, newergroups, nenzymes, reps, ws, blocksize, categs, maxnewsites; boolean bootstrap, permute, ild, lockhart, jackknife, regular, xml, nexus, weights, categories, factors, enzymes, all, justwts, progress, mixture, firstrep, ancvar; double fracsample; datatype data; seqtype seq; steptr oldweight, where, how_many, newwhere, newhowmany, newerwhere, newerhowmany, factorr, newerfactor, mixdata, ancdata; steptr *charorder; Char *factor; long *alleles; Char **nodep; double **nodef; long **sppord; longer seed; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr test = NULL; AjPStr outputformat = NULL; AjPStr typeofseq = NULL; AjPStr justweights = NULL; AjBool rewrite = false; long inseed, inseed0; data = restsites; seq = dna; bootstrap = false; jackknife = false; permute = false; ild = false; lockhart = false; blocksize = 1; regular = true; fracsample = 1.0; all = false; reps = 100; weights = false; mixture = false; ancvar = false; categories = false; justwts = false; printdata = false; dotdiff = true; progress = true; interleaved = true; xml = false; nexus = false; factors = false; enzymes = false; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylorest = ajAcdGetDiscretestates("infile"); enzymes = ajAcdGetBoolean("enzymes"); test = ajAcdGetListSingle("test"); if(ajStrMatchC(test, "b")) { bootstrap = true; regular = ajAcdGetToggle("regular"); if(regular) fracsample = 1.0; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } blocksize = ajAcdGetInt("blocksize"); } else if(ajStrMatchC(test, "j")) { jackknife = true; regular = ajAcdGetToggle("regular"); 
if(regular) fracsample = 0.5; else { fracsample = ajAcdGetFloat("fracsample"); fracsample = fracsample/100.0; } } else if(ajStrMatchC(test, "c")) permute = true; else if(ajStrMatchC(test, "o")) ild = true; else if(ajStrMatchC(test, "s")) lockhart = true; else if(ajStrMatchC(test, "r")) rewrite = true; if(rewrite) { if (data == seqs) { outputformat = ajAcdGetListSingle("rewriteformat"); if(ajStrMatchC(outputformat, "n")) nexus = true; else if(ajStrMatchC(outputformat, "x")) xml = true; if( (nexus) || (xml) ) { typeofseq = ajAcdGetListSingle("seqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } if (data == morphology) { typeofseq = ajAcdGetListSingle("morphseqtype"); if(ajStrMatchC(typeofseq, "d")) seq = dna; else if(ajStrMatchC(typeofseq, "r")) seq = rna; else if(ajStrMatchC(typeofseq, "p")) seq = protein; } } else{ reps = ajAcdGetInt("reps"); inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); if(jackknife || bootstrap || permute) { phyloweights = ajAcdGetProperties("weights"); if(phyloweights) weights = true; } if(!permute) { justweights = ajAcdGetListSingle("justweights"); if(ajStrMatchC(justweights, "j")) justwts = true; } } printdata = ajAcdGetBoolean("printdata"); if(printdata) dotdiff = ajAcdGetBoolean("dotdiff"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void seqboot_inputnumbersrest(AjPPhyloState rest) { /* read numbers of species and of sites */ spp = rest->Size; sites = rest->Len; loci = sites; nenzymes = rest->Count; maxalleles = 1; } /* seqboot_inputnumbersrest */ void seqboot_inputfactors(AjPPhyloProp fact) { long i, j; Char ch, prevch; AjPStr str; prevch = ' '; str = fact->Str[0]; j = 0; for (i = 0; i < (sites); i++) { ch = ajStrGetCharPos(str,i); if (ch != prevch) j++; prevch = ch; factorr[i] = j; } } /* seqboot_inputfactors */ void inputoptions() { /* input the information on the options */ long weightsum, maxfactsize, i, j, k, l, m; if (data == genefreqs) { k = 0; l = 0; for (i = 0; i < (loci); i++) { m = alleles[i]; k++; for (j = 1; j <= m; j++) { l++; factorr[l - 1] = k; } } } else { for (i = 1; i <= (sites); i++) factorr[i - 1] = i; } for (i = 0; i < (sites); i++) oldweight[i] = 1; if (weights) inputweightsstr2(phyloweights->Str[0],0, sites, &weightsum, oldweight, &weights, "seqboot"); if (factors && printdata) { for(i = 0; i < sites; i++) factor[i] = (char)('0' + (factorr[i]%10)); printfactors(outfile, sites, factor, " (least significant digit)"); } if (weights && printdata) printweights(outfile, 0, sites, oldweight, "Sites"); for (i = 0; i < (loci); i++) how_many[i] = 0; for (i = 0; i < (loci); i++) where[i] = 0; for (i = 1; i <= (sites); i++) { how_many[factorr[i - 1] - 1]++; if (where[factorr[i - 1] - 1] == 0) where[factorr[i - 1] - 1] = i; } groups = factorr[sites - 1]; newgroups = 0; newsites = 0; maxfactsize = 0; for(i = 0 ; i < loci ; i++){ if(how_many[i] > maxfactsize){ maxfactsize = how_many[i]; } } maxnewsites = groups * maxfactsize; allocnew(); for (i = 0; i < (groups); i++) { if (oldweight[where[i] - 1] > 0) { newgroups++; newsites += how_many[i]; newwhere[newgroups - 1] = where[i]; newhowmany[newgroups - 1] = how_many[i]; } } } /* inputoptions */ void seqboot_inputdatarest(AjPPhyloState rest) { /* input the names and sequences for each species */ long i, j, k, l, m, n; Char charstate; AjPStr str; boolean allread, 
done; nodep = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < (spp); i++) nodep[i] = (Char *)Malloc(sites*sizeof(Char)); j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; if (j < nmlngth - 1) j = nmlngth - 1; if (j > 37) j = 37; if (printdata) { fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); if (bootstrap) { if (blocksize > 1) { if (regular) fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); else fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", 100*fracsample, blocksize); } else { if (regular) fprintf(outfile, "Bootstrap\n\n"); else fprintf(outfile, "Partial (%2.0f%%) bootstrap\n\n", 100*fracsample); } } else { if (jackknife) { if (regular) fprintf(outfile, "Delete-half Jackknife\n\n"); else fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); } else { if (permute) { fprintf(outfile, "Species order permuted separately for each"); if (data == morphology) fprintf(outfile, " character\n\n"); if (data == restsites) fprintf(outfile, " site\n\n"); } else { if (ild) { if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted\n\n"); } else { if (lockhart) if (data == morphology) fprintf(outfile, "Character"); if (data == restsites) fprintf(outfile, "Site"); fprintf(outfile, " order permuted separately for each species\n\n"); } } } } fprintf(outfile, "%3ld species, ", spp); if (data == seqs) fprintf(outfile, "%3ld sites\n\n", sites); else if (data == morphology) fprintf(outfile, "%3ld characters\n\n", sites); else if (data == restsites) fprintf(outfile, "%3ld sites\n\n", sites); fprintf(outfile, "Name"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "Data\n"); fprintf(outfile, "----"); for (i = 1; i <= j; i++) putc(' ', outfile); fprintf(outfile, "----\n\n"); } allread = false; while (!allread) { allread = true; i = 1; while (i <= spp) { initnamestate(rest, i-1); str = rest->Str[i-1]; j = 0; done = false; while (!done) { while (j < sites) { charstate = ajStrGetCharPos(str, j); uppercase(&charstate); j++; if (charstate == '.') charstate = nodep[0][j-1]; nodep[i-1][j-1] = charstate; } if (j == sites) done = true; } i++; } allread = (i > spp); } if (!printdata) return; m = (sites - 1) / 60 + 1; for (i = 1; i <= m; i++) { for (j = 0; j < spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > sites) l = sites; n = (i - 1) * 60; for (k = n; k < l; k++) { if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) charstate = '.'; else charstate = nodep[j][k]; putc(charstate, outfile); if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* seqboot_inputdatarest */ void allocrest() { /* allocate memory for bookkeeping arrays */ oldweight = (steptr)Malloc(sites*sizeof(long)); weight = (steptr)Malloc(sites*sizeof(long)); if (categories) category = (steptr)Malloc(sites*sizeof(long)); if (mixture) mixdata = (steptr)Malloc(sites*sizeof(long)); if (ancvar) ancdata = (steptr)Malloc(sites*sizeof(long)); where = (steptr)Malloc(loci*sizeof(long)); how_many = (steptr)Malloc(loci*sizeof(long)); factor = (Char *)Malloc(sites*sizeof(Char)); factorr = (steptr)Malloc(sites*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void allocnew(void) { /* allocate memory for arrays that depend on the lenght of the output sequence*/ long i; newwhere = (steptr)Malloc(loci*sizeof(long)); 
newhowmany = (steptr)Malloc(loci*sizeof(long)); newerwhere = (steptr)Malloc(loci*sizeof(long)); newerhowmany = (steptr)Malloc(loci*sizeof(long)); newerfactor = (steptr)Malloc(maxnewsites*maxalleles*sizeof(long)); charorder = (steptr *)Malloc(spp*sizeof(steptr)); for (i = 0; i < spp; i++) charorder[i] = (steptr)Malloc(maxnewsites*sizeof(long)); } void doinput(int argc, Char *argv[]) { /* reads the input data */ seqboot_inputnumbersrest(phylorest[0]); allocrest(); inputoptions(); seqboot_inputdatarest(phylorest[0]); } /* doinput */ void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; if (jackknife) { if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) { if (randum(seed) < (newgroups*fracsample - (long)(newgroups*fracsample)) /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) q = (long)(newgroups*fracsample)+1; else q = (long)(newgroups*fracsample); } else q = (long)(newgroups*fracsample+0.5); r = newgroups; p = q / r; ws = 0; for (i = 0; i < (newgroups); i++) { if (randum(seed) < p) { weight[i]++; ws++; q--; } r--; if (i + 1 < newgroups) p = q / r; } } else if (permute) { for (i = 0; i < (newgroups); i++) weight[i] = 1; } else if (bootstrap) { blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } } else /* case of rewriting data */ for (i = 0; i < (newgroups); i++) weight[i] = 1; for (i = 0; i < (newgroups); i++) newerwhere[i] = 0; for (i = 0; i < (newgroups); i++) newerhowmany[i] = 0; newergroups = 0; newersites = 0; for (i = 0; i < (newgroups); i++) { for (j = 1; j <= (weight[i]); j++) { newergroups++; for (k = 1; k <= (newhowmany[i]); k++) { newersites++; newerfactor[newersites - 1] = newergroups; } newerwhere[newergroups - 1] = newwhere[i]; newerhowmany[newergroups - 1] = newhowmany[i]; } } } /* bootweights */ void sppermute(long n) { /* permute the species order as given in array sppord */ long i, j, k; for (i = 1; i <= (spp - 1); i++) { k = (long)((i+1) * randum(seed)); j = sppord[n - 1][i]; sppord[n - 1][i] = sppord[n - 1][k]; sppord[n - 1][k] = j; } } /* sppermute */ void charpermute(long m, long n) { /* permute the n+1 characters of species m+1 */ long i, j, k; for (i = 1; i <= (n - 1); i++) { k = (long)((i+1) * randum(seed)); j = charorder[m][i]; charorder[m][i] = charorder[m][k]; charorder[m][k] = j; } } /* charpermute */ void writedata() { /* write out one set of bootstrapped sequences */ long i, j, k, l, m, n, n2=0; double x; Char charstate; sppord = (long **)Malloc(newergroups*sizeof(long *)); for (i = 0; i < (newergroups); i++) sppord[i] = (long *)Malloc(spp*sizeof(long)); for (j = 1; j <= spp; j++) sppord[0][j - 1] = j; for (i = 1; i < newergroups; i++) { for (j = 1; j <= (spp); j++) sppord[i][j - 1] = sppord[i - 1][j - 1]; } if (!justwts || permute) { if (data == restsites && enzymes) fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); else if (data == genefreqs) fprintf(outfile, "%5ld %5ld\n", spp, newergroups); else { if ((data == seqs) && !(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n"); else if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) { fprintf(outfile, "#NEXUS\n"); fprintf(outfile, "BEGIN DATA\n"); fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", spp, newersites); fprintf(outfile, " FORMAT"); 
fprintf(outfile, " interleave"); fprintf(outfile, " DATATYPE="); if (data == seqs) { switch (seq) { case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; case (rna): fprintf(outfile, "RNA missing=N gap=-"); break; case (protein): fprintf(outfile, "protein missing=? gap=-"); break; } } if (data == morphology) fprintf(outfile, "STANDARD"); fprintf(outfile, ";\n MATRIX\n"); } else fprintf(outfile, "%5ld %5ld\n", spp, newersites); } if (data == genefreqs) { for (i = 0; i < (newergroups); i++) fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); putc('\n', outfile); } } l = 1; if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) && ((data == seqs) || (data == restsites))) { interleaved = !interleaved; if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) interleaved = false; } if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; for (j = 0; j < spp; j++) { n = 0; if ((l == 1) || (interleaved && nexus)) { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " \n"); fprintf(outfile, " "); } n2 = nmlngth-1; if (!(bootstrap || jackknife || permute || ild || lockhart) && (xml || nexus)) { while (nayme[j][n2] == ' ') n2--; } if (nexus) fprintf(outfile, " "); for (k = 0; k <= n2; k++) if (nexus && (nayme[j][k] == ' ') && (k < n2)) putc('_', outfile); else putc(nayme[j][k], outfile); if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) fprintf(outfile, "\n "); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { fprintf(outfile, " "); } else { for (k = 1; k <= nmlngth; k++) putc(' ', outfile); } } if (nexus) for (k = 0; k < nmlngth+1-n2; k++) fprintf(outfile, " "); for (k = l - 1; k < m; k++) { if (permute && j + 1 == 1) sppermute(newerfactor[n]); /* we can assume chars not permuted */ for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (data == genefreqs) { if (n > 1 && (n & 7) == 1) fprintf(outfile, "\n "); x = nodef[sppord[newerfactor[charorder[j][n - 1]] - 1][j] - 1] [newerwhere[charorder[j][k]] + n2]; fprintf(outfile, "%8.5f", x); } else { if (!(bootstrap || jackknife || permute || ild || lockhart) && xml && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) fprintf(outfile, "\n "); charstate = nodep[sppord[newerfactor[charorder[j][n - 1]] - 1] [j] - 1][newerwhere[charorder[j][k]] + n2]; putc(charstate, outfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfile); } } } if (!(bootstrap || jackknife || permute || ild || lockhart ) && xml) { fprintf(outfile, "\n \n"); } putc('\n', outfile); } if (interleaved) { if ((m <= newersites) && (newersites > 60)) putc('\n', outfile); l += 60; m += 60; } } while (interleaved && l <= newersites); if ((data == seqs) && (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) fprintf(outfile, "\n"); if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) fprintf(outfile, " ;\nEND;\n"); for (i = 0; i < (newergroups); i++) free(sppord[i]); free(sppord); } /* writedata */ void writeweights() { /* write out one set of post-bootstrapping weights */ long j, k, l, m, n, o; j = 0; l = 1; if (interleaved) m = 60; else m = sites; do { if(m > sites) m = sites; n = 0; for (k = l - 1; k < m; k++) { for(o = 0 ; o < how_many[k] ; o++){ if(oldweight[k]==0){ fprintf(outweightfile, "0"); j++; } else{ if (weight[k-j] < 10) fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); else fprintf(outweightfile, "%c", 
(char)('A'+weight[k-j]-10)); n++; if (!interleaved && n > 1 && n % 60 == 1) { fprintf(outweightfile, "\n"); if (n % 10 == 0 && n % 60 != 0) putc(' ', outweightfile); } } } } putc('\n', outweightfile); if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= sites); } /* writeweights */ void writecategories() { /* write out categories for the bootstrapped sequences */ long k, l, m, n, n2; Char charstate; if(justwts){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n=0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[k]; putc(charstate, outcatfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outcatfile, "\n"); return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outcatfile, "\n "); charstate = '0' + category[newerwhere[k] + n2]; putc(charstate, outcatfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outcatfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outcatfile, "\n"); } /* writecategories */ void writeauxdata(steptr auxdata, FILE *outauxfile) { /* write out auxiliary option data (mixtures, ancestors, ect) to appropriate file. Samples parralel to data, or just gives one output entry if justwts is true */ long k, l, m, n, n2; Char charstate; /* if we just output weights (justwts), and this is first set just output the data unsampled */ if(justwts){ if(firstrep){ if (interleaved) m = 60; else m = sites; l=1; do { if(m > sites) m = sites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[k]; putc(charstate, outauxfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= sites); fprintf(outauxfile, "\n"); } return; } l = 1; if (interleaved) m = 60; else m = newergroups; do { if (m > newergroups) m = newergroups; n = 0; for (k = l - 1; k < m; k++) { for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outauxfile, "\n "); charstate = auxdata[newerwhere[k] + n2]; putc(charstate, outauxfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outauxfile); } } if (interleaved) { l += 60; m += 60; } } while (interleaved && l <= newersites); fprintf(outauxfile, "\n"); } /* writeauxdata */ void writefactors(void) { long k, l, m, n, prevfact, writesites; char symbol; steptr wfactor; if(!justwts || firstrep){ if(justwts){ writesites = sites; wfactor = factorr; } else { writesites = newersites; wfactor = newerfactor; } prevfact = wfactor[0]; symbol = '+'; if (interleaved) m = 60; else m = writesites; l=1; do { if(m > writesites) m = writesites; n = 0; for(k=l-1 ; k < m ; k++){ n++; if (!interleaved && n > 1 && n % 60 == 1) fprintf(outfactfile, "\n "); if(prevfact != wfactor[k]){ symbol = (symbol == '+') ? 
'-' : '+'; prevfact = wfactor[k]; } putc(symbol, outfactfile); if (n % 10 == 0 && n % 60 != 0) putc(' ', outfactfile); } if (interleaved) { l += 60; m += 60; } }while(interleaved && l <= writesites); fprintf(outfactfile, "\n"); } } /* writefactors */ void bootwrite() { /* does bootstrapping and writes out data sets */ long i, j, rr, repdiv10; if (!(bootstrap || jackknife || permute || ild || lockhart)) reps = 1; repdiv10 = reps / 10; if (repdiv10 < 1) repdiv10 = 1; if (progress) putchar('\n'); for (rr = 1; rr <= (reps); rr++) { for (i = 0; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = j; if(rr==1) firstrep = true; else firstrep = false; if (ild) { charpermute(0, maxnewsites); for (i = 1; i < spp; i++) for (j = 0; j < maxnewsites; j++) charorder[i][j] = charorder[0][j]; } if (lockhart) for (i = 0; i < spp; i++) charpermute(i, maxnewsites); bootweights(); if (!justwts || permute || ild || lockhart) writedata(); if (justwts && !(permute || ild || lockhart)) writeweights(); if (categories) writecategories(); if (factors) writefactors(); if (mixture) writeauxdata(mixdata, outmixfile); if (ancvar) writeauxdata(ancdata, outancfile); if (progress && (bootstrap || jackknife || permute || ild || lockhart) && ((reps < 10) || rr % repdiv10 == 0)) { printf("completed replicate number %4ld\n", rr); #ifdef WIN32 phyFillScreenColor(); #endif } } if (progress) { if (justwts) printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); else printf("\nOutput written to file \"%s\"\n\n", outfilename); } } /* bootwrite */ int main(int argc, Char *argv[]) { /* Read in sequences or frequencies and bootstrap or jackknife them */ #ifdef MAC argc = 1; /* macsetup("SeqBoot",""); */ argv[0] = "SeqBoot"; #endif init(argc,argv); emboss_getoptions("frestboot", argc, argv); ibmpc = IBMCRT; ansi = ANSICRT; doinput(argc, argv); bootwrite(); FClose(infile); if (weights) FClose(weightfile); if (categories) { FClose(catfile); FClose(outcatfile); } if(mixture) FClose(outmixfile); if(ancvar) FClose(outancfile); if (justwts && !permute) { FClose(outweightfile); } else FClose(outfile); #ifdef MAC fixmacfile(outfilename); if (justwts && !permute) fixmacfile(outweightfilename); if (categories) fixmacfile(outcatfilename); if (mixture) fixmacfile(outmixfilename); #endif if(progress) printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/cons.c0000664000175000017500000011740711274017361012177 00000000000000#include "phylip.h" #include "cons.h" int tree_pairing; Char intreename[FNMLNGTH], intree2name[FNMLNGTH]; node *root; long numopts, outgrno, col, setsz; long maxgrp; /* max. no. 
of groups in all trees found */ boolean trout, firsttree, noroot, outgropt, didreroot, prntsets, progress, treeprint, goteof, strict, mr=false, mre=false, ml=false; /* initialized all false for Treedist */ pointarray nodep; pointarray treenode; group_type **grouping, **grping2, **group2;/* to store groups found */ double *lengths, *lengths2; long **order, **order2, lasti; group_type *fullset; node *grbg; long tipy; double **timesseen, **tmseen2, **times2 ; double *timesseen_changes, *tchange2; double trweight, ntrees, mlfrac; /* prototypes */ void censor(void); boolean compatible(long, long); void elimboth(long); void enterpartition (group_type*, long*); void reorient(node* n); void phylipcompress(long *n); /* begin hash table code */ #define NUM_BUCKETS 100 typedef struct namenode { struct namenode *next; plotstring naym; int hitCount; } namenode; typedef namenode **hashtype; hashtype hashp; long namesGetBucket(plotstring); void namesAdd(plotstring); boolean namesSearch(plotstring); void namesDelete(plotstring); void namesClearTable(void); void namesCheckTable(void); void missingnameRecurs(node *p); /** * namesGetBucket - return the bucket for a given name */ long namesGetBucket(plotstring searchname) { long i; long sum = 0; for (i = 0; (i < MAXNCH) && (searchname[i] != '\0'); i++) { sum += searchname[i]; } return (sum % NUM_BUCKETS); } /** * namesAdd - add a name to the hash table * * The argument is added at the head of the appropriate linked list. No * checking is done for duplicates. The caller can call * namesSearch to check for an existing name prior to calling * namesAdd. */ void namesAdd(plotstring addname) { long bucket = namesGetBucket(addname); namenode *hp, *temp; temp = hashp[bucket]; hashp[bucket] = (namenode *)Malloc(sizeof(namenode)); hp = hashp[bucket]; strcpy(hp->naym, addname); hp->next = temp; hp->hitCount = 0; } /** * namesSearch - search for a name in the hash table * * Return true if the name is found, else false. */ boolean namesSearch(plotstring searchname) { long i = namesGetBucket(searchname); namenode *p; p = hashp[i]; if (p == NULL) { return false; } do { if (strcmp(searchname,p->naym) == 0) { p->hitCount++; return true; } p = p->next; } while (p != NULL); return false; } /** * Go through hash table and check that the hit count on all entries is one. * If it is zero, then a species was missed, if it is two, then there is a * duplicate species. 
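 *
 * A minimal usage sketch (a gloss, not part of the original source;
 * `tipname` is a hypothetical plotstring holding one tip label):
 *
 *   if (namesSearch(tipname))     a hit also increments hitCount
 *     ... name already stored: a duplicate within this tree ...
 *   else
 *     namesAdd(tipname);          prepend to its bucket with hitCount 0
 *
 * dupname() further down follows this pattern for the first tree read;
 * missingname() then looks up every tip of each later tree and calls
 * namesCheckTable(), which flags any stored name hit zero times
 * (missing species) or more than once (duplicate).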
*/ void namesCheckTable(void) { namenode *p; long i; for (i=0; i< NUM_BUCKETS; i++) { p = hashp[i]; while (p != NULL){ if(p->hitCount >1){ printf("\n\nERROR in user tree: duplicate name found: "); puts(p->naym); printf("\n\n"); exxit(-1); } else if(p->hitCount == 0){ printf("\n\nERROR in user tree: name %s not found\n\n\n", p->naym); exxit(-1); } p->hitCount = 0; p = p->next; } } } /** * namesClearTable - empty names out of the table and * return allocated memory */ void namesClearTable(void) { long i; namenode *p, *temp; for (i=0; i< NUM_BUCKETS; i++) { p = hashp[i]; if (p != NULL) { do { temp = p; p = p->next; free(temp); } while (p != NULL); hashp[i] = NULL; } } } /* end hash table code */ void initconsnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ long i; boolean minusread; double valyew, divisor, fracchange; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; for (i=0; inayme[i] = '\0'; nodep[(*p)->index - 1] = (*p); (*p)->v = 0; break; case nonbottom: gnu(grbg, p); (*p)->index = nodei; (*p)->v = 0; break; case tip: (*ntips)++; gnu(grbg, p); nodep[(*ntips) - 1] = *p; setupnode(*p, *ntips); (*p)->tip = true; strncpy ((*p)->nayme, str, MAXNCH); if (firsttree && prntsets) { fprintf(outfile, " %ld. ", *ntips); for (i = 0; i < len; i++) putc(str[i], outfile); putc('\n', outfile); if ((*ntips > 0) && (((*ntips) % 10) == 0)) putc('\n', outfile); } (*p)->v = 0; break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); fracchange = 1.0; (*p)->v = valyew / divisor / fracchange; break; case treewt: if (**treestr) { trweight = strtod(*treestr, treestr); if(trweight) { sgetch(ch, parens, treestr); if (*ch != ']') { ajErr("ERROR: Missing right square bracket"); exxit(-1); } else { sgetch(ch, parens, treestr); if (*ch != ';') { ajErr("ERROR: Missing semicolon after square brackets"); exxit(-1); } } } else { ajErr("ERROR: Expecting tree weight in last comment field"); exxit(-1); } } break; case unittrwt: /* This comes not only when setting trweight but also at the end of * any tree. The following code saves the current position in a * file and reads to a new line. If there is a new line then we're * at the end of tree, otherwise warn the user. 
*/ trweight = 1.0 ; break; case hsnolength: (*p)->v = -1; /* signal value that a length is missing */ break; default: /* cases hslength, iter, hsnolength */ break; /* should there be an error message here?*/ } } /* initconsnode */ void censor(void) { /* delete groups that are too rare to be in the consensus tree */ long i; i = 1; do { if (timesseen[i-1]) if (!(mre || (mr && (2*(*timesseen[i-1]) > ntrees)) || (ml && ((*timesseen[i-1]) > mlfrac*ntrees)) || (strict && ((*timesseen[i-1]) == ntrees)))) { free(grouping[i - 1]); free(timesseen[i - 1]); grouping[i - 1] = NULL; timesseen[i - 1] = NULL; } i++; } while (i < maxgrp); } /* censor */ void phylipcompress(long *n) { /* push all the nonempty subsets to the front end of their array */ long i, j; i = 1; j = 1; do { while (grouping[i - 1] != NULL) i++; if (j <= i) j = i + 1; while ((grouping[j - 1] == NULL) && (j < maxgrp)) j++; if (j < maxgrp) { grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); timesseen[i - 1] = (double *)Malloc(sizeof(double)); memcpy(grouping[i - 1], grouping[j - 1], setsz * sizeof(group_type)); *timesseen[i - 1] = *timesseen[j - 1]; free(grouping[j - 1]); free(timesseen[j - 1]); grouping[j - 1] = NULL; timesseen[j - 1] = NULL; } } while (j != maxgrp); (*n) = i - 1; } /* phylipcompress */ void sort(long n) { /* Shell sort keeping grouping, timesseen in same order */ long gap, i, j; group_type *stemp; double rtemp; gap = n / 2; stemp = (group_type *)Malloc(setsz * sizeof(group_type)); while (gap > 0) { for (i = gap + 1; i <= n; i++) { j = i - gap; while (j > 0) { if (*timesseen[j - 1] < *timesseen[j + gap - 1]) { memcpy(stemp, grouping[j - 1], setsz * sizeof(group_type)); memcpy(grouping[j - 1], grouping[j + gap - 1], setsz * sizeof(group_type)); memcpy(grouping[j + gap - 1], stemp, setsz * sizeof(group_type)); rtemp = *timesseen[j - 1]; *timesseen[j - 1] = *timesseen[j + gap - 1]; *timesseen[j + gap - 1] = rtemp; } j -= gap; } } gap /= 2; } free(stemp); } /* sort */ boolean compatible(long i, long j) { /* are groups i and j compatible? 
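   Groups are stored as bit sets (setsz longs, SETBITS species per long).
   Two groups are treated as compatible when the sets are disjoint, when
   one contains the other, or, for unrooted trees (noroot), when together
   they cover the full species set.  The disjointness test, as a sketch of
   the word-by-word form used for all four cases in the body below:
     comp = true;
     for (k = 0; k < setsz; k++)
       if ((grouping[i][k] & grouping[j][k]) != 0) comp = false;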
*/ boolean comp; long k; comp = true; for (k = 0; k < setsz; k++) if ((grouping[i][k] & grouping[j][k]) != 0) comp = false; if (!comp) { comp = true; for (k = 0; k < setsz; k++) if ((grouping[i][k] & ~grouping[j][k]) != 0) comp = false; if (!comp) { comp = true; for (k = 0; k < setsz; k++) if ((grouping[j][k] & ~grouping[i][k]) != 0) comp = false; if (!comp) { comp = noroot; if (comp) { for (k = 0; k < setsz; k++) if ((fullset[k] & ~grouping[i][k] & ~grouping[j][k]) != 0) comp = false; } } } } return comp; } /* compatible */ void eliminate(long *n, long *n2) { /* eliminate groups incompatible with preceding ones */ long i, j, k; boolean comp; for (i = 2; i <= (*n); i++) { comp = true; for (j = 0; comp && (j <= i - 2); j++) { if ((timesseen[j] != NULL) && *timesseen[j] > 0) { comp = compatible(i-1,j); if (!comp) { (*n2)++; times2[(*n2) - 1] = (double *)Malloc(sizeof(double)); group2[(*n2) - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); *times2[(*n2) - 1] = *timesseen[i - 1]; memcpy(group2[(*n2) - 1], grouping[i - 1], setsz * sizeof(group_type)); *timesseen[i - 1] = 0.0; for (k = 0; k < setsz; k++) grouping[i - 1][k] = 0; } } } if (*timesseen[i - 1] == 0.0) { free(grouping[i - 1]); free(timesseen[i - 1]); timesseen[i - 1] = NULL; grouping[i - 1] = NULL; } } } /* eliminate */ void printset(long n) { /* print out the n sets of species */ long i, j, k, size; boolean noneprinted; fprintf(outfile, "\nSet (species in order) "); for (i = 1; i <= spp - 25; i++) putc(' ', outfile); fprintf(outfile, " How many times out of %7.2f\n\n", ntrees); noneprinted = true; for (i = 0; i < n; i++) { if ((timesseen[i] != NULL) && (*timesseen[i] > 0)) { size = 0; k = 0; for (j = 1; j <= spp; j++) { if (j == ((k+1)*SETBITS+1)) k++; if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0) size++; } if (size != 1 && !(noroot && size >= (spp-1))) { noneprinted = false; k = 0; for (j = 1; j <= spp; j++) { if (j == ((k+1)*SETBITS+1)) k++; if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0) putc('*', outfile); else putc('.', outfile); if (j % 10 == 0) putc(' ', outfile); } for (j = 1; j <= 23 - spp; j++) putc(' ', outfile); fprintf(outfile, " %5.2f\n", *timesseen[i]); } } } if (noneprinted) fprintf(outfile, " NONE\n"); } /* printset */ void bigsubset(group_type *st, long n) { /* Find a maximal subset of st among the n groupings, to be the set at the base of the tree. 
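   That is: scan the recorded groups, and whenever one is a proper subset
   of st that properly contains the best candidate so far, make it the new
   candidate; the final candidate overwrites st.  recontraverse() below
   uses this to split the current species set into the subtrees of a node.
   A small worked case (hypothetical, not from the source): with species
   A-E, recorded groups {A,B} and {A,B,C} plus the tip singletons,
   starting from st = {A,B,C,D,E} bigsubset returns {A,B,C}; recontraverse
   hangs it as one child, strips those species out, and the remaining
   {D,E} resolves into the two tips D and E.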
*/ long i, j; group_type *su; boolean max, same; su = (group_type *)Malloc(setsz * sizeof(group_type)); for (i = 0; i < setsz; i++) su[i] = 0; for (i = 0; i < n; i++) { max = true; for (j = 0; j < setsz; j++) if ((grouping[i][j] & ~st[j]) != 0) max = false; if (max) { same = true; for (j = 0; j < setsz; j++) if (grouping[i][j] != st[j]) same = false; max = !same; } if (max) { for (j = 0; j < setsz; j ++) if ((su[j] & ~grouping[i][j]) != 0) max = false; if (max) { same = true; for (j = 0; j < setsz; j ++) if (su[j] != grouping[i][j]) same = false; max = !same; } if (max) memcpy(su, grouping[i], setsz * sizeof(group_type)); } } memcpy(st, su, setsz * sizeof(group_type)); free(su); } /* bigsubset */ void recontraverse(node **p, group_type *st, long n, long *nextnode) { /* traverse to add next node to consensus tree */ long i, j = 0, k = 0, l = 0; boolean found, same = 0, zero, zero2; group_type *tempset, *st2; node *q, *r; for (i = 1; i <= spp; i++) { /* count species in set */ if (i == ((l+1)*SETBITS+1)) l++; if (((1L << (i - 1 - l*SETBITS)) & st[l]) != 0) { k++; /* k is the number of species in the set */ j = i; /* j is set to last species in the set */ } } if (k == 1) { /* if only 1, set up that tip */ *p = nodep[j - 1]; (*p)->tip = true; (*p)->index = j; return; } gnu(&grbg, p); /* otherwise make interior node */ (*p)->tip = false; (*p)->index = *nextnode; nodep[*nextnode - 1] = *p; (*nextnode)++; (*p)->deltav = 0.0; for (i = 0; i < n; i++) { /* go through all sets */ same = true; /* to find one which is this one */ for (j = 0; j < setsz; j++) if (grouping[i][j] != st[j]) same = false; if (same) (*p)->deltav = *timesseen[i]; } tempset = (group_type *)Malloc(setsz * sizeof(group_type)); memcpy(tempset, st, setsz * sizeof(group_type)); q = *p; st2 = (group_type *)Malloc(setsz * sizeof(group_type)); memcpy(st2, st, setsz * sizeof(group_type)); zero = true; /* having made two copies of the set ... */ for (j = 0; j < setsz; j++) /* see if they are empty */ if (tempset[j] != 0) zero = false; if (!zero) bigsubset(tempset, n); /* find biggest set within it */ zero = zero2 = false; /* ... 
tempset is that subset */ while (!zero && !zero2) { zero = zero2 = true; for (j = 0; j < setsz; j++) { if (st2[j] != 0) zero = false; if (tempset[j] != 0) zero2 = false; } if (!zero && !zero2) { gnu(&grbg, &q->next); q->next->index = q->index; q = q->next; q->tip = false; r = *p; recontraverse(&q->back, tempset, n, nextnode); /* put it on tree */ *p = r; q->back->back = q; for (j = 0; j < setsz; j++) st2[j] &= ~tempset[j]; /* remove that subset from the set */ memcpy(tempset, st2, setsz * sizeof(group_type)); /* that becomes set */ found = false; i = 1; while (!found && i <= n) { if (grouping[i - 1] != 0) { same = true; for (j = 0; j < setsz; j++) if (grouping[i - 1][j] != tempset[j]) same = false; } if ((grouping[i - 1] != 0) && same) found = true; else i++; } zero = true; for (j = 0; j < setsz; j++) if (tempset[j] != 0) zero = false; if (!zero && !found) bigsubset(tempset, n); } } q->next = *p; free(tempset); free(st2); } /* recontraverse */ void reconstruct(long n) { /* reconstruct tree from the subsets */ long nextnode; group_type *s; nextnode = spp + 1; s = (group_type *)Malloc(setsz * sizeof(group_type)); memcpy(s, fullset, setsz * sizeof(group_type)); recontraverse(&root, s, n, &nextnode); free(s); } /* reconstruct */ void coordinates(node *p, long *tipy) { /* establishes coordinates of nodes */ node *q, *first, *last; long maxx; if (p->tip) { p->xcoord = 0; p->ycoord = *tipy; p->ymin = *tipy; p->ymax = *tipy; (*tipy) += down; return; } q = p->next; maxx = 0; while (q != p) { coordinates(q->back, tipy); if (!q->back->tip) { if (q->back->xcoord > maxx) maxx = q->back->xcoord; } q = q->next; } first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = maxx + OVER; p->ycoord = (long)((first->ycoord + last->ycoord) / 2); p->ymin = first->ymin; p->ymax = last->ymax; } /* coordinates */ void drawline(long i) { /* draws one row of the tree diagram by moving up tree */ node *p, *q; long n, j; boolean extra, done, trif; node *r, *first = NULL, *last = NULL; boolean found; p = root; q = root; fprintf(outfile, " "); extra = false; trif = false; do { if (!p->tip) { found = false; r = p->next; while (r != p && !found) { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; found = true; } else r = r->next; } first = p->next->back; r = p; while (r->next != p) r = r->next; last = r->back; } done = (p->tip || p == q); n = p->xcoord - q->xcoord; if (extra) { n--; extra = false; } if (q->ycoord == i && !done) { if (trif) putc('-', outfile); else putc('+', outfile); trif = false; if (!q->tip) { for (j = 1; j <= n - 8; j++) putc('-', outfile); if (noroot && (root->next->next->next == root) && (((root->next->back == q) && root->next->next->back->tip) || ((root->next->next->back == q) && root->next->back->tip))) fprintf(outfile, "-------|"); else { if (!strict) { /* write number of times seen */ if (q->deltav >= 10000) fprintf(outfile, "-%5.0f-|", (double)q->deltav); else if (q->deltav >= 1000) fprintf(outfile, "--%4.0f-|", (double)q->deltav); else if (q->deltav >= 100) fprintf(outfile, "-%5.1f-|", (double)q->deltav); else if (q->deltav >= 10) fprintf(outfile, "--%4.1f-|", (double)q->deltav); else fprintf(outfile, "--%4.2f-|", (double)q->deltav); } else fprintf(outfile, "-------|"); } extra = true; trif = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip && last->ycoord > i && first->ycoord < i && (i != p->ycoord || p == root)) { putc('|', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', 
outfile); if (trif) trif = false; } if (q != p) p = q; } while (!done); if (p->ycoord == i && p->tip) { for (j = 0; (j < MAXNCH) && (p->nayme[j] != '\0'); j++) putc(p->nayme[j], outfile); } putc('\n', outfile); } /* drawline */ void printree() { /* prints out diagram of the tree */ long i; long tipy; if (treeprint) { fprintf(outfile, "\nCONSENSUS TREE:\n"); if (mr || mre || ml) { if (noroot) { fprintf(outfile, "the numbers on the branches indicate the number\n"); fprintf(outfile, "of times the partition of the species into the two sets\n"); fprintf(outfile, "which are separated by that branch occurred\n"); } else { fprintf(outfile, "the numbers forks indicate the number\n"); fprintf(outfile, "of times the group consisting of the species\n"); fprintf(outfile, "which are to the right of that fork occurred\n"); } fprintf(outfile, "among the trees, out of %6.2f trees\n", ntrees); if (ntrees <= 1.001) fprintf(outfile, "(trees had fractional weights)\n"); } tipy = 1; coordinates(root, &tipy); putc('\n', outfile); for (i = 1; i <= tipy - down; i++) drawline(i); putc('\n', outfile); } if (noroot) { fprintf(outfile, "\n remember:"); if (didreroot) fprintf(outfile, " (though rerooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n"); } putc('\n', outfile); } /* printree */ void enterpartition (group_type *s1, long *n) { /* try to put this partition in list of partitions. If implied by others, don't bother. If others implied by it, replace them. If this one vacuous because only one element in s1, forget it */ long i, j; boolean found; /* this stuff all to be rewritten but left here so pieces can be used */ found = false; for (i = 0; i < (*n); i++) { /* go through looking whether it is there */ found = true; for (j = 0; j < setsz; j++) { /* check both parts of partition */ found = found && (grouping[i][j] == s1[j]); found = found && (group2[i][j] == (fullset[j] & (~s1[j]))); } if (found) break; } if (!found) { /* if not, add it to the slot after the end, which must be empty */ grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type)); timesseen[i] = (double *)Malloc(sizeof(double)); group2[i] = (group_type *)Malloc(setsz * sizeof(group_type)); for (j = 0; j < setsz; j++) grouping[i][j] = s1[j]; *timesseen[i] = 1; (*n)++; } } /* enterpartition */ void elimboth(long n) { /* for Adams case: eliminate pairs of groups incompatible with each other */ long i, j; boolean comp; for (i = 0; i < n-1; i++) { for (j = i+1; j < n; j++) { comp = compatible(i,j); if (!comp) { *timesseen[i] = 0.0; *timesseen[j] = 0.0; } } if (*timesseen[i] == 0.0) { free(grouping[i]); free(timesseen[i]); timesseen[i] = NULL; grouping[i] = NULL; } } if (*timesseen[n-1] == 0.0) { free(grouping[n-1]); free(timesseen[n-1]); timesseen[n-1] = NULL; grouping[n-1] = NULL; } } /* elimboth */ void consensus(pattern_elm ***pattern_array, long trees_in) { long i, n, n2, tipy; group2 = (group_type **) Malloc(maxgrp*sizeof(group_type *)); for (i = 0; i < maxgrp; i++) group2[i] = NULL; times2 = (double **)Malloc(maxgrp*sizeof(double *)); for (i = 0; i < maxgrp; i++) times2[i] = NULL; n2 = 0; censor(); /* drop groups that are too rare */ phylipcompress(&n); /* push everybody to front of array */ if (!strict) { /* drop those incompatible, if any */ sort(n); eliminate(&n, &n2); phylipcompress(&n); } reconstruct(n); tipy = 1; coordinates(root, &tipy); if (prntsets) { fprintf(outfile, "\nSets included in the consensus tree\n"); printset(n); for (i = 0; i < n2; i++) { if (!grouping[i]) { grouping[i] = (group_type *)Malloc(setsz * 
sizeof(group_type)); timesseen[i] = (double *)Malloc(sizeof(double)); } memcpy(grouping[i], group2[i], setsz * sizeof(group_type)); *timesseen[i] = *times2[i]; } n = n2; fprintf(outfile, "\n\nSets NOT included in consensus tree:"); if (n2 == 0) fprintf(outfile, " NONE\n"); else { putc('\n', outfile); printset(n); } } putc('\n', outfile); if (strict) fprintf(outfile, "\nStrict consensus tree\n"); if (mre) fprintf(outfile, "\nExtended majority rule consensus tree\n"); if (ml) { fprintf(outfile, "\nM consensus tree (l = %4.2f)\n", mlfrac); fprintf(outfile, " l\n"); } if (mr) fprintf(outfile, "\nMajority rule consensus tree\n"); printree(); free(nayme); for (i = 0; i < maxgrp; i++) free(grouping[i]); free(grouping); for (i = 0; i < maxgrp; i++) free(order[i]); free(order); for (i = 0; i < maxgrp; i++) if (timesseen[i] != NULL) free(timesseen[i]); free(timesseen); } /* consensus */ void rehash() { group_type *s; long i, j; double temp, ss, smult; boolean done; long old_maxgrp = maxgrp; long new_maxgrp = maxgrp*2; tmseen2 = (double **)Malloc(new_maxgrp*sizeof(double *)); grping2 = (group_type **)Malloc(new_maxgrp*sizeof(group_type *)); order2 = (long **)Malloc(new_maxgrp*sizeof(long *)); lengths2 = (double *)Malloc(new_maxgrp*sizeof(double)); tchange2 = (double *)Malloc(new_maxgrp*sizeof(double)); for (i = 0; i < new_maxgrp; i++) { tmseen2[i] = NULL; grping2[i] = NULL; order2[i] = NULL; lengths2[i] = 0.0; tchange2[i] = 0.0; } smult = (sqrt(5.0) - 1) / 2; s = (group_type *)Malloc(setsz * sizeof(group_type)); for (i = 0; i < old_maxgrp; i++) { long old_index = *order[i]; long new_index = -1; memcpy(s, grouping[old_index], setsz * sizeof(group_type)); ss = 0.0; for (j = 0; j < setsz; j++) ss += s[j] /* pow(2, SETBITS*j)*/; temp = ss * smult; new_index = (long)(new_maxgrp * (temp - floor(temp))); done = false; while (!done) { if (!grping2[new_index]) { grping2[new_index] = (group_type *)Malloc(setsz * sizeof(group_type)); memcpy(grping2[new_index], grouping[old_index], setsz * sizeof(group_type)); *order2[i] = new_index; tmseen2[new_index] = (double *)Malloc(sizeof(double)); *tmseen2[new_index] = *timesseen[old_index]; lengths2[new_index] = lengths[old_index]; tchange2[new_index] = timesseen_changes[old_index]; free(grouping[old_index]); free(timesseen[old_index]); free(order[i]); grouping[old_index] = NULL; timesseen[old_index] = NULL; order[i] = NULL; done = true; /* successfully found place for this item */ } else { new_index++; if (new_index >= new_maxgrp) new_index -= new_maxgrp; } } } free(lengths); free(timesseen); free(grouping); free(order); free(timesseen_changes); free(s); timesseen = tmseen2; grouping = grping2; lengths = lengths2; order = order2; timesseen_changes = tchange2; maxgrp = new_maxgrp; } /* rehash */ void enternodeset(node* r) { /* enter a set of species into the hash table */ long i, j, start; double ss, n; boolean done, same; double times ; group_type *s; s = r->nodeset; /* do not enter full sets */ same = true; for (i = 0; i < setsz; i++) if (s[i] != fullset[i]) same = false; if (same) return; times = trweight; ss = 0.0; /* compute the hashcode for the set */ n = ((sqrt(5.0) - 1.0) / 2.0); /* use an irrational multiplier */ for (i = 0; i < setsz; i++) ss += s[i] * n; i = (long)(maxgrp * (ss - floor(ss))) + 1; /* use fractional part of code */ start = i; done = false; /* go through seeing if it is there */ while (!done) { if (grouping[i - 1]) { /* ... i.e. if group is absent, or */ same = false; /* (will be false if timesseen = 0) */ if (!(timesseen[i-1] == 0)) { /* ... 
if timesseen = 0 */ same = true; for (j = 0; j < setsz; j++) { if (s[j] != grouping[i - 1][j]) same = false; } } } if (grouping[i - 1] && same) { /* if it is there, increment timesseen */ *timesseen[i - 1] += times; lengths[i - 1] = nodep[r->index - 1]->v; done = true; } else if (!grouping[i - 1]) { /* if not there and slot empty ... */ grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); lasti++; order[lasti] = (long *)Malloc(sizeof(long)); timesseen[i - 1] = (double *)Malloc(sizeof(double)); memcpy(grouping[i - 1], s, setsz * sizeof(group_type)); *timesseen[i - 1] = times; *order[lasti] = i - 1; done = true; lengths[i - 1] = nodep[r->index -1]->v; } else { /* otherwise look to put it in next slot ... */ i++; if (i > maxgrp) i -= maxgrp; } if (!done && i == start) { /* if no place to put it, expand hash table */ rehash(); done = true; enternodeset(r); /* calls this procedure again, but now there should be space */ } } } /* enternodeset */ /* recursively crawls through tree, setting nodeset values to be the * bitwise OR of bits from downstream nodes */ void accumulate(node *r) { node *q; long i; /* zero out nodeset values. since we are re-using tree nodes, * the malloc only happens the first time we encounter a node. */ if (!r->nodeset) { r->nodeset = (group_type *)Malloc(setsz * sizeof(group_type)); } for (i = 0; i < setsz; i++) { r->nodeset[i] = 0L; } if (r->tip) { /* tip nodes should have a single bit set corresponding to index-1 */ i = (r->index-1) / (long)SETBITS; r->nodeset[i] = 1L << (r->index - 1 - i*SETBITS); } else { /* for loop should not visit r->back -- we've likely come from there */ for (q = r->next; q != r; q = q->next) { /* recursive call to this function */ accumulate(q->back); /* bitwise OR of bits from downstream nodes */ for (i = 0; i < setsz; i++) r->nodeset[i] |= q->back->nodeset[i]; } } if ((!r->tip && (r->next->next != r)) || r->tip) enternodeset(r); } /* accumulate */ void dupname2(Char *name, node *p, node *this) { /* search for a duplicate name recursively */ node *q; if (p->tip) { if (p != this) { if (namesSearch(p->nayme)) { printf("\n\nERROR in user tree: duplicate name found: "); puts(p->nayme); printf("\n\n"); exxit(-1); } else { namesAdd(p->nayme); } } } else { q = p; while (p->next != q) { dupname2(name, p->next->back, this); p = p->next; } } } /* dupname2 */ void dupname(node *p) { /* Recursively searches tree, starting at p, to verify that * each tip name occurs only once. When called with root as * its argument, at final recusive exit, all tip names should * be in the hash "hashp". 
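 * read_groups() below applies these checks per tree; the intended call
 * order, paraphrased rather than quoted:
 *   first tree:   dupname(root);      fill hashp, reject duplicate tips
 *                 initreenode(root);  record the tip names in nayme[]
 *   later trees:  missingname(root);  every tip must already be in hashp;
 *                                     namesCheckTable() then insists each
 *                                     stored name was hit exactly once
 *                 reordertips();      realign nodep[] with first-tree order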
*/ node *q; if (p->tip) { if (namesSearch(p->nayme)) { printf("\n\nERROR in user tree: duplicate name found: "); puts(p->nayme); printf("\n\n"); exxit(-1); } else { namesAdd(p->nayme); } } else { q = p; while (p->next != q) { dupname(p->next->back); p = p->next; } } } /* dupname */ void missingnameRecurs(node *p) { /* search for missing names in first tree */ node *q; if (p->tip) { if (!namesSearch(p->nayme)) { printf("\n\nERROR in user tree: name %s not found in first tree\n\n\n", p->nayme); exxit(-1); } } else { q = p; while (p->next != q) { missingnameRecurs(p->next->back); p = p->next; } } } /* missingnameRecurs */ /** * wrapper for recursive missingname function */ void missingname(node *p){ missingnameRecurs(p); namesCheckTable(); } /* missingname */ void gdispose(node *p) { /* go through tree throwing away nodes */ node *q, *r; if (p->tip) { chuck(&grbg, p); return; } q = p->next; while (q != p) { gdispose(q->back); r = q; q = q->next; chuck(&grbg, r); } chuck(&grbg, p); } /* gdispose */ void initreenode(node *p) { /* traverse tree and assign species names to tip nodes */ node *q; if (p->tip) { memcpy(nayme[p->index - 1], p->nayme, MAXNCH); } else { q = p->next; while (q && q != p) { initreenode(q->back); q = q->next; } } } /* initreenode */ void reroot(node *outgroup, long *nextnode) { /* reorients and reorients tree, placing root at outgroup */ long i; node *p, *q; double newv; /* count root's children & find last */ p = root; i = 0; while (p->next != root) { p = p->next; i++; } if (i == 2) { /* 2 children: */ q = root->next; newv = q->back->v + p->back->v; /* if outgroup is already here, just move * its length to the other branch and finish */ if (outgroup == p->back) { /* flip branch order at root so that outgroup * is first, just to be consistent */ root->next = p; p->next = q; q->next = root; q->back->v = newv; p->back->v = 0; return; } if (outgroup == q) { p->back->v = newv; q->back->v = 0; return; } /* detach root by linking child nodes */ q->back->back = p->back; p->back->back = q->back; p->back->v = newv; q->back->v = newv; } else { /* 3+ children */ p->next = root->next; /* join old root nodes */ nodep[root->index-1] = root->next; /* make root->next the primary node */ /* create new root nodes */ gnu(&grbg, &root->next); q = root->next; gnu(&grbg, &q->next); p = q->next; p->next = root; q->tip = false; p->tip = false; nodep[*nextnode] = root; (*nextnode)++; root->index = *nextnode; root->next->index = root->index; root->next->next->index = root->index; } newv = outgroup->v; /* root is 3 "floating" nodes */ /* q == root->next */ /* p == root->next->next */ /* attach root at outgroup */ q->back = outgroup; p->back = outgroup->back; outgroup->back->back = p; outgroup->back = q; outgroup->v = 0; outgroup->back->v = 0; root->v = 0; p->v = newv; p->back->v = newv; reorient(root); } /* reroot */ void reorient(node* n) { node* p; if ( n->tip ) return; if ( nodep[n->index - 1] != n ) { nodep[n->index - 1] = n; if ( n->back ) n->v = n->back->v; } for ( p = n->next ; p != n ; p = p->next) reorient(p->back); } void store_pattern (pattern_elm ***pattern_array, int trees_in_file) { /* put a tree's groups into a pattern array. Don't forget that when not Adams, grouping[] is not compressed. . . */ long i, total_groups=0, j=0, k; /* First, find out how many groups exist in the given tree. 
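   A group counts as present in this tree when its running total
   *timesseen[i] exceeds timesseen_changes[i], the snapshot taken when the
   previous tree was stored; the snapshot is refreshed further down once
   the group has been copied into the pattern (see the EWFIX note there).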
*/ for (i = 0 ; i < maxgrp ; i++) if ((grouping[i] != NULL) && (*timesseen[i] > timesseen_changes[i])) /* If this is group exists and is present in the current tree, */ total_groups++ ; /* Then allocate a space to store the bit patterns. . . */ for (i = 0 ; i < setsz ; i++) { pattern_array[i][trees_in_file] = (pattern_elm *) Malloc(sizeof(pattern_elm)) ; pattern_array[i][trees_in_file]->apattern = (group_type *) Malloc (total_groups * sizeof (group_type)) ; pattern_array[i][trees_in_file]->length = (double *) Malloc (maxgrp * sizeof (double)) ; for ( j = 0 ; j < maxgrp ; j++ ) { pattern_array[i][trees_in_file]->length[j] = -1; } pattern_array[i][trees_in_file]->patternsize = (long *)Malloc(sizeof(long)); } j = 0; /* Then go through groupings again, and copy in each element appropriately. */ for (i = 0 ; i < maxgrp ; i++) if (grouping[i] != NULL) { if (*timesseen[i] > timesseen_changes[i]) { for (k = 0 ; k < setsz ; k++) pattern_array[k][trees_in_file]->apattern[j] = grouping[i][k] ; pattern_array[0][trees_in_file]->length[j] = lengths[i]; j++ ; timesseen_changes[i] = *timesseen[i] ; /* EWFIX.BUG.756 updates timesseen_changes to the current value pointed to by timesseen treedist uses this to determine if group i has been seen by comparing timesseen_changes[i] (the count now) with timesseen[i] (the count after reading next tree) We could make treedist more efficient by not keeping timesseen (and groupings, etc) around, but doing it this way allows us to share code between treedist and consense. */ } } *pattern_array[0][trees_in_file]->patternsize = total_groups; } /* store_pattern */ boolean samename(naym name1, plotstring name2) { return !(strncmp(name1, name2, MAXNCH)); } /* samename */ void reordertips() { /* Reorders nodep[] and indexing to match species order from first tree */ /* Assumes tree has spp tips and nayme[] has spp elements, and that there is a * one-to-one mapping between tip names and the names in nayme[]. */ long i, j; node *t; for (i = 0; i < spp-1; i++) { for (j = i + 1; j < spp; j++) { if (samename(nayme[i], nodep[j]->nayme)) { /* switch the pointers in * nodep[] and set index accordingly for each node. */ t = nodep[i]; nodep[i] = nodep[j]; nodep[i]->index = i+1; nodep[j] = t; nodep[j]->index = j+1; break; /* next i */ } } } } /* reordertips */ void read_groups (pattern_elm ****pattern_array, long total_trees, long tip_count, AjPPhyloTree* treesource) { /* read the trees. Accumulate sets. 
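   Per tree, the loop below roughly does the following (a summary, not a
   verbatim excerpt): parse the next tree string with treeread(); on the
   first tree run dupname() and initreenode(), on later trees run
   missingname() and reordertips(); for unrooted input (noroot) reroot()
   the tree at the chosen outgroup; then accumulate(root) enters the
   species set below each node into the group hash table (enternodeset()),
   and gdispose(root) recycles the nodes before the next tree is read.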
*/ int i, j, k; boolean haslengths, initial; long nextnode, trees_read = 0; int itree=0; char *treestr; /* do allocation first *****************************************/ grouping = (group_type **) Malloc(maxgrp*sizeof(group_type *)); lengths = (double *) Malloc(maxgrp*sizeof(double)); timesseen_changes = (double*)Malloc(maxgrp*sizeof(double)); for (i = 0; i < maxgrp; i++) timesseen_changes[i] = 0.0; for (i = 0; i < maxgrp; i++) grouping[i] = NULL; order = (long **) Malloc(maxgrp*sizeof(long *)); for (i = 0; i < maxgrp; i++) order[i] = NULL; timesseen = (double **)Malloc(maxgrp*sizeof(double *)); for (i = 0; i < maxgrp; i++) timesseen[i] = NULL; nayme = (naym *)Malloc(tip_count*sizeof(naym)); hashp = (hashtype)Malloc(sizeof(namenode) * NUM_BUCKETS); for (i=0;iTree); allocate_nodep(&nodep, treestr, &spp); assert(spp == tip_count); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initconsnode,true,-1); if (!initial) { missingname(root); reordertips(); } else { initial = false; dupname(root); initreenode(root); } if (goteof) continue; ntrees += trweight; if (noroot) { reroot(nodep[outgrno - 1], &nextnode); didreroot = outgropt; } accumulate(root); gdispose(root); for (j = 0; j < 2*(1+spp); j++) nodep[j] = NULL; free(nodep); /* Added by Dan F. */ if (tree_pairing != NO_PAIRING) { /* If we're computing pairing or need separate tree sets, store the current pattern as an element of it's trees array. */ store_pattern ((*pattern_array), trees_read) ; trees_read++ ; } } freegrbg(&grbg); } /* read_groups */ void clean_up_final(void) { long i; for(i=0;inext; if(p->nodeset) free(p->nodeset); free(p); } } /*freegrbg */ PHYLIPNEW-3.69.650/src/wagner.c0000664000175000017500000003320010775447512012516 00000000000000 #include "phylip.h" #include "disc.h" #include "wagner.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ void inputmixturestr(AjPStr str, bitptr wagner0) { /* input mixture of methods */ /* used in mix, move, & penny */ long i, j, k; Char ch; boolean wag; for (i = 0; i < (words); i++) wagner0[i] = 0; j = 0; k = 1; for (i = 1; i <= (chars); i++) { ch = ajStrGetCharPos(str, i-1); uppercase(&ch); wag = false; if (ch == 'W' || ch == '?') wag = true; else if (ch == 'S' || ch == 'C') wag = false; else { printf("BAD METHOD: %c\n", ch); exxit(-1); } j++; if (j > bits) { j = 1; k++; } if (wag) wagner0[k - 1] = (long)wagner0[k - 1] | (1L << j); } } /* inputmixturestr */ void printmixture(FILE *filename, bitptr wagner) { /* print out list of parsimony methods */ /* used in mix, move, & penny */ long i, k, l; fprintf(filename, "Parsimony methods:\n"); l = 0; k = 1; for (i = 1; i <= nmlngth + 3; i++) putc(' ', filename); for (i = 1; i <= (chars); i++) { newline(filename, i, 55, nmlngth + 3); l++; if (l > bits) { l = 1; k++; } if (((1L << l) & wagner[k - 1]) != 0) putc('W', filename); else putc('S', filename); if (i % 5 == 0) putc(' ', filename); } fprintf(filename, "\n\n"); } /* printmixture */ void fillin(node2 *p,long fullset,boolean full,bitptr wagner,bitptr zeroanc) { /* Sets up for each node in the tree two statesets. stateone and statezero are the sets of character states that must be 1 or must be 0, respectively, in a most parsimonious reconstruction, based on the information at or above this node. 
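   (A gloss on the bit logic below, not part of the original comment:
   characters are packed one bit per position in each long word, so every
   expression handles a whole word of characters at once.  st flags the
   characters on which the two descendant subtrees demand opposite states;
   those are the positions later charged to this fork by count().  wa
   marks the Wagner characters and za those whose ancestral state is taken
   to be 0.  On a conflict, a Camin-Sokal character is resolved toward its
   assumed ancestral state, while a Wagner character is dropped from both
   sets, i.e. left undetermined for the moment.)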
Note that this state assignment may change based on information further down the tree. If a character is in both sets it is in state "P". If in neither, it is "?". */ long i; long l0, l1, r0, r1, st, wa, za; for (i = 0; i < (words); i++) { if (full) { l0 = p->next->back->fulstte0[i]; l1 = p->next->back->fulstte1[i]; r0 = p->next->next->back->fulstte0[i]; r1 = p->next->next->back->fulstte1[i]; } else { l0 = p->next->back->empstte0[i]; l1 = p->next->back->empstte1[i]; r0 = p->next->next->back->empstte0[i]; r1 = p->next->next->back->empstte1[i]; } st = (l1 & r0) | (l0 & r1); wa = wagner[i]; za = zeroanc[i]; if (full) { p->fulstte1[i] = (l1 | r1) & (~(st & (wa | za))); p->fulstte0[i] = (l0 | r0) & (~(st & (wa | (fullset & (~za))))); p->fulsteps[i] = st; } else { p->empstte1[i] = (l1 | r1) & (~(st & (wa | za))); p->empstte0[i] = (l0 | r0) & (~(st & (wa | (fullset & (~za))))); p->empsteps[i] = st; } } } /* fillin */ void count(long *stps, bitptr zeroanc, steptr numszero, steptr numsone) { /* counts the number of steps in a fork of the tree. The program spends much of its time in this PROCEDURE */ /* used in mix & penny */ long i, j, l; j = 1; l = 0; for (i = 0; i < (chars); i++) { l++; if (l > bits) { l = 1; j++; } if (((1L << l) & stps[j - 1]) != 0) { if (((1L << l) & zeroanc[j - 1]) != 0) numszero[i] += weight[i]; else numsone[i] += weight[i]; } } } /* count */ void postorder(node2 *p, long fullset, boolean full, bitptr wagner, bitptr zeroanc) { /* traverses a binary tree, calling PROCEDURE fillin at a node's descendants before calling fillin at the node2 */ /* used in mix & penny */ if (p->tip) return; postorder(p->next->back, fullset, full, wagner, zeroanc); postorder(p->next->next->back, fullset, full, wagner, zeroanc); if (!p->visited) { fillin(p, fullset, full, wagner, zeroanc); if (!full) p->visited = true; } } /* postorder */ void cpostorder(node2 *p, boolean full, bitptr zeroanc, steptr numszero, steptr numsone) { /* traverses a binary tree, calling PROCEDURE count at a node's descendants before calling count at the node2 */ /* used in mix & penny */ if (p->tip) return; cpostorder(p->next->back, full, zeroanc, numszero, numsone); cpostorder(p->next->next->back, full, zeroanc, numszero, numsone); if (full) count(p->fulsteps, zeroanc, numszero, numsone); else count(p->empsteps, zeroanc, numszero, numsone); } /* cpostorder */ void filltrav(node2 *r, long fullset, boolean full, bitptr wagner, bitptr zeroanc) { /* traverse to fill in interior node states */ if (r->tip) return; filltrav(r->next->back, fullset, full, wagner, zeroanc); filltrav(r->next->next->back, fullset, full, wagner, zeroanc); fillin(r, fullset, full, wagner, zeroanc); } /* filltrav */ void hyprint(struct htrav_vars2 *htrav, boolean unknown, boolean noroot, boolean didreroot, bitptr wagner, Char *guess) { /* print out states at node2 */ long i, j, k; char l; boolean dot, a0, a1, s0, s1; if (htrav->bottom) { if (noroot && !didreroot) fprintf(outfile, " "); else fprintf(outfile, "root "); } else fprintf(outfile, "%3ld ", htrav->r->back->index - spp); if (htrav->r->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[htrav->r->index - 1][i], outfile); } else fprintf(outfile, "%4ld ", htrav->r->index - spp); if (htrav->bottom && noroot && !didreroot) fprintf(outfile, " "); else if (htrav->nonzero) fprintf(outfile, " yes "); else if (unknown) fprintf(outfile, " ? 
"); else if (htrav->maybe) fprintf(outfile, " maybe "); else fprintf(outfile, " no "); for (j = 1; j <= (chars); j++) { newline(outfile, j, 40, nmlngth + 17); k = (j - 1) / bits + 1; l = (j - 1) % bits + 1; dot = (((1L << l) & wagner[k - 1]) == 0 && guess[j - 1] == '?'); s0 = (((1L << l) & htrav->r->empstte0[k - 1]) != 0); s1 = (((1L << l) & htrav->r->empstte1[k - 1]) != 0); a0 = (((1L << l) & htrav->zerobelow->bits_[k - 1]) != 0); a1 = (((1L << l) & htrav->onebelow->bits_[k - 1]) != 0); dot = (dot || ((!htrav->bottom || !noroot || didreroot) && a1 == s1 && a0 == s0)); if (dot) putc('.', outfile); else { if (s0) putc('0', outfile); else if (s1) putc('1', outfile); else putc('?', outfile); } if (j % 5 == 0) putc(' ', outfile); } putc('\n', outfile); } /* hyprint */ void hyptrav(node2 *r_, boolean unknown, bitptr dohyp, long fullset, boolean noroot, boolean didreroot, bitptr wagner, bitptr zeroanc, bitptr oneanc, pointptr2 treenode, Char *guess, gbit *garbage) { /* compute, print out states at one interior node2 */ /* used in mix & penny */ struct htrav_vars2 vars; long i; long l0, l1, r0, r1, s0, s1, a0, a1, temp, dh, wa; vars.r = r_; disc_gnu(&vars.zerobelow, &garbage); disc_gnu(&vars.onebelow, &garbage); vars.bottom = (vars.r->back == NULL); vars.maybe = false; vars.nonzero = false; if (vars.bottom) { memcpy(vars.zerobelow->bits_, zeroanc, words*sizeof(long)); memcpy(vars.onebelow->bits_, oneanc, words*sizeof(long)); } else { memcpy(vars.zerobelow->bits_, treenode[vars.r->back->index - 1]->empstte0, words*sizeof(long)); memcpy(vars.onebelow->bits_, treenode[vars.r->back->index - 1]->empstte1, words*sizeof(long)); } for (i = 0; i < (words); i++) { dh = dohyp[i]; s0 = vars.r->empstte0[i]; s1 = vars.r->empstte1[i]; a0 = vars.zerobelow->bits_[i]; a1 = vars.onebelow->bits_[i]; if (!vars.r->tip) { wa = wagner[i]; l0 = vars.r->next->back->empstte0[i]; l1 = vars.r->next->back->empstte1[i]; r0 = vars.r->next->next->back->empstte0[i]; r1 = vars.r->next->next->back->empstte1[i]; s0 = (wa & ((a0 & l0) | (a0 & r0) | (l0 & r0))) | (dh & fullset & (~wa) & s0); s1 = (wa & ((a1 & l1) | (a1 & r1) | (l1 & r1))) | (dh & fullset & (~wa) & s1); temp = fullset & (~(s0 | s1 | l1 | l0 | r1 | r0)); s0 |= temp & a0; s1 |= temp & a1; vars.r->empstte0[i] = s0; vars.r->empstte1[i] = s1; } vars.maybe = (vars.maybe || (dh & (s0 | s1)) != (a0 | a1)); vars.nonzero = (vars.nonzero || ((s1 & a0) | (s0 & a1)) != 0); } hyprint(&vars,unknown, noroot, didreroot, wagner, guess); if (!vars.r->tip) { hyptrav(vars.r->next->back,unknown,dohyp, fullset, noroot,didreroot, wagner, zeroanc, oneanc, treenode, guess, garbage); hyptrav(vars.r->next->next->back, unknown,dohyp, fullset, noroot, didreroot, wagner, zeroanc, oneanc, treenode, guess, garbage); } disc_chuck(vars.zerobelow, &garbage); disc_chuck(vars.onebelow, &garbage); } /* hyptrav */ void hypstates(long fullset, boolean full, boolean noroot, boolean didreroot, node2 *root, bitptr wagner, bitptr zeroanc, bitptr oneanc, pointptr2 treenode, Char *guess, gbit *garbage) { /* fill in and describe states at interior nodes */ /* used in mix & penny */ boolean unknown; bitptr dohyp; long i, j, k; for (i = 0; i < (words); i++) { zeroanc[i] = 0; oneanc[i] = 0; } unknown = false; for (i = 0; i < (chars); i++) { j = i / bits + 1; k = i % bits + 1; if (guess[i] == '0') zeroanc[j - 1] = ((long)zeroanc[j - 1]) | (1L << k); if (guess[i] == '1') oneanc[j - 1] = ((long)oneanc[j - 1]) | (1L << k); unknown = (unknown || ((((1L << k) & wagner[j - 1]) == 0) && guess[i] == '?')); } dohyp = 
(bitptr)Malloc(words*sizeof(long)); for (i = 0; i < (words); i++) dohyp[i] = wagner[i] | zeroanc[i] | oneanc[i]; filltrav(root, fullset, full, wagner, zeroanc); fprintf(outfile, "From To Any Steps? "); fprintf(outfile, "State at upper node\n"); fprintf(outfile, " "); fprintf(outfile, "( . means same as in the node below it on tree)\n\n"); hyptrav(root,unknown,dohyp, fullset, noroot, didreroot, wagner, zeroanc, oneanc, treenode, guess, garbage); free(dohyp); } /* hypstates */ void drawline(long i, double scale, node2 *root) { /* draws one row of the tree diagram by moving up tree */ node2 *p, *q, *r, *first =NULL, *last =NULL; long n, j; boolean extra, done; p = root; q = root; extra = false; if (i == p->ycoord && p == root) { if (p->index - spp >= 10) fprintf(outfile, "-%2ld", p->index - spp); else fprintf(outfile, "--%ld", p->index - spp); extra = true; } else fprintf(outfile, " "); do { if (!p->tip) { r = p->next; done = false; do { if (i >= r->back->ymin && i <= r->back->ymax) { q = r->back; done = true; } r = r->next; } while (!(done || r == p)); first = p->next->back; r = p->next; while (r->next != p) r = r->next; last = r->back; } done = (p == q); n = (long)(scale * (p->xcoord - q->xcoord) + 0.5); if (n < 3 && !q->tip) n = 3; if (extra) { n--; extra = false; } if (q->ycoord == i && !done) { putc('+', outfile); if (!q->tip) { for (j = 1; j <= n - 2; j++) putc('-', outfile); if (q->index - spp >= 10) fprintf(outfile, "%2ld", q->index - spp); else fprintf(outfile, "-%ld", q->index - spp); extra = true; } else { for (j = 1; j < n; j++) putc('-', outfile); } } else if (!p->tip) { if (last->ycoord > i && first->ycoord < i && i != p->ycoord) { putc('!', outfile); for (j = 1; j < n; j++) putc(' ', outfile); } else { for (j = 1; j <= n; j++) putc(' ', outfile); } } else { for (j = 1; j <= n; j++) putc(' ', outfile); } if (p != q) p = q; } while (!done); if (p->ycoord == i && p->tip) { for (j = 0; j < nmlngth; j++) putc(nayme[p->index - 1][j], outfile); } putc('\n', outfile); } /* drawline */ void printree(boolean treeprint,boolean noroot,boolean didreroot,node2 *root) { /* prints out diagram of the tree */ /* used in mix & penny */ long tipy, i; double scale; putc('\n', outfile); if (!treeprint) return; putc('\n', outfile); tipy = 1; coordinates2(root, &tipy); scale = 1.5; putc('\n', outfile); for (i = 1; i <= (tipy - down); i++) drawline(i, scale, root); if (noroot) { fprintf(outfile, "\n remember:"); if (didreroot) fprintf(outfile, " (although rooted by outgroup)"); fprintf(outfile, " this is an unrooted tree!\n"); } putc('\n', outfile); } /* printree */ void writesteps(boolean weights, steptr numsteps) { /* write number of steps */ /* used in mix & penny */ long i, j, k; if (weights) fprintf(outfile, "weighted "); fprintf(outfile, "steps in each character:\n"); fprintf(outfile, " "); for (i = 0; i <= 9; i++) fprintf(outfile, "%4ld", i); fprintf(outfile, "\n *-----------------------------------------\n"); for (i = 0; i <= (chars / 10); i++) { fprintf(outfile, "%5ld", i * 10); putc('!', outfile); for (j = 0; j <= 9; j++) { k = i * 10 + j; if (k == 0 || k > chars) fprintf(outfile, " "); else fprintf(outfile, "%4ld", numsteps[k - 1] + extras[k - 1]); } putc('\n', outfile); } putc('\n', outfile); } /* writesteps */ PHYLIPNEW-3.69.650/src/proml.c0000664000175000017500000027570011616234204012364 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. 
Written by Joseph Felsenstein, Lucas Mix, Akiko Fuseki, Sean Lamont, Andrew Keeffe, Dan Fineman, and Patrick Colacurcio. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ typedef long vall[maxcategs]; typedef double contribarr[maxcategs]; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ void init_protmats(void); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void initmemrates(void); void makeprotfreqs(void); void allocrest(void); void doinit(void); void inputoptions(void); void input_protdata(AjPSeqset, long); void makeweights(void); void prot_makevalues(long, pointarray, long, long, sequence, steptr); void prot_inittable(void); void alloc_pmatrix(long); void getinput(void); void inittravtree(node *); void prot_nuview(node *); void prot_slopecurv(node *, double, double *, double *, double *); void makenewv(node *); void update(node *); void smooth(node *); void make_pmatrix(double **, double **, double **, long, double, double, double *, double **); double prot_evaluate(node *, boolean); void treevaluate(void); void promlcopy(tree *, tree *, long, long); void proml_re_move(node **, node **); void insert_(node *, node *, boolean); void addtraverse(node *, node *, boolean); void rearrange(node *, node *); void proml_coordinates(node *, double, long *, double *); void proml_printree(void); void sigma(node *, double *, double *, double *); void describe(node *); void prot_reconstr(node *, long); void rectrav(node *, long, long); void summarize(void); void initpromlnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void dnaml_treeout(node *); void buildnewtip(long, tree *); void buildsimpletree(tree *); void free_all_protx (long, pointarray); void maketree(void); void clean_up(void); void globrearrange(void); void proml_unroot(node* root, node** nodep, long nonodes) ; void reallocsites(void); void prot_freetable(void); void free_pmatrix(long sib); void alloclrsaves(void); void freelrsaves(void); void resetlrsaves(void); /* function prototypes */ #endif extern sequence y; long rcategs; boolean haslengths; long oldendsite=0; Char infilename[100], intreename[100], catfilename[100], weightfilename[100]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; double *rate, *rrate, *probcat; long nonodes2, sites, weightsum, categs, datasets, ith, njumble, jumb; long inseed, inseed0, parens; boolean global, jumble, weights, trout, usertree, inserting = false, ctgry, rctgry, auto_, hypstate, progress, mulsets, justwts, firstset, improve, smoothit, polishing, lngths, gama, invar, usepmb, usepam, usejtt; tree curtree, bestree, bestree2, priortree; node *qwhere, *grbg, *addwhere; double cv, alpha, lambda, invarfrac, bestyet; long *enterorder; steptr aliasweight; contribarr *contribution, like, nulike, clai; double **term, **slopeterm, **curveterm; longer seed; char *progname; char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-"; node **lrsaves; /* Local variables for maketree, propagated globally for c version: */ long k, nextsp, numtrees, maxwhich, mx, mx0, mx1, shimotrees; double dummy, maxlogl; boolean succeeded, smoothed; double **l0gf; double *l0gl; double **tbl; Char ch, ch2; long col; vall *mp; /* Variables introduced to allow for 
protein probability calculations */ long max_num_sibs; /* maximum number of siblings used in a */ /* nuview calculation. determines size */ /* final size of pmatrices */ double *eigmat; /* eig matrix variable */ double **probmat; /* prob matrix variable */ double ****dpmatrix; /* derivative of pmatrix */ double ****ddpmatrix; /* derivative of xpmatrix */ double *****pmatrices; /* matrix of probabilities of protien */ /* conversion. The 5 subscripts refer */ /* to sibs, rcategs, categs, final and */ /* initial states, respectively. */ double freqaa[20]; /* amino acid frequencies */ /* this JTT matrix decomposition thanks to Elisabeth Tillier */ static double jtteigmat[] = {+0.00000000000000,-1.81721720738768,-1.87965834528616,-1.61403121885431, -1.53896608443751,-1.40486966367848,-1.30995061286931,-1.24668414819041, -1.17179756521289,-0.31033320987464,-0.34602837857034,-1.06031718484613, -0.99900602987105,-0.45576774888948,-0.86014403434677,-0.54569432735296, -0.76866956571861,-0.60593589295327,-0.65119724379348,-0.70249806480753}; static double jttprobmat[20][20] = {{+0.07686196156903,+0.05105697447152,+0.04254597872702,+0.05126897436552, +0.02027898986051,+0.04106097946952,+0.06181996909002,+0.07471396264303, +0.02298298850851,+0.05256897371552,+0.09111095444453,+0.05949797025102, +0.02341398829301,+0.04052997973502,+0.05053197473402,+0.06822496588753, +0.05851797074102,+0.01433599283201,+0.03230298384851,+0.06637396681302}, {-0.04445795120462,-0.01557336502860,-0.09314817363516,+0.04411372100382, -0.00511178725134,+0.00188472427522,-0.02176250428454,-0.01330231089224, +0.01004072641973,+0.02707838224285,-0.00785039050721,+0.02238829876349, +0.00257470703483,-0.00510311699563,-0.01727154263346,+0.20074235330882, -0.07236268502973,-0.00012690116016,-0.00215974664431,-0.01059243778174}, {+0.09480046389131,+0.00082658405814,+0.01530023104155,-0.00639909042723, +0.00160605602061,+0.00035896642912,+0.00199161318384,-0.00220482855717, -0.00112601328033,+0.14840201765438,-0.00344295714983,-0.00123976286718, -0.00439399942758,+0.00032478785709,-0.00104270266394,-0.02596605592109, -0.05645800566901,+0.00022319903170,-0.00022792271829,-0.16133258048606}, {-0.06924141195400,-0.01816245289173,-0.08104005811201,+0.08985697111009, +0.00279659017898,+0.01083740322821,-0.06449599336038,+0.01794514261221, +0.01036809141699,+0.04283504450449,+0.00634472273784,+0.02339134834111, -0.01748667848380,+0.00161859106290,+0.00622486432503,-0.05854130195643, +0.15083728660504,+0.00030733757661,-0.00143739522173,-0.05295810171941}, {-0.14637948915627,+0.02029296323583,+0.02615316895036,-0.10311538564943, -0.00183412744544,-0.02589124656591,+0.11073673851935,+0.00848581728407, +0.00106057791901,+0.05530240732939,-0.00031533506946,-0.03124002869407, -0.01533984125301,-0.00288717337278,+0.00272787410643,+0.06300929916280, +0.07920438311152,-0.00041335282410,-0.00011648873397,-0.03944076085434}, {-0.05558229086909,+0.08935293782491,+0.04869509588770,+0.04856877988810, -0.00253836047720,+0.07651693957635,-0.06342453535092,-0.00777376246014, -0.08570270266807,+0.01943016473512,-0.00599516526932,-0.09157595008575, -0.00397735155663,-0.00440093863690,-0.00232998056918,+0.02979967701162, -0.00477299485901,-0.00144011795333,+0.01795114942404,-0.00080059359232}, {+0.05807741644682,+0.14654292420341,-0.06724975334073,+0.02159062346633, -0.00339085518294,-0.06829036785575,+0.03520631903157,-0.02766062718318, +0.03485632707432,-0.02436836692465,-0.00397566003573,-0.10095488644404, 
+0.02456887654357,+0.00381764117077,-0.00906261340247,-0.01043058066362, +0.01651199513994,-0.00210417220821,-0.00872508520963,-0.01495915462580}, {+0.02564617106907,+0.02960554611436,-0.00052356748770,+0.00989267817318, -0.00044034172141,-0.02279910634723,-0.00363768356471,-0.01086345665971, +0.01229721799572,+0.02633650142592,+0.06282966783922,-0.00734486499924, -0.13863936313277,-0.00993891943390,-0.00655309682350,-0.00245191788287, -0.02431633805559,-0.00068554031525,-0.00121383858869,+0.06280025239509}, {+0.11362428251792,-0.02080375718488,-0.08802750967213,-0.06531316372189, -0.00166626058292,+0.06846081717224,+0.07007301248407,-0.01713112936632, -0.05900588794853,-0.04497159138485,+0.04222484636983,+0.00129043178508, -0.01550337251561,-0.01553102163852,-0.04363429852047,+0.01600063777880, +0.05787328925647,-0.00008265841118,+0.02870014572813,-0.02657681214523}, {+0.01840541226842,+0.00610159018805,+0.01368080422265,+0.02383751807012, -0.00923516894192,+0.01209943150832,+0.02906782189141,+0.01992384905334, +0.00197323568330,+0.00017531415423,-0.01796698381949,+0.01887083962858, -0.00063335886734,-0.02365277334702,+0.01209445088200,+0.01308086447947, +0.01286727242301,-0.11420358975688,-0.01886991700613,+0.00238338728588}, {-0.01100105031759,-0.04250695864938,-0.02554356700969,-0.05473632078607, +0.00725906469946,-0.03003724918191,-0.07051526125013,-0.06939439879112, -0.00285883056088,+0.05334304124753,+0.12839241846919,-0.05883473754222, +0.02424304967487,+0.09134510778469,-0.00226003347193,-0.01280041778462, -0.00207988305627,-0.02957493909199,+0.05290385686789,+0.05465710875015}, {-0.01421274522011,+0.02074863337778,-0.01006411985628,+0.03319995456446, -0.00005371699269,-0.12266046460835,+0.02419847062899,-0.00441168706583, -0.08299118738167,-0.00323230913482,+0.02954035119881,+0.09212856795583, +0.00718635627257,-0.02706936115539,+0.04473173279913,-0.01274357634785, -0.01395862740618,-0.00071538848681,+0.04767640012830,-0.00729728326990}, {-0.03797680968123,+0.01280286509478,-0.08614616553187,-0.01781049963160, +0.00674319990083,+0.04208667754694,+0.05991325707583,+0.03581015660092, -0.01529816709967,+0.06885987924922,-0.11719120476535,-0.00014333663810, +0.00074336784254,+0.02893416406249,+0.07466151360134,-0.08182016471377, -0.06581536577662,-0.00018195976501,+0.00167443595008,+0.09015415667825}, {+0.03577726799591,-0.02139253448219,-0.01137813538175,-0.01954939202830, -0.04028242801611,-0.01777500032351,-0.02106862264440,+0.00465199658293, -0.02824805812709,+0.06618860061778,+0.08437791757537,-0.02533125946051, +0.02806344654855,-0.06970805797879,+0.02328376968627,+0.00692992333282, +0.02751392122018,+0.01148722812804,-0.11130404325078,+0.07776346000559}, {-0.06014297925310,-0.00711674355952,-0.02424493472566,+0.00032464353156, +0.00321221847573,+0.03257969053884,+0.01072805771161,+0.06892027923996, +0.03326534127710,-0.01558838623875,+0.13794237677194,-0.04292623056646, +0.01375763233229,-0.11125153774789,+0.03510076081639,-0.04531670712549, -0.06170413486351,-0.00182023682123,+0.05979891871679,-0.02551802851059}, {-0.03515069991501,+0.02310847227710,+0.00474493548551,+0.02787717003457, -0.12038329679812,+0.03178473522077,+0.04445111601130,-0.05334957493090, +0.01290386678474,-0.00376064171612,+0.03996642737967,+0.04777677295520, +0.00233689200639,+0.03917715404594,-0.01755598277531,-0.03389088626433, -0.02180780263389,+0.00473402043911,+0.01964539477020,-0.01260807237680}, {-0.04120428254254,+0.00062717164978,-0.01688703578637,+0.01685776910152, 
+0.02102702093943,+0.01295781834163,+0.03541815979495,+0.03968150445315, -0.02073122710938,-0.06932247350110,+0.11696314241296,-0.00322523765776, -0.01280515661402,+0.08717664266126,+0.06297225078802,-0.01290501780488, -0.04693925076877,-0.00177653675449,-0.08407812137852,-0.08380714022487}, {+0.03138655228534,-0.09052573757196,+0.00874202219428,+0.06060593729292, -0.03426076652151,-0.04832468257386,+0.04735628794421,+0.14504653737383, -0.01709111334001,-0.00278794215381,-0.03513813820550,-0.11690294831883, -0.00836264902624,+0.03270980973180,-0.02587764129811,+0.01638786059073, +0.00485499822497,+0.00305477087025,+0.02295754527195,+0.00616929722958}, {-0.04898722042023,-0.01460879656586,+0.00508708857036,+0.07730497806331, +0.04252420017435,+0.00484232580349,+0.09861807969412,-0.05169447907187, -0.00917820907880,+0.03679081047330,+0.04998537112655,+0.00769330211980, +0.01805447683564,-0.00498723245027,-0.14148416183376,-0.05170281760262, -0.03230723310784,-0.00032890672639,-0.02363523071957,+0.03801365471627}, {-0.02047562162108,+0.06933781779590,-0.02101117884731,-0.06841945874842, -0.00860967572716,-0.00886650271590,-0.07185241332269,+0.16703684361030, -0.00635847581692,+0.00811478913823,+0.01847205842216,+0.06700967948643, +0.00596607376199,+0.02318239240593,-0.10552958537847,-0.01980199747773, -0.02003785382406,-0.00593392430159,-0.00965391033612,+0.00743094349652}}; /* this PMB matrix decomposition due to Elisabeth Tillier */ static double pmbeigmat[20] = {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; static double pmbprobmat[20][20] = {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, 0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, 
{0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, -0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, -0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, -0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, 
-0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, 0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, 0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, -0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} ; /* dcmut version of PAM model from http://www.ebi.ac.uk/goldman-srv/dayhoff/ */ static double pameigmat[] = {0,-1.93321786301018,-2.20904642493621,-1.74835983874903, -1.64854548332072,-1.54505559488222,-1.33859384676989,-1.29786201193594, -0.235548517495575,-0.266951066089808,-0.28965813670665,-1.10505826965282, -1.04323310568532,-0.430423720979904,-0.541719761016713,-0.879636093986914, -0.711249353378695,-0.725050487280602,-0.776855937389452,-0.808735559461343}; static double pamprobmat[20][20] ={ {0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 0.08048095976, 
0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764}, {0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875}, {-0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643}, {0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618}, {0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, -0.1063484263}, {-0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, -0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976}, {-0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261}, {-0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, -0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439}, {0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151}, {-0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656}, {-0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605}, {0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 
0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363}, {0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355}, {0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309}, {0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361}, {0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116}, {0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, -0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802}, {0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243}, {-0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, -0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609}, {-0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, -0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059}}; void init_protmats(void) { long l; eigmat = (double *) Malloc (20 * sizeof(double)); for (l = 0; l <= 19; l++) if (usejtt) eigmat[l] = jtteigmat[l]; else { if (usepmb) eigmat[l] = pmbeigmat[l]; else eigmat[l] = pameigmat[l]; } probmat = (double **) Malloc (20 * sizeof(double *)); for (l = 0; l <= 19; l++) if (usejtt) probmat[l] = jttprobmat[l]; else { if (usepmb) probmat[l] = pmbprobmat[l]; else probmat[l] = pamprobmat[l]; } } /* init_protmats */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { long i; double probsum=0.0; AjPStr model = NULL; AjPStr gammamethod = NULL; AjPFloat hmmrates; AjPFloat hmmprob; AjPFloat arrayval; AjBool rough; ctgry = false; rctgry = false; categs = 1; rcategs = 1; auto_ = false; gama = false; global = false; hypstate = false; improve = false; invar = false; jumble = false; njumble = 1; lngths = false; lambda = 1.0; outgrno = 1; outgropt = false; trout = true; usertree = false; weights = false; printdata = 
false; progress = true; treeprint = true; usejtt = false; usepmb = false; usepam = false; interleaved = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; lngths = ajAcdGetBoolean("lengths"); } numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } model = ajAcdGetListSingle("model"); if(ajStrMatchC(model, "j")) usejtt = true; if(ajStrMatchC(model, "h")) usepmb = true; if(ajStrMatchC(model, "d")) usepam = true; categs = ajAcdGetInt("ncategories"); if (categs > 1) { ctgry = true; rate = (double *) Malloc(categs * sizeof(double)); arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } else{ rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } phyloratecat = ajAcdGetProperties("categories"); gammamethod = ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "n")) { rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); rrate[0] = 1.0; probcat[0] = 1.0; } else { rctgry = true; auto_ = ajAcdGetBoolean("adjsite"); if(auto_) { lambda = ajAcdGetFloat("lambda"); lambda = 1 / lambda; } } if(ajStrMatchC(gammamethod, "g")) { gama = true; rcategs = ajAcdGetInt("ngammacat"); cv = ajAcdGetFloat("gammacoefficient"); alpha = 1.0 / (cv*cv); initmemrates(); initgammacat(rcategs, alpha, rrate, probcat); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; rcategs = ajAcdGetInt("ninvarcat"); cv = ajAcdGetFloat("invarcoefficient"); alpha = 1.0 / (cv*cv); invarfrac = ajAcdGetFloat("invarfrac"); initmemrates(); initgammacat(rcategs-1, alpha, rrate, probcat); for (i=0; i < rcategs-1 ; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else if(ajStrMatchC(gammamethod, "h")) { rcategs = ajAcdGetInt("nhmmcategories"); initmemrates(); hmmrates = ajAcdGetArray("hmmrates"); emboss_initcategs(hmmrates, rcategs,rrate); hmmprob = ajAcdGetArray("hmmprobabilities"); for (i=0; i < rcategs; i++){ probcat[i] = ajFloatGet(hmmprob, i); probsum += probcat[i]; } } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; if(!usertree) { global = ajAcdGetBoolean("global"); rough = ajAcdGetBoolean("rough"); if(!rough) improve = true; njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); hypstate = ajAcdGetBoolean("hypstate"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } init_protmats(); } /* emboss_getoptions */ void initmemrates(void) { probcat = (double *) Malloc(rcategs * sizeof(double)); rrate = (double *) Malloc(rcategs * sizeof(double)); } void 
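/* Note (illustrative, not from the upstream PHYLIP source): two conventions
   in emboss_getoptions() above are easy to miss.  The gamma and invariant-sites
   rate models are parameterised by a coefficient of variation cv, converted to
   the gamma shape parameter as alpha = 1/(cv*cv) -- so cv = 1 gives alpha = 1
   (an exponential-like spread of rates) and cv = 0.5 gives alpha = 4 (rates
   clustered tightly around the mean).  With invariant sites, the discrete
   gamma categories are built for rcategs-1 classes, their probabilities are
   scaled by (1 - invarfrac), and a final category with rate 0 and probability
   invarfrac is appended.  The function defined next, makeprotfreqs(), takes
   the equilibrium amino-acid frequencies from the probmat row whose eigenvalue
   is closest to zero (the stationary eigenvector), using absolute values
   because the sign of an eigenvector is arbitrary. */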
makeprotfreqs(void) { /* calculate amino acid frequencies based on eigmat */ long i, mineig; mineig = 0; for (i = 0; i <= 19; i++) if (fabs(eigmat[i]) < fabs(eigmat[mineig])) mineig = i; memcpy(freqaa, probmat[mineig], 20 * sizeof(double)); for (i = 0; i <= 19; i++) freqaa[i] = fabs(freqaa[i]); } /* makeprotfreqs */ void reallocsites() { long i; for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); free(category); free(weight); free(alias); free(ally); free(location); free(aliasweight); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; makeweights(); } void allocrest(void) { long i; y = (Char **) Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); nayme = (naym *) Malloc(spp*sizeof(naym)); enterorder = (long *) Malloc(spp*sizeof(long)); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); } /* allocrest */ void doinit(void) { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &sites, &nonodes2, 2); makeprotfreqs(); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); alloctree(&curtree.nodep, nonodes2, usertree); allocrest(); if (usertree) return; alloctree(&bestree.nodep, nonodes2, 0); alloctree(&priortree.nodep, nonodes2, 0); if (njumble <= 1) return; alloctree(&bestree2.nodep, nonodes2, 0); } /* doinit */ void inputoptions(void) { long i; if (!firstset) samenumspseq(seqsets[ith-1], &sites, ith); if (firstset) { for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; } if (justwts || weights) inputweightsstr(phyloweights->Str[ith-1], sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if ((ctgry && categs > 1) && (firstset || !justwts)) { inputcategsstr(phyloratecat->Str[0], 0, sites, category, categs, "ProML"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) printweights(outfile, 0, sites, weight, "Sites"); fprintf(outfile, "\nAmino acid sequence Maximum Likelihood"); fprintf(outfile, " method, version %s\n\n",VERSION); fprintf(outfile, "%s model of amino acid change\n\n", (usejtt ? "Jones-Taylor-Thornton" : usepmb ? 
"Henikoff/Tillier PMB" : "Dayhoff PAM")); } /* inputoptions */ void input_protdata(AjPSeqset seqset, long chars) { /* input the names and sequences for each species */ /* used by proml */ long i, j, k, l; Char charstate; if (printdata) headings(chars, "Sequences", "---------"); for(i=0;i chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && y[j - 1][k - 1] == y[0][k - 1]) charstate = '.'; else charstate = y[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* input_protdata */ void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2 (sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { location[alias[i - 1] - 1] = i; ally[alias[i - 1] - 1] = alias[i - 1]; } term = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) term[i] = (double *) Malloc(rcategs * sizeof(double)); slopeterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) slopeterm[i] = (double *) Malloc(rcategs * sizeof(double)); curveterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) curveterm[i] = (double *) Malloc(rcategs * sizeof(double)); mp = (vall *) Malloc(sites*sizeof(vall)); contribution = (contribarr *) Malloc(endsite*sizeof(contribarr)); } /* makeweights */ void prot_makevalues(long categs, pointarray treenode, long endsite, long spp, sequence y, steptr alias) { /* set up fractional likelihoods at tips */ /* a version of makevalues2 found in seq.c */ /* used by proml */ long i, j, k, l; long b; for (k = 0; k < endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (l = 0; l < categs; l++) { memset(treenode[i]->protx[k][l], 0, sizeof(double)*20); switch (y[i][j - 1]) { case 'A': treenode[i]->protx[k][l][0] = 1.0; break; case 'R': treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0; break; case 'N': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; break; case 'D': treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'C': treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0; break; case 'Q': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; break; case 'E': treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; break; case 'G': treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0; break; case 'H': treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0; break; case 'I': treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0; break; case 'L': treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0; break; case 'K': treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0; break; case 'M': treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0; break; case 'F': treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0; break; case 'P': treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0; break; case 'S': treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0; break; case 'T': treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0; break; case 'W': treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0; break; 
case 'Y': treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0; break; case 'V': treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0; break; case 'B': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'Z': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; break; case 'X': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '?': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '*': /* stop codon symbol */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '-': /* deletion event-absent data or aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; } } } } } /* prot_makevalues */ void free_pmatrix(long sib) { long j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(pmatrices[sib][j][k][l]); free(pmatrices[sib][j][k]); } free(pmatrices[sib][j]); } free(pmatrices[sib]); } void alloc_pmatrix(long sib) { /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */ long j, k, l; double ****temp_matrix; temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **)); for (k = 0; k < categs; k++) { temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *)); for (l = 0; l < 20; l++) temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double)); } } pmatrices[sib] = temp_matrix; max_num_sibs++; } /* alloc_pmatrix */ void prot_freetable() { long i,j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(ddpmatrix[j][k][l]); free(ddpmatrix[j][k]); } free(ddpmatrix[j]); } free(ddpmatrix); for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(dpmatrix[j][k][l]); free(dpmatrix[j][k]); } free(dpmatrix[j]); } free(dpmatrix); for (j = 0; j < rcategs; j++) free(tbl[j]); free(tbl); for ( i = 0 ; i < max_num_sibs ; i++ ) free_pmatrix(i); free(pmatrices); } void prot_inittable(void) { /* Define a lookup table. Precompute values and print them out in tables */ /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */ long i, j, k, l; double sumrates; /* Allocate memory for pmatrices, the array of pointers to pmatrices */ pmatrices = (double *****) Malloc ( spp * sizeof(double ****)); /* Allocate memory for the first 2 pmatrices, the matrix of conversion */ /* probabilities, but only once per run (aka not on the second jumble. 
*/ alloc_pmatrix(0); alloc_pmatrix(1); /* Allocate memory for one dpmatrix, the first derivative matrix */ dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory for one ddpmatrix, the second derivative matrix */ ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory and assign values to tbl, the matrix of possible rates*/ tbl = (double **) Malloc( rcategs * sizeof(double *)); for (j = 0; j < rcategs; j++) tbl[j] = (double *) Malloc( categs * sizeof(double)); for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) tbl[j][k] = rrate[j]*rate[k]; sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]; } sumrates /= (double)sites; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) { tbl[j][k] /= sumrates; } if(jumb > 1) return; if (gama) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } if (rcategs > 1) { fprintf(outfile, "\nStates in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (k = 0; k < categs; k++) fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]); } if ((rcategs > 1) || (categs >> 1)) fprintf(outfile, "\n\n"); } /* prot_inittable */ void getinput(void) { /* reads the input data */ /* void debugtree(tree*, FILE*) */ if (!justwts || firstset) inputoptions(); if (!justwts || firstset) input_protdata(seqsets[ith-1], sites); if ( !firstset ) freelrsaves(); makeweights(); alloclrsaves(); setuptree2(&curtree); if (!usertree) { setuptree2(&bestree); setuptree2(&priortree); if (njumble > 1) setuptree2(&bestree2); } prot_allocx(nonodes2, rcategs, curtree.nodep, usertree); if (!usertree) { prot_allocx(nonodes2, rcategs, bestree.nodep, 0); prot_allocx(nonodes2, rcategs, priortree.nodep, 0); if (njumble > 1) prot_allocx(nonodes2, rcategs, bestree2.nodep, 0); } prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias); } /* getinput */ void inittravtree(node *p) { /* traverse tree to set initialized and v to initial values */ node* q; p->initialized = false; p->back->initialized = false; if ( usertree && (!lngths || p->iter) ) { p->v = initialv; p->back->v = initialv; } if ( !p->tip ) { q = p->next; while ( q != p ) { inittravtree(q->back); q = q->next; } } } /* inittravtree 
*/ void prot_nuview(node *p) { long i, j, k, l, m, num_sibs, sib_index; node *sib_ptr, *sib_back_ptr; psitelike prot_xx, x2; double lw, prod7; double **pmat; double maxx; double correction; /* Figure out how many siblings the current node has */ /* and be sure that pmatrices is large enough */ num_sibs = count_sibs(p); for (i = 0; i < num_sibs; i++) if (pmatrices[i] == NULL) alloc_pmatrix(i); /* Recursive calls, should be called for all children */ sib_ptr = p; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (!sib_back_ptr->tip && !sib_back_ptr->initialized) prot_nuview(sib_back_ptr); } /* Make pmatrices for all possible combinations of category, rcateg */ /* and sib */ sib_ptr = p; /* return to p */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; lw = sib_back_ptr->v; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw, tbl[j][k], eigmat, probmat); } for (i = 0; i < endsite; i++) { maxx = 0; correction = 0; k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { /* initialize to 1 all values of prot_xx */ for (m = 0; m <= 19; m++) prot_xx[m] = 1; sib_ptr = p; /* return to p */ /* loop through all sibs and calculate likelihoods for all possible*/ /* amino acid combinations */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if ( j == 0) correction += sib_back_ptr->underflows[i]; memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike)); pmat = pmatrices[sib_index][j][k]; for (m = 0; m <= 19; m++) { prod7 = 0; for (l = 0; l <= 19; l++) prod7 += (pmat[m][l] * x2[l]); prot_xx[m] *= prod7; if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1)) maxx = prot_xx[m]; } } /* And the final point of this whole function: */ memcpy(p->protx[i][j], prot_xx, sizeof(psitelike)); } p->underflows[i] = 0; if ( maxx < MIN_DOUBLE ) fix_protx(p,i,maxx,rcategs); p->underflows[i] += correction; } p->initialized = true; } /* prot_nuview */ void prot_slopecurv(node *p,double y,double *like,double *slope,double *curve) { /* compute log likelihood, slope and curvature at node p */ long i, j, k, l, m, lai; double sum, sumc, sumterm, lterm, sumcs, sumcc, sum2, slope2, curve2; double frexm = 0; /* frexm = freqaa[m]*x1[m] */ /* frexml = frexm*x2[l] */ double prod4m, prod5m, prod6m; /* elements of prod4-5 for */ /* each m */ double **pmat, **dpmat, **ddpmat; /* local pointers to global*/ /* matrices */ double prod4, prod5, prod6; contribarr thelike, nulike, nuslope, nucurve, theslope, thecurve, clai, cslai, cclai; node *q; psitelike x1, x2; q = p->back; sum = 0.0; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { make_pmatrix(pmatrices[0][j][k], dpmatrix[j][k], ddpmatrix[j][k], 2, y, tbl[j][k], eigmat, probmat); } } for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { memcpy(x1, p->protx[i][j], sizeof(psitelike)); memcpy(x2, q->protx[i][j], sizeof(psitelike)); pmat = pmatrices[0][j][k]; dpmat = dpmatrix[j][k]; ddpmat = ddpmatrix[j][k]; prod4 = 0.0; prod5 = 0.0; prod6 = 0.0; for (m = 0; m <= 19; m++) { prod4m = 0.0; prod5m = 0.0; prod6m = 0.0; frexm = x1[m] * freqaa[m]; for (l = 0; l <= 19; l++) { prod4m += x2[l] * pmat[m][l]; prod5m += x2[l] * dpmat[m][l]; prod6m += x2[l] * ddpmat[m][l]; } prod4 += frexm * prod4m; prod5 += frexm * prod5m; prod6 += frexm * prod6m; } term[i][j] = prod4; slopeterm[i][j] = prod5; curveterm[i][j] = prod6; } 
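/* Note (illustrative, not from the upstream PHYLIP source): prod4, prod5 and
   prod6 hold the per-site likelihood and its first and second derivatives with
   respect to the branch length.  In the spectral form used by make_pmatrix()
   each matrix entry is a sum of terms q_k exp(r lam_k t), so the derivative
   matrices dpmatrix and ddpmatrix are the same sums with every term multiplied
   by (r lam_k) once or twice.  prot_slopecurv() converts these into the slope
   and curvature of the log-likelihood, which makenewv() then feeds into a
   damped, uphill-forced Newton-Raphson update of the branch length; a sketch
   of that step, using the same variable names:

       y = y + slope / fabs(curve);         (forced uphill even if curve > 0)
       if (y < epsilon) y = epsilon;        (branch lengths stay positive)

   and when the likelihood fails to improve, y is instead pulled 7/8 of the
   way back toward the last accepted value. */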
sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * term[i][j]; if (sumterm <= 0.0) sumterm = 0.000000001; /* ? shouldn't get here ?? */ lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) { term[i][j] = term[i][j] / sumterm; slopeterm[i][j] = slopeterm[i][j] / sumterm; curveterm[i][j] = curveterm[i][j] / sumterm; } sum += (aliasweight[i] * lterm); } for (i = 0; i < rcategs; i++) { thelike[i] = 1.0; theslope[i] = 0.0; thecurve[i] = 0.0; } for (i = 0; i < sites; i++) { sumc = 0.0; sumcs = 0.0; sumcc = 0.0; for (k = 0; k < rcategs; k++) { sumc += probcat[k] * thelike[k]; sumcs += probcat[k] * theslope[k]; sumcc += probcat[k] * thecurve[k]; } sumc *= lambda; sumcs *= lambda; sumcc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, term[lai - 1], rcategs*sizeof(double)); memcpy(cslai, slopeterm[lai - 1], rcategs*sizeof(double)); memcpy(cclai, curveterm[lai - 1], rcategs*sizeof(double)); if (weight[i] > 1) { for (j = 0; j < rcategs; j++) { if (clai[j] > 0.0) clai[j] = exp(weight[i]*log(clai[j])); else clai[j] = 0.0; if (cslai[j] > 0.0) cslai[j] = exp(weight[i]*log(cslai[j])); else cslai[j] = 0.0; if (cclai[j] > 0.0) cclai[j] = exp(weight[i]*log(cclai[j])); else cclai[j] = 0.0; } } for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc) * clai[j]; nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs) * clai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cslai[j]; nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc) * clai[j] + 2.0 * ((1.0 - lambda) * theslope[j] + sumcs) * cslai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cclai[j]; } } else { for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc); nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs); nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc); } } memcpy(thelike, nulike, rcategs*sizeof(double)); memcpy(theslope, nuslope, rcategs*sizeof(double)); memcpy(thecurve, nucurve, rcategs*sizeof(double)); } sum2 = 0.0; slope2 = 0.0; curve2 = 0.0; for (i = 0; i < rcategs; i++) { sum2 += probcat[i] * thelike[i]; slope2 += probcat[i] * theslope[i]; curve2 += probcat[i] * thecurve[i]; } sum += log(sum2); (*like) = sum; (*slope) = slope2 / sum2; /* Expressed in terms of *slope to prevent overflow */ (*curve) = curve2 / sum2 - *slope * *slope; } /* prot_slopecurv */ void makenewv(node *p) { /* Newton-Raphson algorithm improvement of a branch length */ long it, ite; double y, yold=0, yorig, like, slope, curve, oldlike=0; boolean done, firsttime, better; node *q; q = p->back; y = p->v; yorig = y; done = false; firsttime = true; it = 1; ite = 0; while ((it < iterations) && (ite < 20) && (!done)) { prot_slopecurv(p, y, &like, &slope, &curve); better = false; if (firsttime) { /* if no older value of y to compare with */ yold = y; oldlike = like; firsttime = false; better = true; } else { if (like > oldlike) { /* update the value of yold if it was better */ yold = y; oldlike = like; better = true; it++; } } if (better) { y = y + slope/fabs(curve); /* Newton-Raphson, forced uphill-wards */ if (y < epsilon) y = epsilon; } else { if (fabs(y - yold) < epsilon) ite = 20; y = (y + (7 * yold)) / 8; /* retract 87% of way back */ } ite++; done = fabs(y-yold) < 0.1*epsilon; } smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); p->v = yold; /* the last one that had better likelihood */ q->v = yold; curtree.likelihood = oldlike; } /* makenewv */ void update(node *p) { node *q; if (!p->tip && 
!p->initialized) prot_nuview(p); if (!p->back->tip && !p->back->initialized) prot_nuview(p->back); if ((!usertree) || (usertree && !lngths) || p->iter) { makenewv(p); if ( smoothit ) { inittrav(p); inittrav(p->back); } else if ( inserting && !p->tip ) { for ( q = p->next; q != p; q = q->next ) q->initialized = false; } } } /* update */ void smooth(node *p) { long i, num_sibs; node *sib_ptr; smoothed = false; update(p); if (p->tip) return; num_sibs = count_sibs(p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; if (polishing || (smoothit && !smoothed)) { smooth(sib_ptr->back); p->initialized = false; sib_ptr->initialized = false; } } } /* smooth */ void make_pmatrix(double **matrix, double **dmat, double **ddmat, long derivative, double lz, double rat, double *eigmat, double **probmat) { /* Computes the R matrix such that matrix[m][l] is the joint probability */ /* of m and l. */ /* Computes a P matrix such that matrix[m][l] is the conditional */ /* probability of m given l. This is accomplished by dividing all terms */ /* in the R matrix by freqaa[m], the frequency of l. */ long k, l, m; /* (l) original character state */ /* (m) final character state */ /* (k) lambda counter */ double p0, p1, p2, q; double elambdat[20], delambdat[20], ddelambdat[20]; /* exponential term for matrix */ /* and both derivative matrices */ for (k = 0; k <= 19; k++) { elambdat[k] = exp(lz * rat * eigmat[k]); if(derivative != 0) { delambdat[k] = (elambdat[k] * rat * eigmat[k]); ddelambdat[k] = (delambdat[k] * rat * eigmat[k]); } } for (m = 0; m <= 19; m++) { for (l = 0; l <= 19; l++) { p0 = 0.0; p1 = 0.0; p2 = 0.0; for (k = 0; k <= 19; k++) { q = probmat[k][m] * probmat[k][l]; p0 += (q * elambdat[k]); if(derivative !=0) { p1 += (q * delambdat[k]); p2 += (q * ddelambdat[k]); } } matrix[m][l] = p0 / freqaa[m]; if(derivative != 0) { dmat[m][l] = p1 / freqaa[m]; ddmat[m][l] = p2 / freqaa[m]; } } } } /* make_pmatrix */ double prot_evaluate(node *p, boolean saveit) { contribarr tterm; double sum, sum2, sumc, y, prod4, prodl, frexm, sumterm, lterm; double **pmat; long i, j, k, l, m, lai; node *q; psitelike x1, x2; sum = 0.0; q = p->back; y = p->v; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat); for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { memcpy(x1, p->protx[i][j], sizeof(psitelike)); memcpy(x2, q->protx[i][j], sizeof(psitelike)); prod4 = 0.0; pmat = pmatrices[0][j][k]; for (m = 0; m <= 19; m++) { prodl = 0.0; for (l = 0; l <= 19; l++) prodl += (pmat[m][l] * x2[l]); frexm = x1[m] * freqaa[m]; prod4 += (prodl * frexm); } tterm[j] = prod4; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; if (sumterm < 0.0) sumterm = 0.00000001; /* ??? 
*/ lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, rcategs*sizeof(double)); if (saveit && !auto_ && usertree && (which <= shimotrees)) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, rcategs*sizeof(double)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * like[i]; sum += log(sum2); curtree.likelihood = sum; if (!saveit || auto_ || !usertree) return sum; if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum; } /* prot_evaluate */ void treevaluate(void) { /* evaluate a user tree */ long i; inittravtree(curtree.start); polishing = true; smoothit = true; for (i = 1; i <= smoothings * 4; i++) smooth (curtree.start); dummy = prot_evaluate(curtree.start, true); } /* treevaluate */ void promlcopy(tree *a, tree *b, long nonodes, long categs) { /* copy tree a to tree b */ long i, j=0; node *p, *q; for (i = 0; i < spp; i++) { prot_copynode(a->nodep[i], b->nodep[i], categs); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next ) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { prot_copynode(p, q, categs); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; b->start = a->start; /* start used in dnaml only */ b->root = a->root; /* root used in dnamlk only */ } /* promlcopy */ void proml_re_move(node **p, node **q) { /* remove p and record in q where it was */ long i; /* assumes bifurcation (OK) */ *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); (*p)->next->back = NULL; (*p)->next->next->back = NULL; (*q)->v += (*q)->back->v; (*q)->back->v = (*q)->v; if ( smoothit ) { inittrav((*q)); inittrav((*q)->back); inittrav((*p)->back); } if ( smoothit ) { for ( i = 0 ; i < smoothings ; i++ ) { smooth(*q); smooth((*q)->back); } } else smooth(*q); } /* proml_re_move */ void insert_(node *p, node *q, boolean dooinit) { /* Insert q near p */ long i, j, num_sibs; node *r, *sib_ptr; r = p->next->next; hookup(r, q->back); hookup(p->next, q); q->v = 0.5 * q->v; q->back->v = q->v; r->v = q->v; r->back->v = r->v; p->initialized = false; if (dooinit) { inittrav(p); inittrav(q); inittrav(q->back); } i = 1; inserting = true; while (i <= smoothings) { smooth(p); if (!p->tip) { num_sibs = count_sibs(p); 
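/* Note (illustrative, not from the upstream PHYLIP source): the loops over
   sites in prot_evaluate() and prot_slopecurv() above implement a
   hidden-Markov-model average over rate categories with autocorrelation
   parameter lambda (the user-facing option is the expected patch length,
   stored as its reciprocal in emboss_getoptions).  For each site the
   per-category likelihood is updated roughly as

       nulike[j] = ((1 - lambda) * like[j]
                    + lambda * sum_k probcat[k] * like[k]) * contrib[site][j];

   so with lambda = 1 the categories are independent from site to site, and a
   smaller lambda makes neighbouring sites more likely to share a rate
   category.  The overall log-likelihood is the log of the probcat-weighted sum
   of the final like[] values, added to the per-pattern terms accumulated
   earlier. */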
sib_ptr = p; for (j=0; j < num_sibs; j++) { smooth(sib_ptr->next->back); sib_ptr = sib_ptr->next; } } i++; } inserting = false; } /* insert_ */ void addtraverse(node *p, node *q, boolean contin) { /* try adding p at q, proceed recursively through tree */ long i, num_sibs; double like, vsave = 0; node *qback = NULL, *sib_ptr; if (!smoothit) { vsave = q->v; qback = q->back; } insert_(p, q, false); like = prot_evaluate(p, false); if (like > bestyet + LIKE_EPSILON || bestyet == UNDEFINED) { bestyet = like; if (smoothit) { addwhere = q; promlcopy(&curtree, &bestree, nonodes2, rcategs); } else qwhere = q; succeeded = true; } if (smoothit) promlcopy(&priortree, &curtree, nonodes2, rcategs); else { hookup (q, qback); q->v = vsave; q->back->v = vsave; curtree.likelihood = bestyet; } if (!q->tip && contin) { num_sibs = count_sibs(q); if (q == curtree.start) num_sibs++; sib_ptr = q; for (i=0; i < num_sibs; i++) { addtraverse(p, sib_ptr->next->back, contin); sib_ptr = sib_ptr->next; } } } /* addtraverse */ void globrearrange(void) { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,l,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; alloctree(&globtree.nodep,nonodes2,0); alloctree(&oldtree.nodep,nonodes2,0); setuptree2(&globtree); setuptree2(&oldtree); prot_allocx(nonodes2, rcategs, globtree.nodep, 0); prot_allocx(nonodes2, rcategs, oldtree.nodep, 0); promlcopy(&curtree,&globtree,nonodes2,rcategs); promlcopy(&curtree,&oldtree,nonodes2,rcategs); bestyet = curtree.likelihood; for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { proml_re_move(&sib_ptr,&where); promlcopy(&curtree,&priortree,nonodes2,rcategs); qwhere = where; if (where->tip) { promlcopy(&oldtree,&curtree,nonodes2,rcategs); promlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr=sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true); if ( !smoothit ) { if (succeeded && qwhere != where && qwhere != where->back) { insert_(sib_ptr,qwhere,true); smoothit = true; for (l = 1; l<=smoothings; l++) { smooth (where); smooth (where->back); } smoothit = false; success = true; promlcopy(&curtree,&globtree,nonodes2,rcategs); promlcopy(&priortree,&curtree,nonodes2,rcategs); } } else if ( addwhere && where != addwhere && where->back != addwhere && bestyet > globtree.likelihood) { promlcopy(&bestree,&globtree,nonodes2,rcategs); success = true; } sib_ptr2 = sib_ptr2->next; } promlcopy(&oldtree,&curtree,nonodes2,rcategs); promlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr = sib_ptr->next; } } promlcopy(&globtree,&curtree,nonodes2,rcategs); promlcopy(&globtree,&bestree,nonodes2,rcategs); if (success && globtree.likelihood > oldbestyet) { succeeded = true; } else { succeeded = false; } bestyet = globtree.likelihood; prot_freex(nonodes2,oldtree.nodep); prot_freex(nonodes2,globtree.nodep); freetree2(globtree.nodep,nonodes2); freetree2(oldtree.nodep,nonodes2); } /* globrearrange */ void freelrsaves() { long i,j; for ( i = 0 ; i < NLRSAVES ; i++ ) { for (j = 0; j < oldendsite; j++) free(lrsaves[i]->protx[j]); free(lrsaves[i]->protx); free(lrsaves[i]->underflows); free(lrsaves[i]); } free(lrsaves); } void alloclrsaves() { long i,j; lrsaves = Malloc(NLRSAVES * sizeof(node*)); oldendsite = 
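/* Note (illustrative, not from the upstream PHYLIP source): the search code
   above works by pruning and regrafting.  addtraverse() tentatively inserts a
   subtree on each branch in turn (recursing through the tree), keeps a
   candidate only if the likelihood beats bestyet by more than LIKE_EPSILON,
   and either copies whole trees (smoothit true) or just remembers the best
   attachment point qwhere (smoothit false, the cheap scan).  globrearrange()
   drives this globally: each internal branch is removed with proml_re_move(),
   the detached subtree is offered to every other branch, and only a genuine
   improvement over the starting likelihood is kept.  The lrsaves[] nodes
   allocated here let the cheap scan in rearrange() restore the original local
   state without copying the whole tree. */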
endsite; for ( i = 0 ; i < NLRSAVES ; i++ ) { lrsaves[i] = Malloc(sizeof(node)); lrsaves[i]->protx = Malloc(endsite*sizeof(ratelike)); lrsaves[i]->underflows = Malloc(endsite * sizeof (double)); for (j = 0; j < endsite; j++) lrsaves[i]->protx[j] = (pratelike)Malloc(rcategs*sizeof(psitelike)); } } /* alloclrsaves */ void rearrange(node *p, node *pp) { /* rearranges the tree locally moving pp around near p */ long i, num_sibs; node *q, *r, *sib_ptr; node *rnb=NULL, *rnnb=NULL; /* assumes bifurcations (OK) */ if (!p->tip && !p->back->tip) { curtree.likelihood = bestyet; if (p->back->next != pp) r = p->back->next; else r = p->back->next->next; if (!smoothit) { rnb = r->next->back; rnnb = r->next->next->back; prot_copynode(r,lrsaves[0],rcategs); prot_copynode(r->next,lrsaves[1],rcategs); prot_copynode(r->next->next,lrsaves[2],rcategs); prot_copynode(p->next,lrsaves[3],rcategs); prot_copynode(p->next->next,lrsaves[4],rcategs); } else promlcopy(&curtree, &bestree, nonodes2, rcategs); proml_re_move(&r, &q); if (smoothit) promlcopy(&curtree, &priortree, nonodes2, rcategs); else qwhere = q; num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; addtraverse(r, sib_ptr->back, false); } if (smoothit) promlcopy(&bestree, &curtree, nonodes2, rcategs); else { if (qwhere == q) { hookup(rnb,r->next); hookup(rnnb,r->next->next); prot_copynode(lrsaves[0],r,rcategs); prot_copynode(lrsaves[1],r->next,rcategs); prot_copynode(lrsaves[2],r->next->next,rcategs); prot_copynode(lrsaves[3],p->next,rcategs); prot_copynode(lrsaves[4],p->next->next,rcategs); rnb->v = r->next->v; rnnb->v = r->next->next->v; r->back->v = r->v; curtree.likelihood = bestyet; } else { insert_(r, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth(r); smooth(r->back); } smoothit = false; promlcopy(&curtree, &bestree, nonodes2, rcategs); } } } if (!p->tip) { num_sibs = count_sibs(p); if (p == curtree.start) num_sibs++; sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; rearrange(sib_ptr->back, p); } } } /* rearrange */ void proml_coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; double xx; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { xx = q->v; if (xx > 100.0) xx = 100.0; proml_coordinates(q->back, lengthsum + xx, tipy,tipmax); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if (p == curtree.start) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* proml_coordinates */ void proml_printree(void) { /* prints out diagram of the tree2 */ long tipy; double scale, tipmax; long i; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; proml_coordinates(curtree.start, 0.0, &tipy, &tipmax); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline2(i, scale, curtree); putc('\n', outfile); } /* proml_printree */ void sigma(node *p, double *sumlr, double *s1, double *s2) { /* compute standard deviation */ double tt, aa, like, slope, curv; prot_slopecurv(p, p->v, &like, &slope, &curv); tt = p->v; p->v = epsilon; p->back->v = epsilon; 
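/* Note (illustrative, not from the upstream PHYLIP source): sigma() builds an
   approximate confidence interval for a branch length from the local quadratic
   model of the log-likelihood.  sumlr is the log-likelihood gained over
   collapsing the branch to epsilon (a likelihood-ratio statistic), and the
   interval endpoints s1, s2 are the two roots d of

       slope * d + 0.5 * curve * d * d = -3.841 / 2

   measured from the current length, 3.841 being the 5% point of the chi-square
   distribution with one degree of freedom; when the curvature is not safely
   negative the interval is reported as (zero, infinity).  The '*' and '**'
   marks printed by describe() compare sumlr against fixed thresholds
   corresponding to the 5% and 1% significance levels quoted in summarize(). */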
aa = prot_evaluate(p, false); p->v = tt; p->back->v = tt; (*sumlr) = prot_evaluate(p, false) - aa; if (curv < -epsilon) { (*s1) = p->v + (-slope - sqrt(slope * slope - 3.841 * curv)) / curv; (*s2) = p->v + (-slope + sqrt(slope * slope - 3.841 * curv)) / curv; } else { (*s1) = -1.0; (*s2) = -1.0; } } /* sigma */ void describe(node *p) { /* print out information for one branch */ long i, num_sibs; node *q, *sib_ptr; double sumlr, sigma1, sigma2; if (!p->tip && !p->initialized) prot_nuview(p); if (!p->back->tip && !p->back->initialized) prot_nuview(p->back); q = p->back; if (q->tip) { fprintf(outfile, " "); for (i = 0; i < nmlngth; i++) putc(nayme[q->index-1][i], outfile); fprintf(outfile, " "); } else fprintf(outfile, " %4ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, "%15.5f", q->v); if (!usertree || (usertree && !lngths) || p->iter) { sigma(q, &sumlr, &sigma1, &sigma2); if (sigma1 <= sigma2) fprintf(outfile, " ( zero, infinity)"); else { fprintf(outfile, " ("); if (sigma2 <= 0.0) fprintf(outfile, " zero"); else fprintf(outfile, "%9.5f", sigma2); fprintf(outfile, ",%12.5f", sigma1); putc(')', outfile); } if (sumlr > 1.9205) fprintf(outfile, " *"); if (sumlr > 2.995) putc('*', outfile); } putc('\n', outfile); if (!p->tip) { num_sibs = count_sibs(p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } } } /* describe */ void prot_reconstr(node *p, long n) { /* reconstruct and print out acid at site n+1 at node p */ long i, j, k, first, num_sibs = 0; double f, sum, xx[20]; node *q = NULL; if (p->tip) putc(y[p->index-1][n], outfile); else { num_sibs = count_sibs(p); if ((ally[n] == 0) || (location[ally[n]-1] == 0)) putc('.', outfile); else { j = location[ally[n]-1] - 1; sum = 0; for (i = 0; i <= 19; i++) { f = p->protx[j][mx-1][i]; if (!p->tip) { q = p; for (k = 0; k < num_sibs; k++) { q = q->next; f *= q->protx[j][mx-1][i]; } } f = sqrt(f); xx[i] = f * freqaa[i]; sum += xx[i]; } for (i = 0; i <= 19; i++) xx[i] /= sum; first = 0; for (i = 0; i <= 19; i++) if (xx[i] > xx[first]) first = i; if (xx[first] > 0.95) putc(aachar[first], outfile); else putc(tolower((int)aachar[first]), outfile); if (rctgry && rcategs > 1) mx = mp[n][mx - 1]; else mx = 1; } } } /* prot_reconstr */ void rectrav(node *p, long m, long n) { /* print out segment of reconstructed sequence for one branch */ long i; node *q; putc(' ', outfile); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, " "); mx = mx0; for (i = m; i <= n; i++) { if ((i % 10 == 0) && (i != m)) putc(' ', outfile); prot_reconstr(p, i); } putc('\n', outfile); if (!p->tip) for ( q = p->next; q != p; q = q->next ) rectrav(q->back, m, n); mx1 = mx; } /* rectrav */ void summarize(void) { /* print out branch length information and node numbers */ long i, j, mm=0, num_sibs; double mode, sum; double like[maxcategs],nulike[maxcategs]; double **marginal; node *sib_ptr; if (!treeprint) return; fprintf(outfile, "\nremember: "); if (outgropt) fprintf(outfile, "(although rooted by outgroup) "); fprintf(outfile, "this is an unrooted tree!\n\n"); fprintf(outfile, "Ln Likelihood = %11.5f\n", curtree.likelihood); fprintf(outfile, "\n Between And Length"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " Approx. 
Confidence Limits"); fprintf(outfile, "\n"); fprintf(outfile, " ------- --- ------"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " ------- ---------- ------"); fprintf(outfile, "\n\n"); for (i = spp; i < nonodes2; i++) { /* So this works with arbitrary multifurcations */ if (curtree.nodep[i]) { num_sibs = count_sibs (curtree.nodep[i]); sib_ptr = curtree.nodep[i]; for (j = 0; j < num_sibs; j++) { sib_ptr->initialized = false; sib_ptr = sib_ptr->next; } } } describe(curtree.start->back); /* So this works with arbitrary multifurcations */ num_sibs = count_sibs(curtree.start); sib_ptr = curtree.start; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } fprintf(outfile, "\n"); if (!(usertree && lngths && haslengths)) { fprintf(outfile, " * = significantly positive, P < 0.05\n"); fprintf(outfile, " ** = significantly positive, P < 0.01\n\n"); } dummy = prot_evaluate(curtree.start, false); if (rctgry && rcategs > 1) { for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; mp[i][j] = j + 1; for (k = 1; k <= rcategs; k++) { if (k != j + 1) { if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { nulike[j] = lambda * probcat[k - 1] * like[k - 1]; mp[i][j] = k; } } } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); } mode = 0.0; mx = 1; for (i = 1; i <= rcategs; i++) { if (probcat[i - 1] * like[i - 1] > mode) { mx = i; mode = probcat[i - 1] * like[i - 1]; } } mx0 = mx; fprintf(outfile, "Combination of categories that contributes the most to the likelihood:\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= sites; i++) { fprintf(outfile, "%ld", mx); if (i % 10 == 0) putc(' ', outfile); if (i % 60 == 0 && i != sites) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } mx = mp[i - 1][mx - 1]; } fprintf(outfile, "\n\n"); marginal = (double **) Malloc(sites*sizeof(double *)); for (i = 0; i < sites; i++) marginal[i] = (double *) Malloc(rcategs*sizeof(double)); for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) { nulike[j] /= sum; marginal[i][j] = nulike[j]; } memcpy(like, nulike, rcategs * sizeof(double)); } for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = 0; i < sites; i++) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } marginal[i][j] *= like[j] * probcat[j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); sum = 0.0; for (j = 0; j < rcategs; j++) sum += marginal[i][j]; for (j = 0; j < rcategs; j++) marginal[i][j] /= sum; } fprintf(outfile, "Most probable category at each site if > 0.95"); fprintf(outfile, " probability (\".\" otherwise)\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); 
for (i = 0; i < sites; i++) { sum = 0.0; mm = 0; for (j = 0; j < rcategs; j++) if (marginal[i][j] > sum) { sum = marginal[i][j]; mm = j; } if (sum >= 0.95) fprintf(outfile, "%ld", mm+1); else putc('.', outfile); if ((i+1) % 60 == 0) { if (i != 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } } else if ((i+1) % 10 == 0) putc(' ', outfile); } putc('\n', outfile); for (i = 0; i < sites; i++) free(marginal[i]); free(marginal); } putc('\n', outfile); if (hypstate) { fprintf(outfile, "Probable sequences at interior nodes:\n\n"); fprintf(outfile, " node "); for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) putc(' ', outfile); fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); if (!rctgry || (rcategs == 1)) mx0 = 1; for (i = 0; i < sites; i += 60) { k = i + 59; if (k >= sites) k = sites - 1; rectrav(curtree.start, i, k); rectrav(curtree.start->back, i, k); putc('\n', outfile); mx0 = mx1; } } } /* summarize */ void initpromlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; malloc_ppheno((*p), endsite, rcategs); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); malloc_ppheno(*p, endsite, rcategs); (*p)->index = nodei; break; case tip: match_names_to_data(str, nodep, p, spp); break; case iter: (*p)->initialized = false; (*p)->v = initialv; (*p)->iter = true; if ((*p)->back != NULL){ (*p)->back->iter = true; (*p)->back->v = initialv; (*p)->back->initialized = false; } break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); (*p)->v = valyew / divisor; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hsnolength: haslengths = false; break; default: /* cases hslength, treewt, unittrwt */ break; /* should never occur */ } } /* initpromlnode */ void dnaml_treeout(node *p) { /* write out file with representation of final tree2 */ /* Only works for bifurcations! 
*/ long i, n, w; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index-1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index-1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { putc('(', outtree); col++; dnaml_treeout(p->next->back); putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } dnaml_treeout(p->next->next->back); if (p == curtree.start) { putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } dnaml_treeout(p->back); } putc(')', outtree); col++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree.start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } } /* dnaml_treeout */ void buildnewtip(long m, tree *tr) { node *p; p = tr->nodep[nextsp + spp - 3]; hookup(tr->nodep[m - 1], p); p->v = initialv; p->back->v = initialv; } /* buildnewtip */ void buildsimpletree(tree *tr) { hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); tr->nodep[enterorder[0] - 1]->v = 1.0; tr->nodep[enterorder[0] - 1]->back->v = 1.0; tr->nodep[enterorder[1] - 1]->v = 1.0; tr->nodep[enterorder[1] - 1]->back->v = 1.0; buildnewtip(enterorder[2], tr); insert_(tr->nodep[enterorder[2] - 1]->back, tr->nodep[enterorder[0] - 1], false); } /* buildsimpletree */ void free_all_protx (long nonodes, pointarray treenode) { /* used in proml */ long i, j, k; node *p; /* Zero thru spp are tips, */ for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->protx[j]); free(treenode[i]->protx); } /* The rest are rings (i.e. triads) */ for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]; do { for (k = 0; k < endsite; k++) free(p->protx[k]); free(p->protx); p = p->next; } while (p != treenode[i]); } } } /* free_all_protx */ void proml_unroot(node* root, node** nodep, long nonodes) { node *p,*r,*q; double newl; long i; long numsibs; numsibs = count_sibs(root); if ( numsibs > 2 ) { q = root; r = root; while (!(q->next == root)) q = q->next; q->next = root->next; for(i=0 ; i < endsite ; i++){ free(r->protx[i]); r->protx[i] = NULL; } free(r->protx); r->protx = NULL; chuck(&grbg, r); curtree.nodep[spp] = q; } else { /* Bifurcating root - remove entire root fork */ /* Join oldlen on each side of root */ newl = root->next->oldlen + root->next->next->oldlen; root->next->back->oldlen = newl; root->next->next->back->oldlen = newl; /* Join v on each side of root */ newl = root->next->v + root->next->next->v; root->next->back->v = newl; root->next->next->back->v = newl; /* Connect root's children */ root->next->back->back=root->next->next->back; root->next->next->back->back = root->next->back; /* Move nodep entries down one and set indices */ for ( i = spp; i < nonodes-1; i++ ) { p = nodep[i+1]; nodep[i] = p; nodep[i+1] = NULL; if ( nodep[i] == NULL ) /* This may happen in a multifurcating intree */ break; do { p->index = i+1; p = p->next; } while (p != nodep[i]); } /* Free protx arrays from old root */ for(i=0 ; i < endsite ; i++){ free(root->protx[i]); free(root->next->protx[i]); free(root->next->next->protx[i]); root->protx[i] = NULL; root->next->protx[i] = NULL; root->next->next->protx[i] = NULL; } free(root->protx); free(root->next->protx); free(root->next->next->protx); chuck(&grbg,root->next->next); chuck(&grbg,root->next); chuck(&grbg,root); } } /* proml_unroot */ void maketree(void) { long i, j; boolean 
dummy_first, goteof; pointarray dummy_treenode=NULL; long nextnode; node *root; char* treestr; prot_inittable(); if (usertree) { if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); l0gl = (double *) Malloc(shimotrees * sizeof(double)); l0gf = (double **) Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *) Malloc(endsite * sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } which = 1; /* This taken out of tree read, used to be [spp-1], but referring to [0] produces output identical to what the pre-modified dnaml produced. */ while (which <= numtrees) { /* These initializations required each time through the loop since multiple trees require re-initialization */ haslengths = true; nextnode = 0; dummy_first = true; goteof = false; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, dummy_treenode, &goteof, &dummy_first, curtree.nodep, &nextnode, &haslengths, &grbg, initpromlnode,false,nonodes2); proml_unroot(root,curtree.nodep,nonodes2); if (goteof && (which <= numtrees)) { /* if we hit the end of the file prematurely */ printf ("\n"); printf ("ERROR: trees missing at end of file.\n"); printf ("\tExpected number of trees:\t\t%ld\n", numtrees); printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); exxit (-1); } curtree.start = curtree.nodep[0]->back; if ( outgropt ) curtree.start = curtree.nodep[outgrno - 1]->back; treevaluate(); proml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } if(which < numtrees){ prot_freex_notip(nextnode, curtree.nodep); gdispose(curtree.start, &grbg, curtree.nodep); } else nonodes2 = nextnode; which++; } FClose(intree); putc('\n', outfile); if (!auto_ && numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite-1, maxlogl, l0gl, l0gf, aliasweight, seed); } else { /* no user tree */ for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); if (progress) { printf("\nAdding species:\n"); writename(0, 3, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } nextsp = 3; polishing = false; smoothit = improve; buildsimpletree(&curtree); curtree.start = curtree.nodep[enterorder[0] - 1]->back; nextsp = 4; while (nextsp <= spp) { buildnewtip(enterorder[nextsp - 1], &curtree); bestyet = UNDEFINED; if (smoothit) promlcopy(&curtree, &priortree, nonodes2, rcategs); addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.start, true); if (smoothit) promlcopy(&bestree, &curtree, nonodes2, rcategs); else { insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth(curtree.start); smooth(curtree.start->back); } smoothit = false; promlcopy(&curtree, &bestree, nonodes2, rcategs); bestyet = curtree.likelihood; } if (progress) { writename(nextsp - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (global && nextsp == spp && progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = spp ; j < nonodes2 ; j++) if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } succeeded = true; while (succeeded) { succeeded = false; if (global && nextsp == spp && progress) { printf(" "); fflush(stdout); } if (global && nextsp == spp) globrearrange(); else rearrange(curtree.start, 
curtree.start->back); if (global && nextsp == spp && progress) putchar('\n'); } nextsp++; } if (global && progress) { putchar('\n'); fflush(stdout); #ifdef WIN32 phyFillScreenColor(); #endif } promlcopy(&curtree, &bestree, nonodes2, rcategs); if (njumble > 1) { if (jumb == 1) promlcopy(&bestree, &bestree2, nonodes2, rcategs); else if (bestree2.likelihood < bestree.likelihood) promlcopy(&bestree, &bestree2, nonodes2, rcategs); } if (jumb == njumble) { if (njumble > 1) promlcopy(&bestree2, &curtree, nonodes2, rcategs); curtree.start = curtree.nodep[outgrno - 1]->back; for (i = 0; i < nonodes2; i++) { if (i < spp) curtree.nodep[i]->initialized = false; else { curtree.nodep[i]->initialized = false; curtree.nodep[i]->next->initialized = false; curtree.nodep[i]->next->next->initialized = false; } } treevaluate(); proml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } } } if (usertree) { free(l0gl); for (i=0; i < shimotrees; i++) free(l0gf[i]); free(l0gf); } prot_freetable(); if (jumb < njumble) return; free(contribution); free(mp); for (i=0; i < endsite; i++) free(term[i]); free(term); for (i=0; i < endsite; i++) free(slopeterm[i]); free(slopeterm); for (i=0; i < endsite; i++) free(curveterm[i]); free(curveterm); free_all_protx(nonodes2, curtree.nodep); if (!usertree) { free_all_protx(nonodes2, bestree.nodep); free_all_protx(nonodes2, priortree.nodep); if (njumble > 1) free_all_protx(nonodes2, bestree2.nodep); } if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } } /* maketree */ void clean_up(void) { /* Free and/or close stuff */ long i; free (rrate); free (probcat); free (rate); /* Seems to require freeing every time... */ for (i = 0; i < spp; i++) { free (y[i]); } free (y); free (nayme); free (enterorder); free (category); free (weight); free (alias); free (ally); free (location); free (aliasweight); free (probmat); free (eigmat); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif } /* clean_up */ int main(int argc, Char *argv[]) { /* Protein Maximum Likelihood */ #ifdef MAC argc = 1; /* macsetup("ProML",""); */ argv[0] = "ProML"; #endif init(argc,argv); emboss_getoptions("fproml", argc, argv); progname = argv[0]; firstset = true; ibmpc = IBMCRT; ansi = ANSICRT; grbg = NULL; doinit(); for (ith = 1; ith <= datasets; ith++) { if (datasets > 1) { fprintf(outfile, "Data set # %ld:\n", ith); printf("\nData set # %ld:\n", ith); } getinput(); if (ith == 1) firstset = false; if (usertree) { max_num_sibs = 0; maketree(); } else for (jumb = 1; jumb <= njumble; jumb++) { max_num_sibs = 0; maketree(); } } clean_up(); printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Protein Maximum Likelihood */ PHYLIPNEW-3.69.650/src/cont.c0000664000175000017500000001526310775447511012206 00000000000000/* version 3.6. (c) Copyright 1999-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #include #include "phylip.h" #include "cont.h" void alloctree(pointarray *treenode, long nonodes) { /* allocate treenode dynamically */ /* used in contml & contrast */ long i, j; node *p, *q; *treenode = (pointarray)Malloc(nonodes*sizeof(node *)); for (i = 0; i < spp; i++) (*treenode)[i] = (node *)Malloc(sizeof(node)); for (i = spp; i < nonodes; i++) { q = NULL; for (j = 1; j <= 3; j++) { p = (node *)Malloc(sizeof(node)); p->next = q; q = p; } p->next->next->next = p; (*treenode)[i] = p; } } /* alloctree */ void freetree(pointarray *treenode, long nonodes) { long i, j; node *p, *q; for (i = 0; i < spp; i++) free((*treenode)[i]); for (i = spp; i < nonodes; i++) { p = (*treenode)[i]; for (j = 1; j <= 3; j++) { q = p; p = p->next; free(q); } } free(*treenode); } /* freetree */ void setuptree(tree *a, long nonodes) { /* initialize a tree */ /* used in contml & contrast */ long i, j; node *p; for (i = 1; i <= spp; i++) { a->nodep[i - 1]->back = NULL; a->nodep[i - 1]->tip = (i <= spp); a->nodep[i - 1]->iter = true; a->nodep[i - 1]->index = i; } for (i = spp + 1; i <= nonodes; i++) { p = a->nodep[i - 1]; for (j = 1; j <= 3; j++) { p->back = NULL; p->tip = false; p->iter = true; p->index = i; p = p->next; } } a->likelihood = -DBL_MAX; a->start = a->nodep[0]; } /* setuptree */ void allocview(tree *a, long nonodes, long totalleles) { /* allocate view */ /* used in contml */ long i, j; node *p; for (i = 0; i < spp; i++) a->nodep[i]->view = (phenotype3)Malloc(totalleles*sizeof(double)); for (i = spp; i < nonodes; i++) { p = a->nodep[i]; for (j = 1; j <= 3; j++) { p->view = (phenotype3)Malloc(totalleles*sizeof(double)); p = p->next; } } } /* allocview */ void freeview(tree *a, long nonodes) { /* deallocate view */ /* used in contml */ long i, j; node *p; for (i = 0; i < spp; i++) free(a->nodep[i]->view); for (i = spp; i < nonodes; i++) { p = a->nodep[i]; for (j = 1; j <= 3; j++) { free(p->view); p = p->next; } } } /* freeview */ void standev2(long numtrees, long maxwhich, long a, long b, double maxlogl, double *l0gl, double **l0gf, longer seed) { /* compute and write standard deviation of user trees */ /* used in contml */ double **covar, *P, *f; long i, j, k; double sumw, sum, sum2, sd; double temp; #define SAMPLES 1000 /* ????? 
if numtrees too big for Shimo, truncate */ if (numtrees == 2) { fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); fprintf(outfile, "Tree logL Diff logL Its S.D."); fprintf(outfile, " Significantly worse?\n\n"); i = 1; while (i <= numtrees) { fprintf(outfile, "%3ld%10.1f", i, l0gl[i - 1]); if (maxwhich == i) fprintf(outfile, " <------ best\n"); else { sumw = 0.0; sum = 0.0; sum2 = 0.0; for (j = a; j <= b; j++) { sumw += 1; temp = l0gf[i - 1][j] - l0gf[maxwhich - 1][j]; sum += temp; sum2 += temp * temp; } temp = sum / sumw; sd = sqrt(sumw / (sumw - 1.0) * (sum2 - temp * temp)); fprintf(outfile, "%10.1f%12.4f", (l0gl[i - 1])-maxlogl, sd); if (sum > 1.95996 * sd) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } i++; } fprintf(outfile, "\n\n"); } else { /* Shimodaira-Hasegawa test using normal approximation */ if(numtrees > MAXSHIMOTREES){ fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" , MAXSHIMOTREES, numtrees); numtrees = MAXSHIMOTREES; } else { fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); } covar = (double **)Malloc(numtrees*sizeof(double *)); sumw = b-a+1; for (i = 0; i < numtrees; i++) covar[i] = (double *)Malloc(numtrees*sizeof(double)); for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ sum = l0gl[i]/sumw; for (j = 0; j <=i; j++) { sum2 = l0gl[j]/sumw; temp = 0.0; for (k = a; k <= b ; k++) { temp = temp + (l0gf[i][k]-sum)*(l0gf[j][k]-sum2); } covar[i][j] = temp; if (i != j) covar[j][i] = temp; } } for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition of trees x trees covariance matrix */ sum = 0.0; for (j = 0; j <= i-1; j++) sum = sum + covar[i][j] * covar[i][j]; temp = sqrt(covar[i][i] - sum); covar[i][i] = temp; for (j = i+1; j < numtrees; j++) { sum = 0.0; for (k = 0; k < i; k++) sum = sum + covar[i][k] * covar[j][k]; if (fabs(temp) < 1.0E-12) covar[j][i] = 0.0; else covar[j][i] = (covar[j][i] - sum)/temp; } } f = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ for (i = 0; i < numtrees; i++) P[i] = 0.0; for (i = 1; i < SAMPLES; i++) { /* loop over resampled trees */ for (j = 0; j < numtrees; j++) { /* draw vectors */ sum = 0.0; for (k = 0; k <= j; k++) sum += normrand(seed)*covar[j][k]; f[j] = sum; } sum = f[1]; for (j = 1; j < numtrees; j++) /* get max of vector */ if (f[j] > sum) sum = f[j]; for (j = 0; j < numtrees; j++) /* accumulate P's */ if (maxlogl-l0gl[j] < sum-f[j]) P[j] += 1.0/SAMPLES; } fprintf(outfile, "Tree logL Diff logL P value"); fprintf(outfile, " Significantly worse?\n\n"); for (i = 0; i < numtrees; i++) { fprintf(outfile, "%3ld%10.1f", i+1, l0gl[i]); if ((maxwhich-1) == i) fprintf(outfile, " <------ best\n"); else { fprintf(outfile, " %9.1f %10.3f", l0gl[i]-maxlogl, P[i]); if (P[i] < 0.05) fprintf(outfile, " Yes\n"); else fprintf(outfile, " No\n"); } } fprintf(outfile, "\n"); free(P); /* free the variables we Malloc'ed */ free(f); for (i = 0; i < numtrees; i++) free(covar[i]); free(covar); } } /* standev */ PHYLIPNEW-3.69.650/src/drawgram.c0000664000175000017500000010142511605067345013037 00000000000000 #ifdef OSX_CARBON #include #endif #include "phylip.h" #include "draw.h" /* Version 3.6. Copyright (c) 1986-2004 by The University of Washington and Written by Joseph Felsenstein and Christopher A. Meacham. Additional code written by Hisashi Horino, Sean Lamont, Andrew Keefe, Daniel Fineman, Akiko Fuseki, Doug Buxton and Michal Palczewski. 
Permission is granted to copy, distribute, and modify this program provided that (1) this copyright message is not removed and (2) no fee is charged for this program. */ #ifdef MAC char* about_message = "Drawgram unrooted tree plotting program\r" "PHYLIP version 3.6 (c) Copyright 1986-2004\r" "by The University of Washington.\r" "Written by Joseph Felsenstein and Christopher A. Meacham.\r" "Additional code written by Hisashi Horino, Sean Lamont, Andrew Keefe,\r" "Daniel Fineman, Akiko Fuseki, Doug Buxton and Michal Palczewski.\r" "Permission is granted to copy, distribute and modify this program\r" "provided that\r" "(1) This copyright message is not removed and\r" "(2) no fee is charged for this program."; #endif #define gap 0.5 /* distance in character heights between the end of a branch and the start of the name */ FILE *plotfile; AjPFile embossplotfile; const char *pltfilename; char trefilename[FNMLNGTH]; char *progname; AjPPhyloTree* phylotrees = NULL; long nextnode, strpwide, strpdeep, strpdiv, strptop, strpbottom, payge, numlines, hpresolution, iteration; boolean preview, previewing, dotmatrix, haslengths, uselengths, empty, rescaled, firstscreens, pictbold, pictitalic, pictshadow, pictoutline, multiplot, finished; double xmargin, ymargin, topoflabels, bottomoflabels, rightoflabels, leftoflabels, tipspacing,maxheight, scale, xscale, yscale, xoffset, yoffset, nodespace, stemlength, treedepth, xnow, ynow, xunitspercm, yunitspercm, xsize, ysize, xcorner, ycorner, labelheight,labelrotation,expand, rootx, rooty, bscale, xx0, yy0, fontheight, maxx, minx, maxy, miny; double pagex, pagey, paperx, papery, hpmargin, vpmargin; double *textlength, *firstlet; striptype stripe; plottertype plotter, oldplotter, previewer; growth grows; treestyle style; node *root; pointarray nodep; pointarray treenode; fonttype font; long filesize; Char ch, resopts; double trweight; /* starting here, needed to make sccs version happy */ boolean goteof; node *grbg; long *zeros; /* ... down to here */ enum {yes, no} penchange, oldpenchange; static enum {weighted, intermediate, centered, inner, vshaped} nodeposition; winactiontype winaction; #ifndef X_DISPLAY_MISSING String res[]= { "*.input: True", "*.menubar.orientation: horizontal", "*.menubar.borderWidth: 0", "*.drawing_area.background: #CCFFFF", "*.drawing_area.foreground: #000000", "*.menubar.right: ChainLeft", "*.menubar.bottom: ChainTop", "*.menubar.top: ChainTop", "*.menubar.left: ChainLeft", "*.drawing_area.fromVert: menubar", "*.drawing_area.top: ChainTop", "*.drawing_area.bottom: ChainBottom", "*.drawing_area.left: ChainLeft", "*.drawing_area.right: ChainRight", "*.dialog.label: Drawgram -- Rooted tree plotting program\\n\\n\\nPHYLIP version 3.6. 
(c) Copyright 1993-2002 by The University of Washington.\\nWritten by Joseph Felsenstein, Andrew Keeffe, Akiko Fuseki, Sean Lamont\\nand Dan Fineman\\nPermission is granted to copy and use this program provided no fee is\\ncharged for it and provided that this copyright notice is not removed.", NULL }; #endif #ifndef OLDC /* function prototypes */ void emboss_getoptions(char *pgm, int argc, char *argv[]); void initdrawgramnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char**); void initialparms(void); char showparms(void); void getparms(char); void calctraverse(node *, double, double *); void calculate(void); void rescale(void); void setup_environment(Char *argv[], boolean *); void user_loop(boolean *); /* function prototypes */ #endif void initdrawgramnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ long i; boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; for (i=0;inayme[i] = '\0'; nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); (*p)->index = nodei; break; case tip: (*ntips)++; gnu(grbg, p); nodep[(*ntips) - 1] = *p; setupnode(*p, *ntips); (*p)->tip = true; (*p)->naymlength = len ; strncpy ((*p)->nayme, str, MAXNCH); break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); if (!minusread) (*p)->oldlen = valyew / divisor; else (*p)->oldlen = 0.0; break; case hsnolength: haslengths = false; break; default: /* cases hslength,treewt,unittrwt,iter */ break; /* should never occur */ } } /* initdrawgramnode */ void initialparms() { /* initialize parameters */ /* these are set by emboss_getoptions */ /* // plotter = DEFPLOTTER; // previewer = DEFPREV; // preview = true; */ paperx=20.6375; pagex=20.6375; papery=26.9875; pagey=26.9875; strcpy(fontname,"Times-Roman"); plotrparms(spp); /* initial, possibly bogus, parameters */ style = phenogram; grows = horizontal; labelrotation = 90.0; nodespace = 3.0; stemlength = 0.05; treedepth = 0.5 / 0.95; rescaled = true; bscale = 1.0; uselengths = haslengths; if (uselengths) nodeposition = weighted; else nodeposition = centered; xmargin = 0.08 * xsize; ymargin = 0.08 * ysize; hpmargin = 0.02*pagex; vpmargin = 0.02*pagey; } /* initialparms */ void emboss_getoptions(char *pgm, int argc, char *argv[]) { /* get from user the relevant parameters for the plotter and diagram */ boolean getgrows; int m, n; AjPStr getstyle = NULL; AjPStr plottercode = NULL; AjPStr getpreviewer = NULL; AjPStr getnodeposition = NULL; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); n = (int)((pagex-hpmargin-0.01)/(paperx-hpmargin)+1.0); m = (int)((pagey-vpmargin-0.01)/(papery-vpmargin)+1.0); phylotrees = ajAcdGetTree("intreefile"); plottercode = ajAcdGetListSingle("plotter"); getplotter(ajStrGetCharFirst(plottercode)); preview = true; getpreviewer = ajAcdGetListSingle("previewer"); /* sets plotter variable */ if(ajStrMatchC(getpreviewer, "n")) { preview = false; previewer = other; /* Added by Dan F. 
*/ } else if(ajStrMatchC(getpreviewer, "i")) previewer = ibm; else if(ajStrMatchC(getpreviewer, "m")) previewer = mac; else if(ajStrMatchC(getpreviewer, "x")) previewer = xpreview; else if(ajStrMatchC(getpreviewer, "w")) previewer = winpreview; else if(ajStrMatchC(getpreviewer, "i")) previewer = tek; else if(ajStrMatchC(getpreviewer, "i")) previewer = decregis; else if(ajStrMatchC(getpreviewer, "o")) previewer = other; getgrows = ajAcdGetBoolean("grows"); if(getgrows) grows = horizontal; else grows = vertical; getstyle = ajAcdGetListSingle("style"); if(ajStrMatchC(getstyle, "c")) style = cladogram; else if(ajStrMatchC(getstyle, "p")) style = phenogram; else if(ajStrMatchC(getstyle, "e")) style = eurogram; else if(ajStrMatchC(getstyle, "s")) style = swoopogram; else if(ajStrMatchC(getstyle, "v")) style = curvogram; else if(ajStrMatchC(getstyle, "o")) { style = circular; treedepth = 1.0; } uselengths = ajAcdGetBoolean("lengths"); labelrotation = ajAcdGetFloat("labelrotation"); if(plotter==ray) { xmargin = ajAcdGetFloat("xrayshade"); ymargin = ajAcdGetFloat("yrayshade"); } else { xmargin = ajAcdGetFloat("xmargin"); ymargin = ajAcdGetFloat("ymargin"); } rescaled = ajAcdGetToggle("rescaled"); if(rescaled) bscale = ajAcdGetFloat("bscale"); treedepth = ajAcdGetFloat("treedepth"); stemlength = ajAcdGetFloat("stemlength"); nodespace = ajAcdGetFloat("nodespace"); nodespace = 1.0 / nodespace; m = ajAcdGetFloat("pagesheight"); n = ajAcdGetFloat("pageswidth"); paperx = ajAcdGetFloat("paperx"); papery = ajAcdGetFloat("papery"); hpmargin = ajAcdGetFloat("hpmargin"); vpmargin = ajAcdGetFloat("vpmargin"); pagex = ((double)n * (paperx-hpmargin)+hpmargin); pagey = ((double)m * (papery-vpmargin)+vpmargin); getnodeposition = ajAcdGetListSingle("nodeposition"); if(ajStrMatchC(getnodeposition, "i")) nodeposition = intermediate; else if(ajStrMatchC(getnodeposition, "w")) nodeposition = weighted; else if(ajStrMatchC(getnodeposition, "c")) nodeposition = centered; else if(ajStrMatchC(getnodeposition, "i")) nodeposition = inner; else if(ajStrMatchC(getnodeposition, "v")) nodeposition = vshaped; embossplotfile = ajAcdGetOutfile("plotfile"); emboss_openfile(embossplotfile, &plotfile, &pltfilename); } /* getparms */ void calctraverse(node *p, double lengthsum, double *tipx) { /* traverse to establish initial node coordinates */ double x1, y1, x2, y2, x3, x4, x5, w1, w2, sumwx, sumw, nodeheight; node *pp, *plast, *panc; if (p == root) nodeheight = 0.0; else if (uselengths) nodeheight = lengthsum + fabs(p->oldlen); else nodeheight = 1.0; if (nodeheight > maxheight) maxheight = nodeheight; if (p->tip) { p->xcoord = *tipx; p->tipsabove = 1; if (uselengths) p->ycoord = nodeheight; else p->ycoord = 1.0; *tipx += tipspacing; return; } sumwx = 0.0; sumw = 0.0; p->tipsabove = 0; pp = p->next; x3 = 0.0; do { calctraverse(pp->back, nodeheight, tipx); p->tipsabove += pp->back->tipsabove; sumw += pp->back->tipsabove; sumwx += pp->back->tipsabove * pp->back->xcoord; if (fabs(pp->back->xcoord - 0.5) < fabs(x3 - 0.5)) x3 = pp->back->xcoord; plast = pp; pp = pp->next; } while (pp != p); x1 = p->next->back->xcoord; x2 = plast->back->xcoord; y1 = p->next->back->ycoord; y2 = plast->back->ycoord; switch (nodeposition) { case weighted: w1 = y1 - p->ycoord; w2 = y2 - p->ycoord; if (w1 + w2 <= 0.0) p->xcoord = (x1 + x2) / 2.0; else p->xcoord = (w2 * x1 + w1 * x2) / (w1 + w2); break; case intermediate: p->xcoord = (x1 + x2) / 2.0; break; case centered: p->xcoord = sumwx / sumw; break; case inner: p->xcoord = x3; break; case vshaped: if 
(iteration > 1) { if (!(p == root)) { panc = nodep[p->back->index-1]; w1 = p->ycoord - panc->ycoord; w2 = y1 - p->ycoord; if (w1+w2 < 0.000001) x4 = (x1+panc->xcoord)/2.0; else x4 = (w1*x1+w2*panc->xcoord)/(w1+w2); w2 = y2 - p->ycoord; if (w1+w2 < 0.000001) x5 = (x2+panc->xcoord)/2.0; else x5 = (w1*x2+w2*panc->xcoord)/(w1+w2); if (panc->xcoord < p->xcoord) p->xcoord = x5; else p->xcoord = x4; } else { if ((y1-2*p->ycoord+y2) < 0.000001) p->xcoord = (x1+x2)/2; else p->xcoord = ((y2-p->ycoord)*x1+(y1-p->ycoord))/(y1-2*p->ycoord+y2); } } break; } if (uselengths) { p->ycoord = nodeheight; return; } if (nodeposition != inner) { p->ycoord = (y1 + y2 - sqrt((y1 + y2) * (y1 + y2) - 4 * (y1 * y2 - (x2 - p->xcoord) * (p->xcoord - x1)))) / 2.0; /* this formula comes from the requirement that the vector from (x,y) to (x1,y1) be at right angles to that from (x,y) to (x2,y2) */ return; } if (fabs(x1 - 0.5) > fabs(x2 - 0.5)) { p->ycoord = y1 + x1 - x2; w1 = y2 - p->ycoord; } else { p->ycoord = y2 + x1 - x2; w1 = y1 - p->ycoord; } if (w1 < epsilon) p->ycoord -= fabs(x1 - x2); } /* calctraverse */ void calculate() { /* compute coordinates for tree */ double tipx; double sum, temp, maxtextlength, maxfirst=0, leftfirst, angle; double lef = 0.0, rig = 0.0, top = 0.0, bot = 0.0; double *firstlet, *textlength; long i; firstlet = (double *)Malloc(nextnode*sizeof(double)); textlength = (double *)Malloc(nextnode*sizeof(double)); for (i = 0; i < nextnode; i++) { nodep[i]->xcoord = 0.0; nodep[i]->ycoord = 0.0; if (nodep[i]->naymlength > 0) firstlet[i] = lengthtext(nodep[i]->nayme, 1L,fontname,font); else firstlet[i] = 0.0; } i = 0; do i++; while (!nodep[i]->tip); leftfirst = firstlet[i]; maxheight = 0.0; maxtextlength = 0.0; for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { textlength[i] = lengthtext(nodep[i]->nayme, nodep[i]->naymlength, fontname, font); if (textlength[i]-0.5*firstlet[i] > maxtextlength) { maxtextlength = textlength[i]-0.5*firstlet[i]; maxfirst = firstlet[i]; } } } maxtextlength = maxtextlength + 0.5*maxfirst; fontheight = heighttext(font,fontname); if (style == circular) { if (grows == vertical) angle = pi / 2.0; else angle = 2.0*pi; } else angle = pi * labelrotation / 180.0; maxtextlength /= fontheight; maxfirst /= fontheight; leftfirst /= fontheight; for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { textlength[i] /= fontheight; firstlet[i] /= fontheight; } } if (spp > 1) labelheight = 1.0 / (nodespace * (spp - 1)); else labelheight = 1.0 / nodespace; if (angle < pi / 6.0) tipspacing = (nodespace + cos(angle) * (maxtextlength - 0.5*maxfirst)) * labelheight; else if (spp > 1) { if (style == circular) { tipspacing = 1.0 / spp; } else tipspacing = 1.0 / (spp - 1.0); } else tipspacing = 1.0; finished = false; iteration = 1; do { if (style == circular) tipx = 1.0/(2.0*(double)spp); else tipx = 0.0; sum = 0.0; calctraverse(root, sum, &tipx); iteration++; } while ((nodeposition == vshaped) && (iteration < 4*spp)); rooty = root->ycoord; labelheight *= 1.0 - stemlength; for (i = 0; i < nextnode; i++) { if (rescaled) { if (style != circular) nodep[i]->xcoord *= 1.0 - stemlength; nodep[i]->ycoord = stemlength * treedepth + (1.0 - stemlength) * treedepth * (nodep[i]->ycoord - rooty) / (maxheight - rooty); nodep[i]->oldtheta = angle; } else { nodep[i]->xcoord = nodep[i]->xcoord * (maxheight - rooty) / treedepth; nodep[i]->ycoord = stemlength / (1 - stemlength) * (maxheight - rooty) + nodep[i]->ycoord; nodep[i]->oldtheta = angle; } } topoflabels = 0.0; bottomoflabels = 0.0; leftoflabels = 0.0; 
rightoflabels = 0.0; if (style == circular) { for (i = 0; i < nextnode; i++) { temp = nodep[i]->xcoord; if (grows == vertical) { nodep[i]->xcoord = (1.0+nodep[i]->ycoord * cos((1.5-2.0*temp)*pi)/treedepth)/2.0; nodep[i]->ycoord = (1.0+nodep[i]->ycoord * sin((1.5-2.0*temp)*pi)/treedepth)/2.0; nodep[i]->oldtheta = (1.5-2.0*temp)*pi; } else { nodep[i]->xcoord = (1.0+nodep[i]->ycoord * cos((1.0-2.0*temp)*pi)/treedepth)/2.0; nodep[i]->ycoord = (1.0+nodep[i]->ycoord * sin((1.0-2.0*temp)*pi)/treedepth)/2.0; nodep[i]->oldtheta = (1.0-2.0*temp)*pi; } } tipspacing *= 2.0*pi; } maxx = nodep[0]->xcoord; maxy = nodep[0]->ycoord; minx = nodep[0]->xcoord; if (style == circular) miny = nodep[0]->ycoord; else miny = 0.0; for (i = 1; i < nextnode; i++) { if (nodep[i]->xcoord > maxx) maxx = nodep[i]->xcoord; if (nodep[i]->ycoord > maxy) maxy = nodep[i]->ycoord; if (nodep[i]->xcoord < minx) minx = nodep[i]->xcoord; if (nodep[i]->ycoord < miny) miny = nodep[i]->ycoord; } if (style == circular) { for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { angle = nodep[i]->oldtheta; if (cos(angle) < 0.0) angle -= pi; top = (nodep[i]->ycoord - maxy) / labelheight + sin(nodep[i]->oldtheta); rig = (nodep[i]->xcoord - maxx) / labelheight + cos(nodep[i]->oldtheta); bot = (miny - nodep[i]->ycoord) / labelheight - sin(nodep[i]->oldtheta); lef = (minx - nodep[i]->xcoord) / labelheight - cos(nodep[i]->oldtheta); if (cos(nodep[i]->oldtheta) > 0) { if (sin(angle) > 0.0) top += sin(angle) * textlength[i]; top += sin(angle - 1.25 * pi) * gap * firstlet[i]; if (sin(angle) < 0.0) bot -= sin(angle) * textlength[i]; bot -= sin(angle - 0.75 * pi) * gap * firstlet[i]; if (sin(angle) > 0.0) rig += cos(angle - 0.75 * pi) * gap * firstlet[i]; else rig += cos(angle - 1.25 * pi) * gap * firstlet[i]; rig += cos(angle) * textlength[i]; if (sin(angle) > 0.0) lef -= cos(angle - 1.25 * pi) * gap * firstlet[i]; else lef -= cos(angle - 0.75 * pi) * gap * firstlet[i]; } else { if (sin(angle) < 0.0) top -= sin(angle) * textlength[i]; top += sin(angle + 0.25 * pi) * gap * firstlet[i]; if (sin(angle) > 0.0) bot += sin(angle) * textlength[i]; bot -= sin(angle - 0.25 * pi) * gap * firstlet[i]; if (sin(angle) > 0.0) rig += cos(angle - 0.25 * pi) * gap * firstlet[i]; else rig += cos(angle + 0.25 * pi) * gap * firstlet[i]; if (sin(angle) < 0.0) rig += cos(angle) * textlength[i]; if (sin(angle) > 0.0) lef -= cos(angle + 0.25 * pi) * gap * firstlet[i]; else lef -= cos(angle - 0.25 * pi) * gap * firstlet[i]; lef += cos(angle) * textlength[i]; } } if (top > topoflabels) topoflabels = top; if (bot > bottomoflabels) bottomoflabels = bot; if (rig > rightoflabels) rightoflabels = rig; if (lef > leftoflabels) leftoflabels = lef; } topoflabels *= labelheight; bottomoflabels *= labelheight; leftoflabels *= labelheight; rightoflabels *= labelheight; } if (style != circular) { topoflabels = labelheight * (1.0 + sin(angle) * (maxtextlength - 0.5 * maxfirst) + cos(angle) * 0.5 * maxfirst); rightoflabels = labelheight * (cos(angle) * (textlength[nextnode-1] - 0.5 * maxfirst) + sin(angle) * 0.5); leftoflabels = labelheight * (cos(angle) * leftfirst * 0.5 + sin(angle) * 0.5); } rooty = miny; free(firstlet); free(textlength); } /* calculate */ void rescale() { /* compute coordinates of tree for plot or preview device */ long i; double treeheight, treewidth, extrax, extray, temp; treeheight = maxy - miny; treewidth = maxx - minx; if (style == circular) { treewidth = 1.0; treeheight = 1.0; if (!rescaled) { if (uselengths) { labelheight *= (maxheight - rooty) / treedepth; 
topoflabels *= (maxheight - rooty) / treedepth; bottomoflabels *= (maxheight - rooty) / treedepth; leftoflabels *= (maxheight - rooty) / treedepth; rightoflabels *= (maxheight - rooty) / treedepth; treewidth *= (maxheight - rooty) / treedepth; } } } treewidth += rightoflabels + leftoflabels; treeheight += topoflabels + bottomoflabels; if (grows == vertical) { if (!rescaled) expand = bscale; else { expand = (xsize - 2 * xmargin) / treewidth; if ((ysize - 2 * ymargin) / treeheight < expand) expand = (ysize - 2 * ymargin) / treeheight; } extrax = (xsize - 2 * xmargin - treewidth * expand) / 2.0; extray = (ysize - 2 * ymargin - treeheight * expand) / 2.0; } else { if (!rescaled) expand = bscale; else { expand = (ysize - 2 * ymargin) / treewidth; if ((xsize - 2 * xmargin) / treeheight < expand) expand = (xsize - 2 * xmargin) / treeheight; } extrax = (xsize - 2 * xmargin - treeheight * expand) / 2.0; extray = (ysize - 2 * ymargin - treewidth * expand) / 2.0; } for (i = 0; i < nextnode; i++) { nodep[i]->xcoord = expand * (nodep[i]->xcoord + leftoflabels); nodep[i]->ycoord = expand * (nodep[i]->ycoord + bottomoflabels); if ((style != circular) && (grows == horizontal)) { temp = nodep[i]->ycoord; nodep[i]->ycoord = expand * treewidth - nodep[i]->xcoord; nodep[i]->xcoord = temp; } nodep[i]->xcoord += xmargin + extrax; nodep[i]->ycoord += ymargin + extray; } if (style == circular) { xx0 = xmargin+extrax+expand*(leftoflabels + 0.5); yy0 = ymargin+extray+expand*(bottomoflabels + 0.5); } else if (grows == vertical) rooty = ymargin + extray; else rootx = xmargin + extrax; } /* rescale */ void plottree(node *p, node *q) { /* plot part or all of tree on the plotting device */ long i; double x00=0, y00=0, x1, y1, x2, y2, x3, y3, x4, y4, cc, ss, f, g, fract=0, minny, miny; node *pp; x2 = xscale * (xoffset + p->xcoord); y2 = yscale * (yoffset + p->ycoord); if (style == circular) { x00 = xscale * (xx0 + xoffset); y00 = yscale * (yy0 + yoffset); } if (p != root) { x1 = xscale * (xoffset + q->xcoord); y1 = yscale * (yoffset + q->ycoord); switch (style) { case cladogram: plot(penup, x1, y1); plot(pendown, x2, y2); break; case phenogram: plot(penup, x1, y1); if (grows == vertical) plot(pendown, x2, y1); else plot(pendown, x1, y2); plot(pendown, x2, y2); break; case curvogram: plot(penup, x1, y1) ; curvespline(x1,y1,x2,y2,(boolean)(grows == vertical),20); break; case eurogram: plot(penup, x1, y1); if (grows == vertical) plot(pendown, x2, (2 * y1 + y2) / 3); else plot(pendown, (2 * x1 + x2) / 3, y2); plot(pendown, x2, y2); break; case swoopogram: plot(penup, x1, y1); if ((grows == vertical && fabs(y1 - y2) >= epsilon) || (grows == horizontal && fabs(x1 - x2) >= epsilon)) { if (grows == vertical) miny = p->ycoord; else miny = p->xcoord; pp = q->next; while (pp != q) { if (grows == vertical) minny = pp->back->ycoord; else minny = pp->back->xcoord; if (minny < miny) miny = minny; pp = pp->next; } if (grows == vertical) miny = yscale * (yoffset + miny); else miny = xscale * (xoffset + miny); if (grows == vertical) fract = 0.3333 * (miny - y1) / (y2 - y1); else fract = 0.3333 * (miny - x1) / (x2 - x1); } if ((grows == vertical && fabs(y1 - y2) >= epsilon) || (grows == horizontal && fabs(x1 - x2) >= epsilon)) { if (grows == vertical) miny = p->ycoord; else miny = p->xcoord; pp = q->next; while (pp != q) { if (grows == vertical) minny = pp->back->ycoord; else minny = pp->back->xcoord; if (minny < miny) miny = minny; pp = pp->next; } if (grows == vertical) miny = yscale * (yoffset + miny); else miny = xscale * (xoffset + 
miny); if (grows == vertical) fract = 0.3333 * (miny - y1) / (y2 - y1); else fract = 0.3333 * (miny - x1) / (x2 - x1); } swoopspline(x1,y1,x1+fract*(x2-x1),y1+fract*(y2-y1), x2,y2,(boolean)(grows != vertical),segments); break; case circular: plot(penup, x1, y1); if (fabs(x1-x00)+fabs(y1-y00) > 0.00001) { g = ((x1-x00)*(x2-x00)+(y1-y00)*(y2-y00)) /sqrt(((x1-x00)*(x1-x00)+(y1-y00)*(y1-y00)) *((x2-x00)*(x2-x00)+(y2-y00)*(y2-y00))); if (g > 1.0) g = 1.0; if (g < -1.0) g = -1.0; f = acos(g); if ((x2-x00)*(y1-y00)>(x1-x00)*(y2-y00)) f = -f; if (fabs(g-1.0) > 0.0001) { cc = cos(f/segments); ss = sin(f/segments); x3 = x1; y3 = y1; for (i = 1; i <= segments; i++) { x4 = x00 + cc*(x3-x00) - ss*(y3-y00); y4 = y00 + ss*(x3-x00) + cc*(y3-y00); x3 = x4; y3 = y4; plot(pendown, x3, y3); } } } plot(pendown, x2, y2); break; } } else { if (style == circular) { x1 = x00; y1 = y00; } else { if (grows == vertical) { x1 = xscale * (xoffset + p->xcoord); y1 = yscale * (yoffset + rooty); } else { x1 = xscale * (xoffset + rootx); y1 = yscale * (yoffset + p->ycoord); } } plot(penup, x1, y1); plot(pendown, x2, y2); } if (p->tip) return; pp = p->next; while (pp != p) { plottree(pp->back, p); pp = pp->next; } } /* plottree */ void plotlabels(char *fontname) { long i; double compr, dx = 0, dy = 0, labangle, cosl, sinl, cosv, sinv, vec; boolean left, right; node *lp; double *firstlet; firstlet = (double *)Malloc(nextnode*sizeof(double)); textlength = (double *)Malloc(nextnode*sizeof(double)); compr = xunitspercm / yunitspercm; if (penchange == yes) changepen(labelpen); for (i = 0; i < nextnode; i++) { if (nodep[i]->tip) { lp = nodep[i]; firstlet[i] = lengthtext(nodep[i]->nayme,1L,fontname,font) /fontheight; textlength[i] = lengthtext(nodep[i]->nayme, nodep[i]->naymlength, fontname, font)/fontheight; labangle = nodep[i]->oldtheta; if (cos(labangle) < 0.0) labangle += pi; cosl = cos(labangle); sinl = sin(labangle); vec = sqrt(1.0+firstlet[i]*firstlet[i]); cosv = firstlet[i]/vec; sinv = 1.0/vec; if (style == circular) { right = cos(nodep[i]->oldtheta) > 0.0; left = !right; if (right) { dx = labelheight * expand * cos(nodep[i]->oldtheta); dy = labelheight * expand * sin(nodep[i]->oldtheta); dx -= labelheight * expand * 0.5 * vec * (cosl*sinv-sinl*cosv); dy -= labelheight * expand * 0.5 * vec * (sinl*sinv+cosl*cosv); } if (left) { dx = labelheight * expand * cos(nodep[i]->oldtheta); dy = labelheight * expand * sin(nodep[i]->oldtheta); dx -= labelheight * expand * textlength[i] * cosl; dy -= labelheight * expand * textlength[i] * sinl; dx += labelheight * expand * 0.5 * vec * (cosl*cosv+sinl*sinv); dy -= labelheight * expand * 0.5 * vec * (-sinl*cosv+cosl*sinv); } } else { dx = labelheight * expand * cos(nodep[i]->oldtheta); dy = labelheight * expand * sin(nodep[i]->oldtheta); dx += labelheight * expand * 0.5 * vec * (cosl*cosv+sinl*sinv); dy += labelheight * expand * 0.5 * vec * (-sinl*cosv+cosl*sinv); } if (style == circular) { plottext(lp->nayme, lp->naymlength, labelheight * expand * xscale / compr, compr, xscale * (lp->xcoord + dx + xoffset), yscale * (lp->ycoord + dy + yoffset), 180 * (-labangle) / pi, font,fontname); } else { if (grows == vertical) plottext(lp->nayme, lp->naymlength, labelheight * expand * xscale / compr, compr, xscale * (lp->xcoord + dx + xoffset), yscale * (lp->ycoord + dy + yoffset), -labelrotation, font,fontname); else plottext(lp->nayme, lp->naymlength, labelheight * expand * yscale, compr, xscale * (lp->xcoord + dy + xoffset), yscale * (lp->ycoord - dx + yoffset), 90.0 - labelrotation, 
font,fontname); } } } if (penchange == yes) changepen(treepen); free(firstlet); free(textlength); } /* plotlabels */ void setup_environment(Char *argv[], boolean *canbeplotted) { boolean firsttree; char* treestr; /* Set up all kinds of fun stuff */ #ifdef MAC OSErr retcode; FInfo fndrinfo; macsetup("Drawgram","Preview"); #endif #ifdef TURBOC if ((registerbgidriver(EGAVGA_driver) <0) || (registerbgidriver(Herc_driver) <0) || (registerbgidriver(CGA_driver) <0)){ printf("Graphics error: %s ",grapherrormsg(graphresult())); exit(-1);} #endif printf("DRAWGRAM from PHYLIP version %s\n",VERSION); printf("Reading tree ... \n"); firsttree = true; treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); allocate_nodep(&nodep, treestr, &spp); treeread (&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdrawgramnode,true,-1); root->oldlen = 0.0; printf("Tree has been read.\n"); printf("Loading the font .... \n"); loadfont(font,argv[0]); printf("Font loaded.\n"); previewing = false; ansi = ANSICRT; ibmpc = IBMCRT; firstscreens = true; initialparms(); (*canbeplotted) = false; } /* setup_environment */ void user_loop(boolean *canbeplotted) { long stripedepth; while (!(*canbeplotted)) { /* // do { // input_char=showparms(); // firstscreens = false; //if (input_char != 'Y') //getparms(input_char); // } while (input_char != 'Y'); */ if (dotmatrix) { stripedepth = allocstripe(stripe,(strpwide/8), ((long)(yunitspercm * ysize))); strpdeep = stripedepth; strpdiv = stripedepth; } plotrparms(spp); numlines = dotmatrix ? ((long)floor(yunitspercm * ysize + 0.5) / strpdeep) :1; xscale = xunitspercm; yscale = yunitspercm; calculate(); rescale(); (*canbeplotted) = true; if (preview) { previewing = true; (*canbeplotted) = plotpreview(fontname,&xoffset,&yoffset, &scale,spp,root); } else { /*(*canbeplotted) = plot_without_preview(fontname,&xoffset,&yoffset, &scale,spp,root);*/ (*canbeplotted)=true; } if ((previewer == winpreview || previewer == xpreview || previewer == mac) && (winaction == quitnow)) { break; } } } /* user_loop */ int main(int argc, Char *argv[]) { boolean canbeplotted; #ifdef MAC boolean wasplotted = false; OSErr retcode; FInfo fndrinfo; #ifdef OSX_CARBON FSRef fileRef; FSSpec fileSpec; #endif #ifdef __MWERKS__ SIOUXSetTitle("\pPHYLIP: Drawtree"); #endif argv[0] = "Drawgram"; #endif grbg = NULL; progname = argv[0]; #ifndef X_DISPLAY_MISSING nargc=1; nargv=argv; #endif init(argc, argv); emboss_getoptions("fdrawgram",argc,argv); setup_environment(argv, &canbeplotted); user_loop(&canbeplotted); if (!((previewer == winpreview || previewer == xpreview || previewer == mac) && (winaction == quitnow))) { previewing = false; initplotter(spp,fontname); numlines = dotmatrix ? 
((long)floor(yunitspercm * ysize + 0.5)/strpdeep) : 1; if (plotter != ibm) printf("\nWriting plot file ...\n"); drawit(fontname,&xoffset,&yoffset,numlines,root); finishplotter(); FClose(plotfile); #ifdef MAC wasplotted = true; #endif printf("\nPlot written to file \"%s\"\n\n", pltfilename); } FClose(intree); #ifdef MAC if (plotter == pict && wasplotted){ #ifdef OSX_CARBON FSPathMakeRef((unsigned char *)pltfilename, &fileRef, NULL); FSGetCatalogInfo(&fileRef, kFSCatInfoNone, NULL, NULL, &fileSpec, NULL); FSpGetFInfo(&fileSpec, &fndrinfo); fndrinfo.fdType='PICT'; fndrinfo.fdCreator='MDRW'; FSpSetFInfo(&fileSpec, &fndrinfo); #else retcode=GetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); fndrinfo.fdType='PICT'; fndrinfo.fdCreator='MDRW'; retcode=SetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); #endif } if (plotter == lw && wasplotted){ #ifdef OSX_CARBON FSPathMakeRef((unsigned char *)pltfilename, &fileRef, NULL); FSGetCatalogInfo(&fileRef, kFSCatInfoNone, NULL, NULL, &fileSpec, NULL); FSpGetFInfo(&fileSpec, &fndrinfo); fndrinfo.fdType='TEXT'; FSpSetFInfo(&fileSpec, &fndrinfo); #else retcode=GetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); fndrinfo.fdType='TEXT'; retcode=SetFInfo(CtoPstr(PLOTFILE),0,&fndrinfo); #endif } #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/pars.c0000664000175000017500000012755111616234204012200 00000000000000 #include "phylip.h" #include "discrete.h" /* version 3.6 (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ #define MAXNUMTREES 1000000 /* bigger than number of user trees can be */ AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees = NULL; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void makeweights(void); void doinput(void); void initparsnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void evaluate(node *); void tryadd(node *, node *, node *); void addpreorder(node *, node *, node *); void trydescendants(node *, node *, node *, node *, boolean); void trylocal(node *, node *); void trylocal2(node *, node *, node *); void tryrearr(node *p, boolean *); void repreorder(node *p, boolean *); void rearrange(node **); void describe(void); void pars_coordinates(node *, double, long *, double *); void pars_printree(void); void globrearrange(void); void grandrearr(void); void maketree(void); void freerest(void); void load_tree(long treei); void reallocchars(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; node *root; long chars, col, msets, ith, njumble, jumb, maxtrees, numtrees; /* chars = number of sites in actual sequences */ long inseed, inseed0; double threshold; boolean jumble, usertree, thresh, weights, thorough, rearrfirst, trout, progress, stepbox, ancseq, mulsets, justwts, firstset, mulf, multf; steptr oldweight; longer seed; pointarray treenode; /* pointers to all nodes in tree */ long *enterorder; char *progname; long *zeros; unsigned char *zeros2; /* local variables for Pascal maketree, propagated globally for C version: */ long minwhich; 
double like, minsteps, bestyet, bestlike, bstlike2; boolean lastrearr, recompute; double nsteps[maxuser]; long **fsteps; node *there, *oldnufork; long *place; bestelm *bestrees; long *threshwt; discbaseptr nothing; gbases *garbage; node *temp, *temp1, *temp2, *tempsum, *temprm, *tempadd, *tempf, *tmp, *tmp1, *tmp2, *tmp3, *tmprm, *tmpadd; boolean *names; node *grbg; void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; long inseed0; jumble = false; njumble = 1; outgrno = 1; outgropt = false; thresh = false; thorough = true; rearrfirst = false; maxtrees = 100; trout = true; usertree = false; weights = false; mulsets = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; dotdiff = true; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; if (numseqs > 1) { mulsets = true; msets = numseqs; } phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } if(!usertree) { thorough = ajAcdGetToggle("thorough"); if(thorough) rearrfirst = ajAcdGetBoolean("rearrange"); maxtrees = ajAcdGetInt("maxtrees"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; thresh = ajAcdGetToggle("dothreshold"); if(thresh) threshold = ajAcdGetFloat("threshold"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); if (ancseq || printdata) ajAcdGetBoolean("dotdiff"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nDiscrete character parsimony algorithm, version %s\n\n", VERSION); } /* emboss_getoptions */ void reallocchars() { long i; for (i = 0; i < spp; i++) { free(y[i]); y[i] = (Char *)Malloc(chars*sizeof(Char)); } for (i = 0; i < spp; i++){ free(convtab[i]); convtab[i] = (Char *)Malloc(chars*sizeof(Char)); } free(weight); free(oldweight); free(alias); free(ally); free(location); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (long *)Malloc(chars*sizeof(long)); ally = (long *)Malloc(chars*sizeof(long)); location = (long *)Malloc(chars*sizeof(long)); } void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc((chars+1)*sizeof(Char)); convtab = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) convtab[i] = (Char *)Malloc(chars*sizeof(Char)); bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1].btree = (long *)Malloc(nonodes*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); place = (long *)Malloc(nonodes*sizeof(long)); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (long *)Malloc(chars*sizeof(long)); ally = (long *)Malloc(chars*sizeof(long)); location = (long 
*)Malloc(chars*sizeof(long)); } /* alocrest */ void doinit() { /* initializes variables */ inputnumbersstate(phylostates[0], &spp, &chars, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars); alloctree(&treenode, nonodes, usertree); allocrest(); } /* doinit */ void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= chars; i++) { alias[i - 1] = i; oldweight[i - 1] = weight[i - 1]; ally[i - 1] = i; } sitesort(chars, weight); sitecombine(chars); sitescrunch(chars); endsite = 0; for (i = 1; i <= chars; i++) { if (ally[i - 1] == i) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; if (!thresh) threshold = spp; threshwt = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) { weight[i] *= 10; threshwt[i] = (long)(threshold * weight[i] + 0.5); } zeros = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) zeros[i] = 0; zeros2 = (unsigned char *)Malloc(endsite*sizeof(unsigned char)); for (i = 0; i < endsite; i++) zeros2[i] = 0; } /* makeweights */ void doinput(void) { /* reads the input data */ long i; if (justwts) { if (firstset) discrete_inputdata(phylostates[0], chars); for (i = 0; i < chars; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } else { if (!firstset) { samenumspstate(phylostates[ith-1], &chars, ith); reallocchars(); } discrete_inputdata(phylostates[0], chars); for (i = 0; i < chars; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } } makeweights(); makevalues(treenode, zeros, zeros2, usertree); if (!usertree) { allocdiscnode(&temp, zeros, zeros2, endsite); allocdiscnode(&temp1, zeros, zeros2, endsite); allocdiscnode(&temp2, zeros, zeros2, endsite); allocdiscnode(&tempsum, zeros, zeros2, endsite); allocdiscnode(&temprm, zeros, zeros2, endsite); allocdiscnode(&tempadd, zeros, zeros2, endsite); allocdiscnode(&tempf, zeros, zeros2, endsite); allocdiscnode(&tmp, zeros, zeros2, endsite); allocdiscnode(&tmp1, zeros, zeros2, endsite); allocdiscnode(&tmp2, zeros, zeros2, endsite); allocdiscnode(&tmp3, zeros, zeros2, endsite); allocdiscnode(&tmprm, zeros, zeros2, endsite); allocdiscnode(&tmpadd, zeros, zeros2, endsite); } } /* doinput */ void initparsnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char **treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnudisctreenode(grbg, p, nodei, endsite, zeros, zeros2); treenode[nodei - 1] = *p; break; case nonbottom: gnudisctreenode(grbg, p, nodei, endsite, zeros, zeros2); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: /* if there is a length, read it and discard value */ processlength(&valyew, &divisor, ch, &minusread, treestr, parens); break; default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; /*length should never occur */ } } /* initparsnode */ void evaluate(node *r) { /* determines the number of steps needed for a tree. 
this is the minimum number of steps needed to evolve sequences on this tree */ long i, steps; long term; double sum; sum = 0.0; for (i = 0; i < endsite; i++) { steps = r->numsteps[i]; if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; sum += (double)term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { minwhich = 1; minsteps = sum; } else if (sum < minsteps) { minwhich = which; minsteps = sum; } } like = -sum; } /* evaluate */ void tryadd(node *p, node *item, node *nufork) { /* temporarily adds one fork and one tip to the tree. if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; double belowsum, parentsum; boolean found, collapse, changethere, trysave; if (!p->tip) { memcpy(temp->discbase, p->discbase, endsite*sizeof(unsigned char)); memcpy(temp->numsteps, p->numsteps, endsite*sizeof(long)); memcpy(temp->discnumnuc, p->discnumnuc, endsite*sizeof(discnucarray)); temp->numdesc = p->numdesc + 1; if (p->back) { multifillin(temp, tempadd, 1); sumnsteps2(tempsum, temp, p->back, 0, endsite, threshwt); } else { multisumnsteps(temp, tempadd, 0, endsite, threshwt); tempsum->sumsteps = temp->sumsteps; } if (tempsum->sumsteps <= -bestyet) { if (p->back) sumnsteps2(tempsum, temp, p->back, endsite+1, endsite, threshwt); else { multisumnsteps(temp, temp1, endsite+1, endsite, threshwt); tempsum->sumsteps = temp->sumsteps; } } p->sumsteps = tempsum->sumsteps; } if (p == root) sumnsteps2(temp, item, p, 0, endsite, threshwt); else { sumnsteps(temp1, item, p, 0, endsite); sumnsteps2(temp, temp1, p->back, 0, endsite, threshwt); } if (temp->sumsteps <= -bestyet) { if (p == root) sumnsteps2(temp, item, p, endsite+1, endsite, threshwt); else { sumnsteps(temp1, item, p, endsite+1, endsite); sumnsteps2(temp, temp1, p->back, endsite+1, endsite, threshwt); } } belowsum = temp->sumsteps; multf = false; like = -belowsum; if (!p->tip && belowsum >= p->sumsteps) { multf = true; like = -p->sumsteps; } trysave = true; if (!multf && p != root) { parentsum = treenode[p->back->index - 1]->sumsteps; if (belowsum >= parentsum) trysave = false; } if (lastrearr) { changethere = true; if (like >= bstlike2 && trysave) { if (like > bstlike2) found = false; else { addnsave(p, item, nufork, &root, &grbg, multf, treenode, place, zeros, zeros2); pos = 0; findtree(&found, &pos, nextree, place, bestrees); } if (!found) { collapse = collapsible(item, p, temp, temp1, temp2, tempsum, temprm, tmpadd, multf, root, zeros, zeros2, treenode); if (!thorough) changethere = !collapse; if (thorough || !collapse || like > bstlike2 || (nextree == 1)) { if (like > bstlike2) { addnsave(p, item, nufork, &root, &grbg, multf, treenode, place, zeros, zeros2); bestlike = bstlike2 = like; addbestever(&pos, &nextree, maxtrees, collapse, place, bestrees); } else addtiedtree(pos, &nextree, maxtrees, collapse, place, bestrees); } } } if (like >= bestyet) { if (like > bstlike2) bstlike2 = like; if (changethere && trysave) { bestyet = like; there = p; mulf = multf; } } } else if ((like > bestyet) || (like >= bestyet && trysave)) { bestyet = like; there = p; mulf = multf; } } /* tryadd */ void addpreorder(node *p, node *item, node *nufork) { /* traverses a n-ary tree, calling PROCEDURE tryadd at a node before calling tryadd at its descendants */ node *q; if (p == NULL) return; tryadd(p, item, nufork); if (!p->tip) { q = p->next; while (q != p) { 
addpreorder(q->back, item, nufork); q = q->next; } } } /* addpreorder */ void trydescendants(node *item, node *forknode, node *parent, node *parentback, boolean trybelow) { /* tries rearrangements at parent and below parent's descendants */ node *q, *tempblw; boolean bestever=0, belowbetter, multf=0, saved, trysave; double parentsum=0, belowsum; memcpy(temp->discbase, parent->discbase, endsite*sizeof(unsigned char)); memcpy(temp->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(temp->discnumnuc, parent->discnumnuc, endsite*sizeof(discnucarray)); temp->numdesc = parent->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, parentback, temp, 0, endsite, threshwt); belowbetter = true; if (lastrearr) { parentsum = tempsum->sumsteps; if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, parent, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = parent; mulf = true; } } } else if (-tempsum->sumsteps >= like) { there = parent; mulf = true; like = -tempsum->sumsteps; } if (trybelow) { sumnsteps(temp, parent, tempadd, 0, endsite); sumnsteps2(tempsum, temp, parentback, 0, endsite, threshwt); if (lastrearr) { belowsum = tempsum->sumsteps; if (-tempsum->sumsteps >= bstlike2 && belowbetter && (forknode->numdesc > 2 || (forknode->numdesc == 2 && parent->back->index != forknode->index))) { trysave = false; memcpy(temp->discbase, parentback->discbase, endsite*sizeof(unsigned char)); memcpy(temp->numsteps, parentback->numsteps, endsite*sizeof(long)); memcpy(temp->discnumnuc, parentback->discnumnuc, endsite*sizeof(discnucarray)); temp->numdesc = parentback->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, parent, temp, 0, endsite, threshwt); if (-tempsum->sumsteps < bstlike2) { multf = false; bestever = false; trysave = true; } if (-belowsum > bstlike2) { multf = false; bestever = true; trysave = true; } if (trysave) { if (treenode[parent->index - 1] != parent) tempblw = parent->back; else tempblw = parent; savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -belowsum; there = tempblw; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { if (treenode[parent->index - 1] != parent) tempblw = parent->back; else tempblw = parent; there = tempblw; mulf = false; } } } q = parent->next; while (q != parent) { if (q->back && q->back != item) { memcpy(temp1->discbase, q->discbase, endsite*sizeof(unsigned char)); memcpy(temp1->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp1->discnumnuc, q->discnumnuc, endsite*sizeof(discnucarray)); temp1->numdesc = q->numdesc; multifillin(temp1, parentback, 0); if (lastrearr) belowbetter = (-parentsum < bstlike2); if (!q->back->tip) { memcpy(temp->discbase, q->back->discbase, endsite*sizeof(unsigned char)); memcpy(temp->numsteps, q->back->numsteps, endsite*sizeof(long)); memcpy(temp->discnumnuc, q->back->discnumnuc, endsite*sizeof(discnucarray)); temp->numdesc = q->back->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, temp1, temp, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = 
false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = true; } } } else if (-tempsum->sumsteps >= like) { like = -tempsum->sumsteps; there = q->back; mulf = true; } } sumnsteps(temp, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp, temp1, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { trysave = false; multf = false; if (belowbetter) { bestever = false; trysave = true; } if (-tempsum->sumsteps > bstlike2) { bestever = true; trysave = true; } if (trysave) { if (treenode[q->back->index - 1] != q->back) tempblw = q; else tempblw = q->back; savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = tempblw; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { if (treenode[q->back->index - 1] != q->back) tempblw = q; else tempblw = q->back; there = tempblw; mulf = false; } } } q = q->next; } } /* trydescendants */ void trylocal(node *item, node *forknode) { /* rearranges below forknode, below descendants of forknode when there are more than 2 descendants, then unroots the back of forknode and rearranges on its descendants */ node *q; boolean bestever, multf, saved; memcpy(temprm->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(temprm->numsteps, zeros, endsite*sizeof(long)); memcpy(temprm->olddiscbase, item->discbase, endsite*sizeof(unsigned char)); memcpy(temprm->oldnumsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempf->discbase, forknode->discbase, endsite*sizeof(unsigned char)); memcpy(tempf->numsteps, forknode->numsteps, endsite*sizeof(long)); memcpy(tempf->discnumnuc, forknode->discnumnuc, endsite*sizeof(discnucarray)); tempf->numdesc = forknode->numdesc - 1; multifillin(tempf, temprm, -1); if (!forknode->back) { sumnsteps2(tempsum, tempf, tempadd, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { bestever = true; multf = false; savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = forknode; mulf = false; } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = forknode; mulf = false; } } } else { sumnsteps(temp, tempf, tempadd, 0, endsite); sumnsteps2(tempsum, temp, forknode->back, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { bestever = true; multf = false; savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = forknode; mulf = false; } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = forknode; mulf = false; } } trydescendants(item, forknode, forknode->back, tempf, false); } q = forknode->next; while (q != forknode) { if (q->back != item) { memcpy(temp2->discbase, q->discbase, 
endsite*sizeof(unsigned char)); memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp2->discnumnuc, q->discnumnuc, endsite*sizeof(discnucarray)); temp2->numdesc = q->numdesc - 1; multifillin(temp2, temprm, -1); if (!q->back->tip) { trydescendants(item, forknode, q->back, temp2, true); } else { sumnsteps(temp1, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { multf = false; bestever = true; savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = false; } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = q->back; mulf = false; } } } } q = q->next; } } /* trylocal */ void trylocal2(node *item, node *forknode, node *other) { /* rearranges below forknode, below descendants of forknode when there are more than 2 descendants, then unroots the back of forknode and rearranges on its descendants. Used if forknode has binary descendants */ node *q; boolean bestever=0, multf, saved, belowbetter, trysave; memcpy(tempf->discbase, other->discbase, endsite*sizeof(unsigned char)); memcpy(tempf->numsteps, other->numsteps, endsite*sizeof(long)); memcpy(tempf->olddiscbase, forknode->discbase, endsite*sizeof(unsigned char)); memcpy(tempf->oldnumsteps, forknode->numsteps, endsite*sizeof(long)); tempf->numdesc = other->numdesc; if (forknode->back) trydescendants(item, forknode, forknode->back, tempf, false); if (!other->tip) { memcpy(temp->discbase, other->discbase, endsite*sizeof(unsigned char)); memcpy(temp->numsteps, other->numsteps, endsite*sizeof(long)); memcpy(temp->discnumnuc, other->discnumnuc, endsite*sizeof(discnucarray)); temp->numdesc = other->numdesc + 1; multifillin(temp, tempadd, 1); if (forknode->back) sumnsteps2(tempsum, forknode->back, temp, 0, endsite, threshwt); else sumnsteps2(tempsum, NULL, temp, 0, endsite, threshwt); belowbetter = true; if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, other, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = other; mulf = true; } } } else if (-tempsum->sumsteps >= like) { there = other; mulf = true; like = -tempsum->sumsteps; } if (forknode->back) { memcpy(temprm->discbase, forknode->back->discbase, endsite*sizeof(unsigned char)); memcpy(temprm->numsteps, forknode->back->numsteps, endsite*sizeof(long)); } else { memcpy(temprm->discbase, zeros2, endsite*sizeof(unsigned char)); memcpy(temprm->numsteps, zeros, endsite*sizeof(long)); } memcpy(temprm->olddiscbase, other->back->discbase, endsite*sizeof(unsigned char)); memcpy(temprm->oldnumsteps, other->back->numsteps, endsite*sizeof(long)); q = other->next; while (q != other) { memcpy(temp2->discbase, q->discbase, endsite*sizeof(unsigned char)); memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp2->discnumnuc, q->discnumnuc, endsite*sizeof(discnucarray)); if (forknode->back) { temp2->numdesc = q->numdesc; multifillin(temp2, temprm, 0); } else { temp2->numdesc = q->numdesc - 1; multifillin(temp2, temprm, -1); } if (!q->back->tip) 
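/* interior neighbour: descend into its subtree and try rearrangements there as well */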
trydescendants(item, forknode, q->back, temp2, true); else { sumnsteps(temp1, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { trysave = false; multf = false; if (belowbetter) { bestever = false; trysave = true; } if (-tempsum->sumsteps > bstlike2) { bestever = true; trysave = true; } if (trysave) { savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros, zeros2); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = q->back; mulf = false; } } } q = q->next; } } } /* trylocal2 */ void tryrearr(node *p, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success = TRUE and keeps the new tree. otherwise, restores the old tree */ node *forknode, *newfork, *other, *oldthere; double oldlike; boolean oldmulf; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (!forknode->back && forknode->numdesc <= 2 && alltips(forknode, p)) return; oldlike = bestyet; like = -10.0 * spp * chars; memcpy(tempadd->discbase, p->discbase, endsite*sizeof(unsigned char)); memcpy(tempadd->numsteps, p->numsteps, endsite*sizeof(long)); memcpy(tempadd->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); if (forknode->numdesc > 2) { oldthere = there = forknode; oldmulf = mulf = true; trylocal(p, forknode); } else { findbelow(&other, p, forknode); oldthere = there = other; oldmulf = mulf = false; trylocal2(p, forknode, other); } if ((like <= oldlike) || (there == oldthere && mulf == oldmulf)) return; recompute = true; re_move(p, &forknode, &root, recompute, treenode, &grbg, zeros, zeros2); if (mulf) add(there, p, NULL, &root, recompute, treenode, &grbg, zeros, zeros2); else { if (forknode->numdesc > 0) getnufork(&newfork, &grbg, treenode, zeros, zeros2); else newfork = forknode; add(there, p, newfork, &root, recompute, treenode, &grbg, zeros, zeros2); } if (like - oldlike > LIKE_EPSILON) { *success = true; bestyet = like; } } /* tryrearr */ void repreorder(node *p, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ node *q, *this; if (p == NULL) return; if (!p->visited) { tryrearr(p, success); p->visited = true; } if (!p->tip) { q = p; while (q->next != p) { this = q->next->back; repreorder(q->next->back,success); if (q->next->back == this) q = q->next; } } } /* repreorder */ void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. 
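each node is marked as visited once it has been tried, so a single sweep tests every node at most once;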
if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ boolean success=true; while (success) { success = false; clearvisited(treenode); repreorder(*r, &success); } } /* rearrange */ void describe() { /* prints ancestors, steps and table of numbers of steps in each site */ if (treeprint) { fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10.0); fprintf(outfile, "\n between and length\n"); fprintf(outfile, " ------- --- ------\n"); printbranchlengths(root); } if (stepbox) writesteps(chars, weights, oldweight, root); if (ancseq) { hypstates(chars, root, treenode, &garbage); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout3(root, nextree, &col, root); } } /* describe */ void pars_coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; double xx; if (p == NULL) return; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { xx = q->v; if (xx > 100.0) xx = 100.0; pars_coordinates(q->back, lengthsum + xx, tipy,tipmax); q = q->next; } while (p != q); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if ((p == root) || count_sibs(p) > 2) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* pars_coordinates */ void pars_printree() { /* prints out diagram of the tree2 */ long tipy; double scale, tipmax; long i; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; pars_coordinates(root, 0.0, &tipy, &tipmax); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline3(i, scale, root); putc('\n', outfile); } /* pars_printree */ void globrearrange() { /* does global rearrangements */ long j; double gotlike; boolean frommulti; node *item, *nufork; recompute = true; do { printf(" "); gotlike = bestlike; for (j = 0; j < nonodes; j++) { bestyet = -10.0 * spp * chars; if (j < spp) item = treenode[enterorder[j] -1]; else item = treenode[j]; if ((item != root) && ((j < spp) || ((j >= spp) && (item->numdesc > 0))) && !((item->back->index == root->index) && (root->numdesc == 2) && alltips(root, item))) { re_move(item, &nufork, &root, recompute, treenode, &grbg, zeros, zeros2); frommulti = (nufork->numdesc > 0); clearcollapse(treenode); there = root; memcpy(tempadd->discbase, item->discbase, endsite*sizeof(unsigned char)); memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempadd->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); if (frommulti){ oldnufork = nufork; getnufork(&nufork, &grbg, treenode, zeros, zeros2); } addpreorder(root, item, nufork); if (frommulti) oldnufork = NULL; if (!mulf) add(there, item, nufork, &root, recompute, treenode, &grbg, zeros, zeros2); else add(there, item, NULL, &root, recompute, treenode, &grbg, zeros, zeros2); } if (progress) { if (j % ((nonodes / 72) + 1) == 0) putchar('.'); fflush(stdout); } } if (progress) putchar('\n'); } while (bestlike > gotlike); } /* globrearrange */ void load_tree(long treei) { /* restores a tree from bestrees */ long j, nextnode; boolean recompute = false; node *dummy; for (j = spp - 1; j >= 1; j--) re_move(treenode[j], &dummy, &root, recompute, treenode, 
&grbg, zeros, zeros2); root = treenode[0]; recompute = true; add(treenode[0], treenode[1], treenode[spp], &root, recompute, treenode, &grbg, zeros, zeros2); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[treei].btree[j - 1] > 0) add(treenode[bestrees[treei].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], &root, recompute, treenode, &grbg, zeros, zeros2); else add(treenode[treenode[-bestrees[treei].btree[j-1]-1]->back->index-1], treenode[j - 1], NULL, &root, recompute, treenode, &grbg, zeros, zeros2); } } /* load_tree */ void grandrearr() { /* calls either global rearrangement or local rearrangement on best trees */ long treei; boolean done; done = false; do { treei = findunrearranged(bestrees, nextree, true); if (treei < 0) done = true; else bestrees[treei].gloreange = true; if (!done) { load_tree(treei); globrearrange(); done = rearrfirst; } } while (!done); } /* grandrearr */ void maketree() { /* constructs a binary tree from the pointers in treenode. adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j, nextnode; boolean done, firsttree, goteof, haslengths; node *item, *nufork, *dummy; pointarray nodep; char *treestr; if (!usertree) { for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); recompute = true; root = treenode[enterorder[0] - 1]; add(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], treenode[spp], &root, recompute, treenode, &grbg, zeros, zeros2); if (progress) { printf("Adding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = false; oldnufork = NULL; for (i = 3; i <= spp; i++) { bestyet = -10.0 * spp * chars; item = treenode[enterorder[i - 1] - 1]; getnufork(&nufork, &grbg, treenode, zeros, zeros2); there = root; memcpy(tempadd->discbase, item->discbase, endsite*sizeof(unsigned char)); memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempadd->olddiscbase, zeros2, endsite*sizeof(unsigned char)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); addpreorder(root, item, nufork); if (!mulf) add(there, item, nufork, &root, recompute, treenode, &grbg, zeros, zeros2); else add(there, item, NULL, &root, recompute, treenode, &grbg, zeros, zeros2); like = bestyet; rearrange(&root); if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = (i == spp); if (lastrearr) { bestlike = bestyet; if (jumb == 1) { bstlike2 = bestlike; nextree = 1; initbestrees(bestrees, maxtrees, true); initbestrees(bestrees, maxtrees, false); } if (progress) { printf("\nDoing global rearrangements"); if (rearrfirst) printf(" on the first of the trees tied for best\n"); else printf(" on all trees tied for best\n"); printf(" !"); for (j = 0; j < nonodes; j++) if (j % ((nonodes / 72) + 1) == 0) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } globrearrange(); rearrange(&root); } } done = false; while (!done && findunrearranged(bestrees, nextree, true) >= 0) { grandrearr(); done = rearrfirst; } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } recompute = false; for (i = spp - 1; i >= 1; i--) re_move(treenode[i], &dummy, &root, recompute, treenode, &grbg, zeros, zeros2); if (jumb == njumble) { collapsebestrees(&root, &grbg, treenode, bestrees, place, zeros, zeros2, chars, recompute, progress); if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree 
found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first %4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], &root, recompute, treenode, &grbg, zeros, zeros2); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[i].btree[j - 1] > 0) add(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], &root, recompute, treenode, &grbg, zeros, zeros2); else add(treenode[treenode[-bestrees[i].btree[j - 1]-1]->back->index-1], treenode[j - 1], NULL, &root, recompute, treenode, &grbg, zeros, zeros2); } reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); treelength(root, chars, treenode); pars_printree(); describe(); for (j = 1; j < spp; j++) re_move(treenode[j], &dummy, &root, recompute, treenode, &grbg, zeros, zeros2); } } } else { if (numtrees > MAXNUMTREES) { printf( "\n\nERROR: number of input trees is read incorrectly from %s\n\n", intreename); embExitBad(); } if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n"); } fsteps = (long **)Malloc(maxuser*sizeof(long *)); for (j = 1; j <= maxuser; j++) fsteps[j - 1] = (long *)Malloc(endsite*sizeof(long)); nodep = NULL; which = 1; while (which <= numtrees) { firsttree = true; nextnode = 0; haslengths = true; treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initparsnode,false,nonodes); if (treeprint) fprintf(outfile, "\n\n"); if (outgropt) reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); treelength(root, chars, treenode); pars_printree(); describe(); if (which < numtrees) gdispose(root, &grbg, treenode); which++; } FClose(intree); putc('\n', outfile); if (numtrees > 1 && chars > 1 ) standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed); for (j = 1; j <= maxuser; j++) free(fsteps[j - 1]); free(fsteps); } if (jumb == njumble) { if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) { printf("\nTree"); if ((usertree && numtrees > 1) || (!usertree && nextree != 2)) printf("s"); printf(" also written onto file \"%s\"\n", outtreename); } } } } /* maketree */ void freerest() { if (!usertree) { freenode(&temp); freenode(&temp1); freenode(&temp2); freenode(&tempsum); freenode(&temprm); freenode(&tempadd); freenode(&tempf); freenode(&tmp); freenode(&tmp1); freenode(&tmp2); freenode(&tmp3); freenode(&tmprm); freenode(&tmpadd); } freegrbg(&grbg); if (ancseq) freegarbage(&garbage); free(threshwt); free(zeros); free(zeros2); freenodes(nonodes, treenode); } /* freerest*/ int main(int argc, Char *argv[]) { /* Discrete character parsimony by uphill search */ /* reads in spp, chars, and the data. 
Then calls maketree to construct the tree */ #ifdef MAC argc = 1; /* macsetup("Pars",""); */ argv[0]="Pars"; #endif init(argc, argv); emboss_getoptions("fpars", argc, argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; garbage = NULL; grbg = NULL; doinit(); for (ith = 1; ith <= msets; ith++) { if (msets > 1 && !justwts) { fprintf(outfile, "\nData set # %ld:\n\n", ith); if (progress) printf("\nData set # %ld:\n\n", ith); } doinput(); if (ith == 1) firstset = false; for (jumb = 1; jumb <= njumble; jumb++) maketree(); freerest(); } FClose(infile); FClose(outfile); if (weights || justwts) FClose(weightfile); if (trout) FClose(outtree); if (usertree) FClose(intree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif if (progress) printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Discrete character parsimony by uphill search */ PHYLIPNEW-3.69.650/src/Makefile.am0000664000175000017500000001060411430235015013104 00000000000000## Process this file with automake to produce Makefile.in if !ESYSTEMLIBS LLINCLUDES = -I../../../ajax/expat -I../../../ajax/zlib NLINCLUDES = -I${embprefix}/include/ezlib -I${embprefix}/include/eexpat LLAIXLIBS = -L../../../ajax/expat/.libs -L../../../ajax/zlib/.libs NLAIXLIBS = -leexpat -lezlib LLADD = ../../../ajax/expat/libeexpat.la ../../../ajax/zlib/libezlib.la NLADD = -leexpat -lezlib endif if LOCALLINK AM_CPPFLAGS = -I../include -I../../../nucleus -I../../../ajax/pcre \ $(LLINCLUDES) \ -I../../../ajax/core -I../../../ajax/graphics \ -I../../../ajax/ensembl -I../../../ajax/ajaxdb \ -I../../../ajax/acd -I../../../plplot else AM_CPPFLAGS = -I../include -I${embprefix}/include \ -I${embprefix}/include/eplplot \ $(NLINCLUDES) \ -I${embprefix}/include/epcre endif if ISSHARED if ISAIXIA64 if LOCALLINK AIX_CFLAGS = -Wl,-bdynamic -Wl,-brtl -L../../../plplot/.libs \ -L../../../ajax/pcre/.libs $(LLAIXLIBS) \ -L../../../ajax/core/.libs \ -L../../../ajax/graphics/.libs -L../../../ajax/ensembl/.libs \ -L../../../ajax/ajaxdb/.libs -L../../../ajax/acd/.libs \ -L../../../nucleus/.libs \ -lnucleus -lacd -lajaxdb -lensembl -lajaxg -lajax -lepcre \ $(NLAIXLIBS) -leplplot else AIX_CFLAGS = -Wl,-bdynamic -Wl,-brtl -L${embprefix}/lib -lnucleus -lacd \ -lajaxdb -lensembl -lajaxg -lajax -lepcre $(NLAIXLIBS) -leplplot endif endif endif AM_CFLAGS = $(AIX_CFLAGS) $(WARN_CFLAGS) $(DEVWARN_CFLAGS) ## To add programs ## Add the program binary name to bin_PROGRAMS ## (using \ as a continuation character for multiple lines) ## ## And add a programname_SOURCES line to define the source files ## to be compiled and linked ## ## make will compile and link the program ## make install will copy the program to the install directory bin_PROGRAMS = fclique fconsense fcontml fcontrast \ fdnacomp fdnadist fdnainvar fdnaml fdnamlk fdnamove fdnapars fdnapenny \ fdolmove fdollop fdolpenny fdrawgram fdrawtree \ ffactor ffitch fgendist fkitsch fmix fmove fneighbor fpars \ fpenny fproml fpromlk fprotdist fprotpars \ frestdist frestml fretree \ fdiscboot ffreqboot frestboot fseqboot fseqbootall \ ftreedist ftreedistpair fclique_SOURCES = clique.c disc.c phylip.c fconsense_SOURCES = consense.c cons.c phylip.c fcontml_SOURCES = contml.c cont.c phylip.c fcontrast_SOURCES = contrast.c cont.c phylip.c fdnacomp_SOURCES = dnacomp.c seq.c phylip.c fdnadist_SOURCES = dnadist.c seq.c phylip.c fdnainvar_SOURCES = dnainvar.c seq.c phylip.c fdnaml_SOURCES = dnaml.c seq.c phylip.c fdnamlk_SOURCES = dnamlk.c seq.c phylip.c printree.c mlclock.c 
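## For example (hypothetical, not part of this package): a new program
## "fexample" built from example.c plus the shared phylip.c would be added
## by appending fexample to bin_PROGRAMS above and then adding the line
## fexample_SOURCES = example.c phylip.c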
fdnamove_SOURCES = dnamove.c moves.c seq.c phylip.c fdnapenny_SOURCES = dnapenny.c seq.c phylip.c fdnapars_SOURCES = dnapars.c seq.c phylip.c fdolmove_SOURCES = dolmove.c disc.c moves.c dollo.c phylip.c fdollop_SOURCES = dollop.c disc.c dollo.c phylip.c fdolpenny_SOURCES = dolpenny.c disc.c dollo.c phylip.c fdrawgram_SOURCES = drawgram.c draw.c draw2.c phylip.c fdrawtree_SOURCES = drawtree.c draw.c draw2.c phylip.c ffactor_SOURCES = factor.c phylip.c ffitch_SOURCES = fitch.c dist.c phylip.c fgendist_SOURCES = gendist.c phylip.c fkitsch_SOURCES = kitsch.c dist.c phylip.c fmix_SOURCES = mix.c disc.c wagner.c phylip.c fmove_SOURCES = move.c disc.c moves.c wagner.c phylip.c fneighbor_SOURCES = neighbor.c dist.c phylip.c fpars_SOURCES = pars.c discrete.c phylip.c fpenny_SOURCES = penny.c disc.c wagner.c phylip.c fproml_SOURCES = proml.c seq.c phylip.c fpromlk_SOURCES = promlk.c seq.c phylip.c printree.c mlclock.c fprotdist_SOURCES = protdist.c seq.c phylip.c fprotpars_SOURCES = protpars.c seq.c phylip.c frestdist_SOURCES = restdist.c seq.c phylip.c frestml_SOURCES = restml.c seq.c phylip.c fretree_SOURCES = retree.c moves.c phylip.c ftreedist_SOURCES = treedist.c cons.c phylip.c ftreedistpair_SOURCES = treedistpair.c cons.c phylip.c fdiscboot_SOURCES = discboot.c seq.c phylip.c ffreqboot_SOURCES = freqboot.c seq.c phylip.c frestboot_SOURCES = restboot.c seq.c phylip.c fseqboot_SOURCES = seqboot.c seq.c phylip.c fseqbootall_SOURCES = seqbootall.c seq.c phylip.c if LOCALLINK LDADD = ../../../nucleus/libnucleus.la ../../../ajax/acd/libacd.la \ ../../../ajax/ajaxdb/libajaxdb.la \ ../../../ajax/ensembl/libensembl.la \ ../../../ajax/graphics/libajaxg.la \ ../../../ajax/core/libajax.la \ $(LLADD) \ ../../../ajax/pcre/libepcre.la \ ../../../plplot/libeplplot.la $(XLIB) else LDADD = -L${embprefix}/lib -lnucleus -lacd -lajaxdb -lensembl -lajaxg \ -lajax -lepcre $(NLADD) -leplplot $(XLIB) endif PHYLIPNEW-3.69.650/src/penny.c0000664000175000017500000004671411305225544012367 00000000000000 #include "phylip.h" #include "disc.h" #include "wagner.h" /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define maxtrees 100 /* maximum number of trees to be printed out */ #define often 100 /* how often to notify how many trees examined */ #define many 1000 /* how many multiples of howoften before stop */ AjPPhyloState* phylostates = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloProp phyloanc = NULL; AjPPhyloProp phylomix = NULL; typedef long *treenumbers; typedef double *valptr; typedef long *placeptr; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void doinput(void); void supplement(bitptr); void evaluate(node2 *); void addtraverse(node2 *,node2 *,node2 *,long *,long *,valptr,placeptr); void addit(long); void reroot(node2 *); void describe(void); void maketree(void); /* function prototypes */ #endif Char infilename[FNMLNGTH], weightfilename[FNMLNGTH], ancfilename[FNMLNGTH], mixfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; node2 *root; long outgrno, rno, howmany, howoften, col, msets, ith; /* outgrno indicates outgroup */ boolean weights, ancvar, questions, allsokal, allwagner, mixture, simple, trout, noroot, didreroot, outgropt, progress, treeprint, stepbox, ancseq, mulsets, firstset; boolean *ancone, *anczero, *ancone0, *anczero0, justwts; pointptr2 treenode; /* pointers to all nodes in tree */ double fracdone, fracinc; double threshold; double *threshwt; bitptr wagner, wagner0; boolean *added; Char *guess; steptr numsteps; long **bestorders, **bestrees; steptr numsone, numszero; gbit *garbage; long examined, mults; boolean firsttime, done, full; double like, bestyet; treenumbers current, order; long fullset; bitptr zeroanc, oneanc; bitptr suppsteps; void emboss_getoptions(char *pgm, int argc, char *argv[]) { ajint numseqs=0; ajint numwts=0; AjPStr method = NULL; howoften = often; howmany = many; outgrno = 1; outgropt = false; simple = true; trout = true; weights = false; justwts = false; ancvar = false; allsokal = false; allwagner = true; mixture = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; mulsets = false; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); phylostates = ajAcdGetDiscretestates("infile"); while (phylostates[numseqs]) numseqs++; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } phyloanc = ajAcdGetProperties("ancfile"); if(phyloanc) ancvar = true; method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "w")) allwagner = true; else if(ajStrMatchC(method, "c")) allsokal = true; else if(ajStrMatchC(method, "m")) { mixture = allwagner = true; phylomix = ajAcdGetProperties("mixfile"); } howmany = ajAcdGetInt("howmany"); howoften = ajAcdGetInt("howoften"); outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; simple = ajAcdGetBoolean("simple"); threshold = ajAcdGetFloat("threshold"); progress = ajAcdGetBoolean("progress"); printdata = ajAcdGetBoolean("printdata"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); 
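/* the tree output file is only requested and opened when trout is set */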
emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nPenny algorithm, version %s\n",VERSION); fprintf(outfile, " branch-and-bound to find all"); fprintf(outfile, " most parsimonious trees\n\n"); } /* emboss_getoptions */ void allocrest() { long i; weight = (steptr)Malloc(chars*sizeof(steptr)); threshwt = (double *)Malloc(chars*sizeof(double)); bestorders = (long **)Malloc(maxtrees*sizeof(long *)); bestrees = (long **)Malloc(maxtrees*sizeof(long *)); for (i = 1; i <= maxtrees; i++) { bestorders[i - 1] = (long *)Malloc(spp*sizeof(long)); bestrees[i - 1] = (long *)Malloc(spp*sizeof(long)); } numsteps = (steptr)Malloc(chars*sizeof(steptr)); guess = (Char *)Malloc(chars*sizeof(Char)); numszero = (steptr)Malloc(chars*sizeof(steptr)); numsone = (steptr)Malloc(chars*sizeof(steptr)); current = (treenumbers)Malloc(spp*sizeof(long)); order = (treenumbers)Malloc(spp*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); added = (boolean *)Malloc(nonodes*sizeof(boolean)); ancone = (boolean *)Malloc(chars*sizeof(boolean)); anczero = (boolean *)Malloc(chars*sizeof(boolean)); ancone0 = (boolean *)Malloc(chars*sizeof(boolean)); anczero0 = (boolean *)Malloc(chars*sizeof(boolean)); wagner = (bitptr)Malloc(words*sizeof(long)); wagner0 = (bitptr)Malloc(words*sizeof(long)); zeroanc = (bitptr)Malloc(words*sizeof(long)); oneanc = (bitptr)Malloc(words*sizeof(long)); suppsteps = (bitptr)Malloc(words*sizeof(long)); extras = (steptr)Malloc(chars*sizeof(steptr)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersstate(phylostates[0],&spp, &chars, &nonodes, 1); words = chars / bits + 1; if (printdata) fprintf(outfile, "%2ld species, %3ld characters\n", spp, chars); alloctree2(&treenode); setuptree2(treenode); allocrest(); } /* doinit */ void inputoptions() { /* input the information on the options */ long i; if(justwts){ if(firstset){ if (ancvar) { inputancestorsstr(phyloanc->Str[0], anczero0, ancone0); } if (mixture) { inputmixturestr(phylomix->Str[0], wagner0); } } for (i = 0; i < (chars); i++) weight[i] = 1; inputweightsstr(phyloweights->Str[0], chars, weight, &weights); for (i = 0; i < (words); i++) { if (mixture) wagner[i] = wagner0[i]; else if (allsokal) wagner[i] = 0; else wagner[i] = (1L << (bits + 1)) - (1L << 1); } } else { if (!firstset) { samenumspstate(phylostates[ith-1], &chars, ith); } for (i = 0; i < (chars); i++) weight[i] = 1; if (ancvar) { inputancestorsstr(phyloanc->Str[ith-1], anczero0, ancone0); } if (mixture) { inputmixturestr(phylomix->Str[ith-1], wagner0); } if (weights) inputweightsstr(phyloweights->Str[ith-1], chars, weight, &weights); for (i = 0; i < (words); i++) { if (mixture) wagner[i] = wagner0[i]; else if (allsokal) wagner[i] = 0; else wagner[i] = (1L << (bits + 1)) - (1L << 1); } } for (i = 0; i < (chars); i++) { if (!ancvar) { anczero[i] = true; ancone[i] = (((1L << (i % bits + 1)) & wagner[i / bits]) != 0); } else { anczero[i] = anczero0[i]; ancone[i] = ancone0[i]; } } noroot = true; questions = false; for (i = 0; i < (chars); i++) { if (weight[i] > 0) { noroot = (noroot && ancone[i] && anczero[i] && ((((1L << (i % bits + 1)) & wagner[i / bits]) != 0) || threshold <= 2.0)); } questions = (questions || (ancone[i] && anczero[i])); threshwt[i] = threshold * weight[i]; } } /* inputoptions */ void doinput() { /* reads the input data */ inputoptions(); if(!justwts || firstset) disc_inputdata2(phylostates[ith-1], treenode); } /* doinput */ void supplement(bitptr suppsteps) { /* determine minimum number of steps more which will be added when 
rest of species are put in tree */ long i, j, k, l; long defone, defzero, a; k = 0; for (i = 0; i < (words); i++) { defone = 0; defzero = 0; a = 0; for (l = 1; l <= bits; l++) { k++; if (k <= chars) { if (!ancone[k - 1]) defzero = ((long)defzero) | (1L << l); if (!anczero[k - 1]) defone = ((long)defone) | (1L << l); } } for (j = 0; j < (spp); j++) { defone |= treenode[j]->empstte1[i] & (~treenode[j]->empstte0[i]); defzero |= treenode[j]->empstte0[i] & (~treenode[j]->empstte1[i]); if (added[j]) a |= defone & defzero; } suppsteps[i] = defone & defzero & (~a); } } /* supplement */ void evaluate(node2 *r) { /* Determines the number of steps needed for a tree. This is the minimum number needed to evolve chars on this tree */ long i, stepnum, smaller; double sum; sum = 0.0; for (i = 0; i < (chars); i++) { numszero[i] = 0; numsone[i] = 0; } supplement(suppsteps); for (i = 0; i < (words); i++) zeroanc[i] =fullset; full = true; postorder(r, fullset, full, wagner, zeroanc); cpostorder(r, full, zeroanc, numszero, numsone); count(r->fulstte1, zeroanc, numszero, numsone); count(suppsteps, zeroanc, numszero, numsone); for (i = 0; i < (words); i++) zeroanc[i] = 0; full = false; postorder(r, fullset, full, wagner, zeroanc); cpostorder(r, full, zeroanc, numszero, numsone); count(r->empstte0, zeroanc, numszero, numsone); count(suppsteps, zeroanc, numszero, numsone); for (i = 0; i < (chars); i++) { smaller = spp * weight[i]; numsteps[i] = smaller; if (anczero[i]) { numsteps[i] = numszero[i]; smaller = numszero[i]; } if (ancone[i] && numsone[i] < smaller) numsteps[i] = numsone[i]; stepnum = numsteps[i] + extras[i]; if (stepnum <= threshwt[i]) sum += stepnum; else sum += threshwt[i]; guess[i] = '?'; if (!ancone[i] || (anczero[i] && numszero[i] < numsone[i])) guess[i] = '0'; else if (!anczero[i] || (ancone[i] && numsone[i] < numszero[i])) guess[i] = '1'; } if (examined == 0 && mults == 0) bestyet = -1.0; like = sum; } /* evaluate */ void addtraverse(node2 *a, node2 *b, node2 *c, long *m, long *n, valptr valyew, placeptr place) { /* traverse all places to add b */ if (done) return; if ((*m) <= 2 || !(noroot && (a == root || a == root->next->back))) { add3(a, b, c, &root, treenode); (*n)++; evaluate(root); examined++; if (examined == howoften) { examined = 0; mults++; if (mults == howmany) done = true; if (progress) { printf("%6ld", mults); if (bestyet >= 0) printf("%18.5f", bestyet); else printf(" - "); printf("%17ld%20.2f\n", nextree - 1, fracdone * 100); #ifdef WIN32 phyFillScreenColor(); #endif } } valyew[(*n) - 1] = like; place[(*n) - 1] = a->index; re_move3(&b, &c, &root, treenode); } if (!a->tip) { addtraverse(a->next->back, b, c, m,n,valyew,place); addtraverse(a->next->next->back, b, c, m,n,valyew,place); } } /* addtraverse */ void addit(long m) { /* adds the species one by one, recursively */ long n; valptr valyew; placeptr place; long i, j, n1, besttoadd = 0; valptr bestval; placeptr bestplace; double oldfrac, oldfdone, sum, bestsum; valyew = (valptr)Malloc(nonodes*sizeof(double)); bestval = (valptr)Malloc(nonodes*sizeof(double)); place = (placeptr)Malloc(nonodes*sizeof(long)); bestplace = (placeptr)Malloc(nonodes*sizeof(long)); if (simple && !firsttime) { n = 0; added[order[m - 1] - 1] = true; addtraverse(root, treenode[order[m - 1] - 1], treenode[spp + m - 2], &m,&n,valyew,place); besttoadd = order[m - 1]; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } else { bestsum = -1.0; for (i = 1; i <= (spp); i++) { if (!added[i - 1]) { n = 0; added[i - 1] = 
true; addtraverse(root, treenode[i - 1], treenode[spp + m - 2], &m, &n,valyew,place); added[i - 1] = false; sum = 0.0; for (j = 0; j < (n); j++) sum += valyew[j]; if (sum > bestsum) { bestsum = sum; besttoadd = i; memcpy(bestplace, place, nonodes*sizeof(long)); memcpy(bestval, valyew, nonodes*sizeof(double)); } } } } order[m - 1] = besttoadd; memcpy(place, bestplace, nonodes*sizeof(long)); memcpy(valyew, bestval, nonodes*sizeof(double)); shellsort(valyew, place, n); oldfrac = fracinc; oldfdone = fracdone; n1 = 0; for (i = 0; i < (n); i++) { if (valyew[i] <= bestyet || bestyet < 0.0) n1++; } if (n1 > 0) fracinc /= n1; for (i = 0; i < (n); i++) { if (valyew[i] <= bestyet || bestyet < 0.0) { current[m - 1] = place[i]; add3(treenode[place[i] - 1], treenode[besttoadd - 1], treenode[spp + m - 2], &root, treenode); added[besttoadd - 1] = true; if (m < spp) addit(m + 1); else { if (valyew[i] < bestyet || bestyet < 0.0) { nextree = 1; bestyet = valyew[i]; } if (nextree <= maxtrees) { memcpy(bestorders[nextree - 1], order, spp*sizeof(long)); memcpy(bestrees[nextree - 1], current, spp*sizeof(long)); } nextree++; firsttime = false; } re_move3(&treenode[besttoadd - 1], &treenode[spp + m - 2], &root, treenode); added[besttoadd - 1] = false; } fracdone += fracinc; } fracinc = oldfrac; fracdone = oldfdone; free(valyew); free(bestval); free(place); free(bestplace); } /* addit */ void reroot(node2 *outgroup) { /* reorients tree, putting outgroup in desired position. */ node2 *p, *q, *newbottom, *oldbottom; if (outgroup->back->index == root->index) return; newbottom = outgroup->back; p = treenode[newbottom->index - 1]->back; while (p->index != root->index) { oldbottom = treenode[p->index - 1]; treenode[p->index - 1] = p; p = oldbottom->back; } p = root->next; q = root->next->next; p->back->back = q->back; q->back->back = p->back; p->back = outgroup; q->back = outgroup->back; outgroup->back->back = root->next->next; outgroup->back = root->next; treenode[newbottom->index - 1] = newbottom; } /* reroot */ void describe() { /* prints ancestors, steps and table of numbers of steps in each character */ if (stepbox) { putc('\n', outfile); writesteps(weights, numsteps); } if (questions && (!noroot || didreroot)) guesstates(guess); if (ancseq) { hypstates(fullset, full, noroot, didreroot, root, wagner, zeroanc, oneanc, treenode, guess, garbage); putc('\n', outfile); } if (trout) { col = 0; treeout2(root, &col, root); } } /* describe */ void maketree() { /* tree construction recursively by branch and bound */ long i, j, k; node2 *dummy; fullset = (1L << (bits + 1)) - (1L << 1); if (progress) { printf("\nHow many\n"); printf("trees looked Approximate\n"); printf("at so far Length of How many percentage\n"); printf("(multiples shortest tree trees this long searched\n"); printf("of %4ld): found so far found so far so far\n", howoften); printf("---------- ------------ ------------ ------------\n"); #ifdef WIN32 phyFillScreenColor(); #endif } done = false; mults = 0; examined = 0; nextree = 1; root = treenode[0]; firsttime = true; for (i = 0; i < (spp); i++) added[i] = false; added[0] = true; order[0] = 1; k = 2; fracdone = 0.0; fracinc = 1.0; bestyet = -1.0; addit(k); if (done) { if (progress) { printf("Search broken off! Not guaranteed to\n"); printf(" have found the most parsimonious trees.\n"); } if (treeprint) { fprintf(outfile, "Search broken off! 
Not guaranteed to\n"); fprintf(outfile, " have found the most parsimonious\n"); fprintf(outfile, " trees, but here is what we found:\n"); } } if (treeprint) { fprintf(outfile, "\nrequires a total of %18.3f\n\n", bestyet); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%5ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i < (spp); i++) added[i] = true; for (i = 0; i <= (nextree - 2); i++) { for (j = k; j <= (spp); j++) add3(treenode[bestrees[i][j - 1] - 1], treenode[bestorders[i][j - 1] - 1], treenode[spp + j - 2], &root, treenode); if (noroot) reroot(treenode[outgrno - 1]); didreroot = (outgropt && noroot); evaluate(root); printree(treeprint, noroot, didreroot, root); describe(); for (j = k - 1; j < (spp); j++) re_move3(&treenode[bestorders[i][j] - 1], &dummy, &root, treenode); } if (progress) { printf("\nOutput written to file \"%s\"\n\n", outfilename); if (trout) printf("Trees also written onto file \"%s\"\n\n", outtreename); } if (ancseq) freegarbage(&garbage); } /* maketree */ int main(int argc, Char *argv[]) { /* Penny's branch-and-bound method */ /* Reads in the number of species, number of characters, options and data. Then finds all most parsimonious trees */ #ifdef MAC argc = 1; /* macsetup("Penny",""); */ argv[0] = "Penny"; #endif init(argc,argv); emboss_getoptions("fpenny", argc, argv); ansi = ANSICRT; firstset = true; garbage = NULL; bits = 8*sizeof(long) - 1; doinit(); for (ith = 1; ith <= msets; ith++) { if(firstset){ if (allsokal && !mixture) fprintf(outfile, "Camin-Sokal parsimony method\n\n"); if (allwagner && !mixture) fprintf(outfile, "Wagner parsimony method\n\n"); } doinput(); if (msets > 1 && !justwts) { fprintf(outfile, "Data set # %ld:\n\n",ith); if (progress) printf("\nData set # %ld:\n",ith); } if (justwts){ if(firstset && mixture && printdata) printmixture(outfile, wagner); fprintf(outfile, "Weights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } else if (mixture && printdata) printmixture(outfile, wagner); if (printdata){ if (weights || justwts) printweights(outfile, 0, chars, weight, "Characters"); if (ancvar) printancestors(outfile, anczero, ancone); } if (ith == 1) firstset = false; maketree(); } FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* Penny's branch-and-bound method */ PHYLIPNEW-3.69.650/src/gendist.c0000664000175000017500000001643511305225544012670 00000000000000#include "phylip.h" /* version 3.6. (c) Copyright 1993-1997 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define epsilong 0.02 /* a small number */ AjPPhyloFreq phylofreq; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void getalleles(void); void inputdata(void); void getinput(void); void makedists(void); void writedists(void); /* function prototypes */ #endif const char* outfilename; AjPFile embossoutfile; long loci, totalleles, df, datasets, ith; long nonodes; long *alleles; phenotype3 *x; double **d; boolean all, cavalli, lower, nei, reynolds, mulsets, firstset, progress; void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr method = NULL; all = true; cavalli = false; lower = false; nei = false; reynolds = false; lower = false; progress = true; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv,"PHYLIPNEW",VERSION); phylofreq = ajAcdGetFrequencies("infile"); method = ajAcdGetListSingle("method"); if(ajStrMatchC(method, "n")) nei = true; else if(ajStrMatchC(method, "c")) cavalli = true; else if(ajStrMatchC(method, "r")) reynolds = true; lower = ajAcdGetBoolean("lower"); progress = ajAcdGetBoolean("progress"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); } /* emboss_getoptions */ void allocrest() { long i; x = (phenotype3 *)Malloc(spp*sizeof(phenotype3)); d = (double **)Malloc(spp*sizeof(double *)); for (i = 0; i < (spp); i++) d[i] = (double *)Malloc(spp*sizeof(double)); alleles = (long *)Malloc(loci*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersfreq(phylofreq, &spp, &loci, &nonodes, 1); allocrest(); } /* doinit */ void getalleles() { long i; if (!firstset) samenumspfreq(phylofreq, &loci, ith); totalleles = 0; for (i = 0; i < loci; i++) { alleles[i] = phylofreq->Allele[i]; totalleles += alleles[i]; } df = totalleles - loci; } /* getalleles */ void inputdata() { /* read allele frequencies */ long i, j, k, m, m1, n; double sum; ajint ipos = 0; for (i = 0; i < spp; i++) x[i] = (phenotype3)Malloc(totalleles*sizeof(double)); for (i = 1; i <= (spp); i++) { initnamefreq(phylofreq,i-1); m = 1; for (j = 1; j <= (loci); j++) { sum = 0.0; n = alleles[j - 1]; for (k = 1; k <= n; k++) { x[i - 1][m - 1] = phylofreq->Data[ipos++]; sum += x[i - 1][m - 1]; if (x[i - 1][m - 1] < 0.0) { printf("\n\nERROR: Locus %ld in species %ld: an allele", j, i); printf(" frequency is negative\n\n"); embExitBad(); } m++; } if (all && fabs(sum - 1.0) > epsilong) { printf( "\n\nERROR: Locus %ld in species %ld: frequencies do not add up to 1\n\n", j, i); for (m1 = 1; m1 <= n; m1 += 1) { if (m1 == 1) printf("%f", x[i-1][m-n+m1-2]); else { if ((m1 % 8) == 1) printf("\n"); printf("+%f", x[i-1][m-n+m1-2]); } } printf(" = %f\n\n", sum); embExitBad(); } if (!all) { x[i - 1][m - 1] = 1.0 - sum; if (x[i-1][m-1] < -epsilong) { printf("\n\nERROR: Locus %ld in species %ld: ",j,i); printf("frequencies add up to more than 1\n\n"); for (m1 = 1; m1 <= n; m1 += 1) { if (m1 == 1) printf("%f", x[i-1][m-n+m1-2]); else { if ((m1 % 8) == 1) printf("\n"); printf("+%f", x[i-1][m-n+m1-2]); } } printf(" = %f\n\n", sum); embExitBad(); } m++; } } } } /* inputdata */ void getinput() { /* read the input data */ getalleles(); inputdata(); } /* getinput */ void makedists() { long i, j, k; double s, s1, s2, s3, f; double TEMP; if (progress) printf("Distances calculated for species\n"); for (i = 0; i < spp; i++) d[i][i] = 0.0; for (i = 1; i <= spp; i++) { if (progress) { #ifdef WIN32 
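/* on Windows builds, keep the console display refreshed while progress is printed */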
phyFillScreenColor(); #endif printf(" "); for (j = 0; j < nmlngth; j++) putchar(nayme[i - 1][j]); printf(" "); } for (j = 0; j <= i - 1; j++) { if (cavalli) { s = 0.0; for (k = 0; k < (totalleles); k++) { f = x[i - 1][k] * x[j][k]; if (f > 0.0) s += sqrt(f); } d[i - 1][j] = 4 * (loci - s) / df; } if (nei) { s1 = 0.0; s2 = 0.0; s3 = 0.0; for (k = 0; k < (totalleles); k++) { s1 += x[i - 1][k] * x[j][k]; TEMP = x[i - 1][k]; s2 += TEMP * TEMP; TEMP = x[j][k]; s3 += TEMP * TEMP; } if (s1 <= 1.0e-20) { d[i - 1][j] = -1.0; printf("\nWARNING: INFINITE DISTANCE BETWEEN SPECIES "); printf("%ld AND %ld; -1.0 WAS WRITTEN\n", i, j); } else d[i - 1][j] = fabs(-log(s1 / sqrt(s2 * s3))); } if (reynolds) { s1 = 0.0; s2 = 0.0; for (k = 0; k < (totalleles); k++) { TEMP = x[i - 1][k] - x[j][k]; s1 += TEMP * TEMP; s2 += x[i - 1][k] * x[j][k]; } d[i - 1][j] = s1 / (loci * 2 - 2 * s2); } if (progress) { putchar('.'); fflush(stdout); } d[j][i - 1] = d[i - 1][j]; } if (progress) { putchar('\n'); fflush(stdout); } } if (progress) { putchar('\n'); fflush(stdout); } } /* makedists */ void writedists() { long i, j, k; fprintf(outfile, "%5ld\n", spp); for (i = 0; i < (spp); i++) { for (j = 0; j < nmlngth; j++) putc(nayme[i][j], outfile); if (lower) k = i; else k = spp; for (j = 1; j <= k; j++) { if (d[i][j-1] < 100.0) fprintf(outfile, "%10.6f", d[i][j-1]); else if (d[i][j-1] < 1000.0) fprintf(outfile, " %10.6f", d[i][j-1]); else fprintf(outfile, " %11.6f", d[i][j-1]); if ((j + 1) % 7 == 0 && j < k) putc('\n', outfile); } putc('\n', outfile); } if (progress) printf("Distances written to file \"%s\"\n\n", outfilename); } /* writedists */ int main(int argc, Char *argv[]) { /* main program */ #ifdef MAC argc = 1; /* macsetup("Gendist",""); */ argv[0] = "Gendist"; #endif init(argc, argv); emboss_getoptions("fgendist",argc,argv); ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; doinit(); for (ith = 1; ith <= (datasets); ith++) { getinput(); firstset = false; if ((datasets > 1) && progress) printf("\nData set # %ld:\n\n",ith); makedists(); writedists(); } FClose(infile); FClose(outfile); #ifdef MAC fixmacfile(outfilename); #endif printf("Done.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } PHYLIPNEW-3.69.650/src/Makefile.in0000664000175000017500000020050612171071677013136 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
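# NOTE: this file is generated from Makefile.am by automake; edit Makefile.am
# and re-run automake rather than changing this file by hand.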
@SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ bin_PROGRAMS = fclique$(EXEEXT) fconsense$(EXEEXT) fcontml$(EXEEXT) \ fcontrast$(EXEEXT) fdnacomp$(EXEEXT) fdnadist$(EXEEXT) \ fdnainvar$(EXEEXT) fdnaml$(EXEEXT) fdnamlk$(EXEEXT) \ fdnamove$(EXEEXT) fdnapars$(EXEEXT) fdnapenny$(EXEEXT) \ fdolmove$(EXEEXT) fdollop$(EXEEXT) fdolpenny$(EXEEXT) \ fdrawgram$(EXEEXT) fdrawtree$(EXEEXT) ffactor$(EXEEXT) \ ffitch$(EXEEXT) fgendist$(EXEEXT) fkitsch$(EXEEXT) \ fmix$(EXEEXT) fmove$(EXEEXT) fneighbor$(EXEEXT) fpars$(EXEEXT) \ fpenny$(EXEEXT) fproml$(EXEEXT) fpromlk$(EXEEXT) \ fprotdist$(EXEEXT) fprotpars$(EXEEXT) frestdist$(EXEEXT) \ frestml$(EXEEXT) fretree$(EXEEXT) fdiscboot$(EXEEXT) \ ffreqboot$(EXEEXT) frestboot$(EXEEXT) fseqboot$(EXEEXT) \ fseqbootall$(EXEEXT) ftreedist$(EXEEXT) ftreedistpair$(EXEEXT) subdir = src DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/config.h.in $(top_srcdir)/depcomp ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" PROGRAMS = $(bin_PROGRAMS) am_fclique_OBJECTS = clique.$(OBJEXT) disc.$(OBJEXT) phylip.$(OBJEXT) fclique_OBJECTS = $(am_fclique_OBJECTS) fclique_LDADD = $(LDADD) am__DEPENDENCIES_1 = @LOCALLINK_FALSE@fclique_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fclique_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fconsense_OBJECTS = consense.$(OBJEXT) cons.$(OBJEXT) \ phylip.$(OBJEXT) fconsense_OBJECTS = $(am_fconsense_OBJECTS) fconsense_LDADD = $(LDADD) @LOCALLINK_FALSE@fconsense_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fconsense_DEPENDENCIES = \ 
@LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fcontml_OBJECTS = contml.$(OBJEXT) cont.$(OBJEXT) phylip.$(OBJEXT) fcontml_OBJECTS = $(am_fcontml_OBJECTS) fcontml_LDADD = $(LDADD) @LOCALLINK_FALSE@fcontml_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fcontml_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fcontrast_OBJECTS = contrast.$(OBJEXT) cont.$(OBJEXT) \ phylip.$(OBJEXT) fcontrast_OBJECTS = $(am_fcontrast_OBJECTS) fcontrast_LDADD = $(LDADD) @LOCALLINK_FALSE@fcontrast_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fcontrast_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdiscboot_OBJECTS = discboot.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fdiscboot_OBJECTS = $(am_fdiscboot_OBJECTS) fdiscboot_LDADD = $(LDADD) @LOCALLINK_FALSE@fdiscboot_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdiscboot_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnacomp_OBJECTS = dnacomp.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fdnacomp_OBJECTS = $(am_fdnacomp_OBJECTS) fdnacomp_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnacomp_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnacomp_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnadist_OBJECTS = dnadist.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fdnadist_OBJECTS = $(am_fdnadist_OBJECTS) fdnadist_LDADD = $(LDADD) 
@LOCALLINK_FALSE@fdnadist_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnadist_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnainvar_OBJECTS = dnainvar.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fdnainvar_OBJECTS = $(am_fdnainvar_OBJECTS) fdnainvar_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnainvar_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnainvar_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnaml_OBJECTS = dnaml.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fdnaml_OBJECTS = $(am_fdnaml_OBJECTS) fdnaml_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnaml_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnaml_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnamlk_OBJECTS = dnamlk.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) \ printree.$(OBJEXT) mlclock.$(OBJEXT) fdnamlk_OBJECTS = $(am_fdnamlk_OBJECTS) fdnamlk_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnamlk_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnamlk_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnamove_OBJECTS = dnamove.$(OBJEXT) moves.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fdnamove_OBJECTS = $(am_fdnamove_OBJECTS) fdnamove_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnamove_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnamove_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ 
$(am__DEPENDENCIES_1) am_fdnapars_OBJECTS = dnapars.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fdnapars_OBJECTS = $(am_fdnapars_OBJECTS) fdnapars_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnapars_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnapars_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdnapenny_OBJECTS = dnapenny.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fdnapenny_OBJECTS = $(am_fdnapenny_OBJECTS) fdnapenny_LDADD = $(LDADD) @LOCALLINK_FALSE@fdnapenny_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdnapenny_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdollop_OBJECTS = dollop.$(OBJEXT) disc.$(OBJEXT) dollo.$(OBJEXT) \ phylip.$(OBJEXT) fdollop_OBJECTS = $(am_fdollop_OBJECTS) fdollop_LDADD = $(LDADD) @LOCALLINK_FALSE@fdollop_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdollop_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdolmove_OBJECTS = dolmove.$(OBJEXT) disc.$(OBJEXT) moves.$(OBJEXT) \ dollo.$(OBJEXT) phylip.$(OBJEXT) fdolmove_OBJECTS = $(am_fdolmove_OBJECTS) fdolmove_LDADD = $(LDADD) @LOCALLINK_FALSE@fdolmove_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdolmove_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdolpenny_OBJECTS = dolpenny.$(OBJEXT) disc.$(OBJEXT) \ dollo.$(OBJEXT) phylip.$(OBJEXT) fdolpenny_OBJECTS = $(am_fdolpenny_OBJECTS) fdolpenny_LDADD = $(LDADD) @LOCALLINK_FALSE@fdolpenny_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdolpenny_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ 
@LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdrawgram_OBJECTS = drawgram.$(OBJEXT) draw.$(OBJEXT) \ draw2.$(OBJEXT) phylip.$(OBJEXT) fdrawgram_OBJECTS = $(am_fdrawgram_OBJECTS) fdrawgram_LDADD = $(LDADD) @LOCALLINK_FALSE@fdrawgram_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdrawgram_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fdrawtree_OBJECTS = drawtree.$(OBJEXT) draw.$(OBJEXT) \ draw2.$(OBJEXT) phylip.$(OBJEXT) fdrawtree_OBJECTS = $(am_fdrawtree_OBJECTS) fdrawtree_LDADD = $(LDADD) @LOCALLINK_FALSE@fdrawtree_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fdrawtree_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_ffactor_OBJECTS = factor.$(OBJEXT) phylip.$(OBJEXT) ffactor_OBJECTS = $(am_ffactor_OBJECTS) ffactor_LDADD = $(LDADD) @LOCALLINK_FALSE@ffactor_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@ffactor_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_ffitch_OBJECTS = fitch.$(OBJEXT) dist.$(OBJEXT) phylip.$(OBJEXT) ffitch_OBJECTS = $(am_ffitch_OBJECTS) ffitch_LDADD = $(LDADD) @LOCALLINK_FALSE@ffitch_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@ffitch_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_ffreqboot_OBJECTS = freqboot.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) ffreqboot_OBJECTS = $(am_ffreqboot_OBJECTS) ffreqboot_LDADD = $(LDADD) @LOCALLINK_FALSE@ffreqboot_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@ffreqboot_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ 
@LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fgendist_OBJECTS = gendist.$(OBJEXT) phylip.$(OBJEXT) fgendist_OBJECTS = $(am_fgendist_OBJECTS) fgendist_LDADD = $(LDADD) @LOCALLINK_FALSE@fgendist_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fgendist_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fkitsch_OBJECTS = kitsch.$(OBJEXT) dist.$(OBJEXT) phylip.$(OBJEXT) fkitsch_OBJECTS = $(am_fkitsch_OBJECTS) fkitsch_LDADD = $(LDADD) @LOCALLINK_FALSE@fkitsch_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fkitsch_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fmix_OBJECTS = mix.$(OBJEXT) disc.$(OBJEXT) wagner.$(OBJEXT) \ phylip.$(OBJEXT) fmix_OBJECTS = $(am_fmix_OBJECTS) fmix_LDADD = $(LDADD) @LOCALLINK_FALSE@fmix_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fmix_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fmove_OBJECTS = move.$(OBJEXT) disc.$(OBJEXT) moves.$(OBJEXT) \ wagner.$(OBJEXT) phylip.$(OBJEXT) fmove_OBJECTS = $(am_fmove_OBJECTS) fmove_LDADD = $(LDADD) @LOCALLINK_FALSE@fmove_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fmove_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fneighbor_OBJECTS = neighbor.$(OBJEXT) dist.$(OBJEXT) \ phylip.$(OBJEXT) fneighbor_OBJECTS = $(am_fneighbor_OBJECTS) fneighbor_LDADD = $(LDADD) @LOCALLINK_FALSE@fneighbor_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) 
@LOCALLINK_TRUE@fneighbor_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fpars_OBJECTS = pars.$(OBJEXT) discrete.$(OBJEXT) phylip.$(OBJEXT) fpars_OBJECTS = $(am_fpars_OBJECTS) fpars_LDADD = $(LDADD) @LOCALLINK_FALSE@fpars_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fpars_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fpenny_OBJECTS = penny.$(OBJEXT) disc.$(OBJEXT) wagner.$(OBJEXT) \ phylip.$(OBJEXT) fpenny_OBJECTS = $(am_fpenny_OBJECTS) fpenny_LDADD = $(LDADD) @LOCALLINK_FALSE@fpenny_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fpenny_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fproml_OBJECTS = proml.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fproml_OBJECTS = $(am_fproml_OBJECTS) fproml_LDADD = $(LDADD) @LOCALLINK_FALSE@fproml_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fproml_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fpromlk_OBJECTS = promlk.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) \ printree.$(OBJEXT) mlclock.$(OBJEXT) fpromlk_OBJECTS = $(am_fpromlk_OBJECTS) fpromlk_LDADD = $(LDADD) @LOCALLINK_FALSE@fpromlk_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fpromlk_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fprotdist_OBJECTS = protdist.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fprotdist_OBJECTS = $(am_fprotdist_OBJECTS) fprotdist_LDADD = $(LDADD) 
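# Each fXXX tool here pairs one driver source (pars.c, penny.c, proml.c,
# and so on) with a few shared PHYLIP modules (seq.c, dist.c, disc.c,
# phylip.c, ...).  The am_fXXX_OBJECTS, fXXX_LDADD and fXXX_DEPENDENCIES
# blocks are generated by automake from src/Makefile.am and are not meant
# to be edited in this file; a new tool would be declared in Makefile.am
# instead.  A hypothetical sketch (fnewprog and newprog.c are placeholder
# names, not part of this package):
#
#   # in src/Makefile.am: add fnewprog to the bin_PROGRAMS list, then
#   fnewprog_SOURCES = newprog.c seq.c phylip.c
#
# Re-running automake regenerates Makefile.in, and ./config.status (or
# ./configure) then produces the matching am_fnewprog_OBJECTS and
# fnewprog_DEPENDENCIES entries in the output Makefile.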
@LOCALLINK_FALSE@fprotdist_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fprotdist_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fprotpars_OBJECTS = protpars.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fprotpars_OBJECTS = $(am_fprotpars_OBJECTS) fprotpars_LDADD = $(LDADD) @LOCALLINK_FALSE@fprotpars_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fprotpars_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_frestboot_OBJECTS = restboot.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) frestboot_OBJECTS = $(am_frestboot_OBJECTS) frestboot_LDADD = $(LDADD) @LOCALLINK_FALSE@frestboot_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@frestboot_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_frestdist_OBJECTS = restdist.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) frestdist_OBJECTS = $(am_frestdist_OBJECTS) frestdist_LDADD = $(LDADD) @LOCALLINK_FALSE@frestdist_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@frestdist_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_frestml_OBJECTS = restml.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) frestml_OBJECTS = $(am_frestml_OBJECTS) frestml_LDADD = $(LDADD) @LOCALLINK_FALSE@frestml_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@frestml_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ 
$(am__DEPENDENCIES_1) am_fretree_OBJECTS = retree.$(OBJEXT) moves.$(OBJEXT) phylip.$(OBJEXT) fretree_OBJECTS = $(am_fretree_OBJECTS) fretree_LDADD = $(LDADD) @LOCALLINK_FALSE@fretree_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fretree_DEPENDENCIES = ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fseqboot_OBJECTS = seqboot.$(OBJEXT) seq.$(OBJEXT) phylip.$(OBJEXT) fseqboot_OBJECTS = $(am_fseqboot_OBJECTS) fseqboot_LDADD = $(LDADD) @LOCALLINK_FALSE@fseqboot_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fseqboot_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_fseqbootall_OBJECTS = seqbootall.$(OBJEXT) seq.$(OBJEXT) \ phylip.$(OBJEXT) fseqbootall_OBJECTS = $(am_fseqbootall_OBJECTS) fseqbootall_LDADD = $(LDADD) @LOCALLINK_FALSE@fseqbootall_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@fseqbootall_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_ftreedist_OBJECTS = treedist.$(OBJEXT) cons.$(OBJEXT) \ phylip.$(OBJEXT) ftreedist_OBJECTS = $(am_ftreedist_OBJECTS) ftreedist_LDADD = $(LDADD) @LOCALLINK_FALSE@ftreedist_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@ftreedist_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ ../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) am_ftreedistpair_OBJECTS = treedistpair.$(OBJEXT) cons.$(OBJEXT) \ phylip.$(OBJEXT) ftreedistpair_OBJECTS = $(am_ftreedistpair_OBJECTS) ftreedistpair_LDADD = $(LDADD) @LOCALLINK_FALSE@ftreedistpair_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @LOCALLINK_FALSE@ $(am__DEPENDENCIES_1) @LOCALLINK_TRUE@ftreedistpair_DEPENDENCIES = \ @LOCALLINK_TRUE@ ../../../nucleus/libnucleus.la \ @LOCALLINK_TRUE@ ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ 
../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la \ @LOCALLINK_TRUE@ $(am__DEPENDENCIES_1) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(fclique_SOURCES) $(fconsense_SOURCES) $(fcontml_SOURCES) \ $(fcontrast_SOURCES) $(fdiscboot_SOURCES) $(fdnacomp_SOURCES) \ $(fdnadist_SOURCES) $(fdnainvar_SOURCES) $(fdnaml_SOURCES) \ $(fdnamlk_SOURCES) $(fdnamove_SOURCES) $(fdnapars_SOURCES) \ $(fdnapenny_SOURCES) $(fdollop_SOURCES) $(fdolmove_SOURCES) \ $(fdolpenny_SOURCES) $(fdrawgram_SOURCES) $(fdrawtree_SOURCES) \ $(ffactor_SOURCES) $(ffitch_SOURCES) $(ffreqboot_SOURCES) \ $(fgendist_SOURCES) $(fkitsch_SOURCES) $(fmix_SOURCES) \ $(fmove_SOURCES) $(fneighbor_SOURCES) $(fpars_SOURCES) \ $(fpenny_SOURCES) $(fproml_SOURCES) $(fpromlk_SOURCES) \ $(fprotdist_SOURCES) $(fprotpars_SOURCES) $(frestboot_SOURCES) \ $(frestdist_SOURCES) $(frestml_SOURCES) $(fretree_SOURCES) \ $(fseqboot_SOURCES) $(fseqbootall_SOURCES) \ $(ftreedist_SOURCES) $(ftreedistpair_SOURCES) DIST_SOURCES = $(fclique_SOURCES) $(fconsense_SOURCES) \ $(fcontml_SOURCES) $(fcontrast_SOURCES) $(fdiscboot_SOURCES) \ $(fdnacomp_SOURCES) $(fdnadist_SOURCES) $(fdnainvar_SOURCES) \ $(fdnaml_SOURCES) $(fdnamlk_SOURCES) $(fdnamove_SOURCES) \ $(fdnapars_SOURCES) $(fdnapenny_SOURCES) $(fdollop_SOURCES) \ $(fdolmove_SOURCES) $(fdolpenny_SOURCES) $(fdrawgram_SOURCES) \ $(fdrawtree_SOURCES) $(ffactor_SOURCES) $(ffitch_SOURCES) \ $(ffreqboot_SOURCES) $(fgendist_SOURCES) $(fkitsch_SOURCES) \ $(fmix_SOURCES) $(fmove_SOURCES) $(fneighbor_SOURCES) \ $(fpars_SOURCES) $(fpenny_SOURCES) $(fproml_SOURCES) \ $(fpromlk_SOURCES) $(fprotdist_SOURCES) $(fprotpars_SOURCES) \ $(frestboot_SOURCES) $(frestdist_SOURCES) $(frestml_SOURCES) \ $(fretree_SOURCES) $(fseqboot_SOURCES) $(fseqbootall_SOURCES) \ $(ftreedist_SOURCES) $(ftreedistpair_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = 
@JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ @ESYSTEMLIBS_FALSE@LLINCLUDES = -I../../../ajax/expat -I../../../ajax/zlib @ESYSTEMLIBS_FALSE@NLINCLUDES = -I${embprefix}/include/ezlib -I${embprefix}/include/eexpat @ESYSTEMLIBS_FALSE@LLAIXLIBS = -L../../../ajax/expat/.libs -L../../../ajax/zlib/.libs @ESYSTEMLIBS_FALSE@NLAIXLIBS = -leexpat -lezlib @ESYSTEMLIBS_FALSE@LLADD = ../../../ajax/expat/libeexpat.la ../../../ajax/zlib/libezlib.la @ESYSTEMLIBS_FALSE@NLADD = -leexpat -lezlib @LOCALLINK_FALSE@AM_CPPFLAGS = -I../include -I${embprefix}/include \ @LOCALLINK_FALSE@ -I${embprefix}/include/eplplot \ @LOCALLINK_FALSE@ $(NLINCLUDES) \ @LOCALLINK_FALSE@ -I${embprefix}/include/epcre @LOCALLINK_TRUE@AM_CPPFLAGS = -I../include 
-I../../../nucleus -I../../../ajax/pcre \ @LOCALLINK_TRUE@ $(LLINCLUDES) \ @LOCALLINK_TRUE@ -I../../../ajax/core -I../../../ajax/graphics \ @LOCALLINK_TRUE@ -I../../../ajax/ensembl -I../../../ajax/ajaxdb \ @LOCALLINK_TRUE@ -I../../../ajax/acd -I../../../plplot @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_FALSE@AIX_CFLAGS = -Wl,-bdynamic -Wl,-brtl -L${embprefix}/lib -lnucleus -lacd \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_FALSE@-lajaxdb -lensembl -lajaxg -lajax -lepcre $(NLAIXLIBS) -leplplot @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@AIX_CFLAGS = -Wl,-bdynamic -Wl,-brtl -L../../../plplot/.libs \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-L../../../ajax/pcre/.libs $(LLAIXLIBS) \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-L../../../ajax/core/.libs \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-L../../../ajax/graphics/.libs -L../../../ajax/ensembl/.libs \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-L../../../ajax/ajaxdb/.libs -L../../../ajax/acd/.libs \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-L../../../nucleus/.libs \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@-lnucleus -lacd -lajaxdb -lensembl -lajaxg -lajax -lepcre \ @ISAIXIA64_TRUE@@ISSHARED_TRUE@@LOCALLINK_TRUE@ $(NLAIXLIBS) -leplplot AM_CFLAGS = $(AIX_CFLAGS) $(WARN_CFLAGS) $(DEVWARN_CFLAGS) fclique_SOURCES = clique.c disc.c phylip.c fconsense_SOURCES = consense.c cons.c phylip.c fcontml_SOURCES = contml.c cont.c phylip.c fcontrast_SOURCES = contrast.c cont.c phylip.c fdnacomp_SOURCES = dnacomp.c seq.c phylip.c fdnadist_SOURCES = dnadist.c seq.c phylip.c fdnainvar_SOURCES = dnainvar.c seq.c phylip.c fdnaml_SOURCES = dnaml.c seq.c phylip.c fdnamlk_SOURCES = dnamlk.c seq.c phylip.c printree.c mlclock.c fdnamove_SOURCES = dnamove.c moves.c seq.c phylip.c fdnapenny_SOURCES = dnapenny.c seq.c phylip.c fdnapars_SOURCES = dnapars.c seq.c phylip.c fdolmove_SOURCES = dolmove.c disc.c moves.c dollo.c phylip.c fdollop_SOURCES = dollop.c disc.c dollo.c phylip.c fdolpenny_SOURCES = dolpenny.c disc.c dollo.c phylip.c fdrawgram_SOURCES = drawgram.c draw.c draw2.c phylip.c fdrawtree_SOURCES = drawtree.c draw.c draw2.c phylip.c ffactor_SOURCES = factor.c phylip.c ffitch_SOURCES = fitch.c dist.c phylip.c fgendist_SOURCES = gendist.c phylip.c fkitsch_SOURCES = kitsch.c dist.c phylip.c fmix_SOURCES = mix.c disc.c wagner.c phylip.c fmove_SOURCES = move.c disc.c moves.c wagner.c phylip.c fneighbor_SOURCES = neighbor.c dist.c phylip.c fpars_SOURCES = pars.c discrete.c phylip.c fpenny_SOURCES = penny.c disc.c wagner.c phylip.c fproml_SOURCES = proml.c seq.c phylip.c fpromlk_SOURCES = promlk.c seq.c phylip.c printree.c mlclock.c fprotdist_SOURCES = protdist.c seq.c phylip.c fprotpars_SOURCES = protpars.c seq.c phylip.c frestdist_SOURCES = restdist.c seq.c phylip.c frestml_SOURCES = restml.c seq.c phylip.c fretree_SOURCES = retree.c moves.c phylip.c ftreedist_SOURCES = treedist.c cons.c phylip.c ftreedistpair_SOURCES = treedistpair.c cons.c phylip.c fdiscboot_SOURCES = discboot.c seq.c phylip.c ffreqboot_SOURCES = freqboot.c seq.c phylip.c frestboot_SOURCES = restboot.c seq.c phylip.c fseqboot_SOURCES = seqboot.c seq.c phylip.c fseqbootall_SOURCES = seqbootall.c seq.c phylip.c @LOCALLINK_FALSE@LDADD = -L${embprefix}/lib -lnucleus -lacd -lajaxdb -lensembl -lajaxg \ @LOCALLINK_FALSE@ -lajax -lepcre $(NLADD) -leplplot $(XLIB) @LOCALLINK_TRUE@LDADD = ../../../nucleus/libnucleus.la ../../../ajax/acd/libacd.la \ @LOCALLINK_TRUE@ ../../../ajax/ajaxdb/libajaxdb.la \ @LOCALLINK_TRUE@ ../../../ajax/ensembl/libensembl.la \ @LOCALLINK_TRUE@ 
../../../ajax/graphics/libajaxg.la \ @LOCALLINK_TRUE@ ../../../ajax/core/libajax.la \ @LOCALLINK_TRUE@ $(LLADD) \ @LOCALLINK_TRUE@ ../../../ajax/pcre/libepcre.la \ @LOCALLINK_TRUE@ ../../../plplot/libeplplot.la $(XLIB) all: config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu src/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): config.h: stamp-h1 @if test ! -f $@; then rm -f stamp-h1; else :; fi @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status src/config.h $(srcdir)/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f config.h stamp-h1 install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p || test -f $$p1; \ then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list 
|| exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list fclique$(EXEEXT): $(fclique_OBJECTS) $(fclique_DEPENDENCIES) $(EXTRA_fclique_DEPENDENCIES) @rm -f fclique$(EXEEXT) $(LINK) $(fclique_OBJECTS) $(fclique_LDADD) $(LIBS) fconsense$(EXEEXT): $(fconsense_OBJECTS) $(fconsense_DEPENDENCIES) $(EXTRA_fconsense_DEPENDENCIES) @rm -f fconsense$(EXEEXT) $(LINK) $(fconsense_OBJECTS) $(fconsense_LDADD) $(LIBS) fcontml$(EXEEXT): $(fcontml_OBJECTS) $(fcontml_DEPENDENCIES) $(EXTRA_fcontml_DEPENDENCIES) @rm -f fcontml$(EXEEXT) $(LINK) $(fcontml_OBJECTS) $(fcontml_LDADD) $(LIBS) fcontrast$(EXEEXT): $(fcontrast_OBJECTS) $(fcontrast_DEPENDENCIES) $(EXTRA_fcontrast_DEPENDENCIES) @rm -f fcontrast$(EXEEXT) $(LINK) $(fcontrast_OBJECTS) $(fcontrast_LDADD) $(LIBS) fdiscboot$(EXEEXT): $(fdiscboot_OBJECTS) $(fdiscboot_DEPENDENCIES) $(EXTRA_fdiscboot_DEPENDENCIES) @rm -f fdiscboot$(EXEEXT) $(LINK) $(fdiscboot_OBJECTS) $(fdiscboot_LDADD) $(LIBS) fdnacomp$(EXEEXT): $(fdnacomp_OBJECTS) $(fdnacomp_DEPENDENCIES) $(EXTRA_fdnacomp_DEPENDENCIES) @rm -f fdnacomp$(EXEEXT) $(LINK) $(fdnacomp_OBJECTS) $(fdnacomp_LDADD) $(LIBS) fdnadist$(EXEEXT): $(fdnadist_OBJECTS) $(fdnadist_DEPENDENCIES) $(EXTRA_fdnadist_DEPENDENCIES) @rm -f fdnadist$(EXEEXT) $(LINK) $(fdnadist_OBJECTS) $(fdnadist_LDADD) $(LIBS) fdnainvar$(EXEEXT): $(fdnainvar_OBJECTS) $(fdnainvar_DEPENDENCIES) $(EXTRA_fdnainvar_DEPENDENCIES) @rm -f fdnainvar$(EXEEXT) $(LINK) $(fdnainvar_OBJECTS) $(fdnainvar_LDADD) $(LIBS) fdnaml$(EXEEXT): $(fdnaml_OBJECTS) $(fdnaml_DEPENDENCIES) $(EXTRA_fdnaml_DEPENDENCIES) @rm -f fdnaml$(EXEEXT) $(LINK) $(fdnaml_OBJECTS) $(fdnaml_LDADD) $(LIBS) fdnamlk$(EXEEXT): $(fdnamlk_OBJECTS) $(fdnamlk_DEPENDENCIES) $(EXTRA_fdnamlk_DEPENDENCIES) @rm -f fdnamlk$(EXEEXT) $(LINK) $(fdnamlk_OBJECTS) $(fdnamlk_LDADD) $(LIBS) fdnamove$(EXEEXT): $(fdnamove_OBJECTS) $(fdnamove_DEPENDENCIES) $(EXTRA_fdnamove_DEPENDENCIES) @rm -f fdnamove$(EXEEXT) $(LINK) $(fdnamove_OBJECTS) $(fdnamove_LDADD) $(LIBS) fdnapars$(EXEEXT): $(fdnapars_OBJECTS) $(fdnapars_DEPENDENCIES) $(EXTRA_fdnapars_DEPENDENCIES) @rm -f fdnapars$(EXEEXT) $(LINK) $(fdnapars_OBJECTS) $(fdnapars_LDADD) $(LIBS) fdnapenny$(EXEEXT): $(fdnapenny_OBJECTS) $(fdnapenny_DEPENDENCIES) $(EXTRA_fdnapenny_DEPENDENCIES) @rm -f fdnapenny$(EXEEXT) $(LINK) $(fdnapenny_OBJECTS) $(fdnapenny_LDADD) $(LIBS) fdollop$(EXEEXT): $(fdollop_OBJECTS) $(fdollop_DEPENDENCIES) $(EXTRA_fdollop_DEPENDENCIES) @rm -f fdollop$(EXEEXT) $(LINK) $(fdollop_OBJECTS) $(fdollop_LDADD) $(LIBS) fdolmove$(EXEEXT): $(fdolmove_OBJECTS) $(fdolmove_DEPENDENCIES) $(EXTRA_fdolmove_DEPENDENCIES) @rm -f fdolmove$(EXEEXT) $(LINK) $(fdolmove_OBJECTS) $(fdolmove_LDADD) $(LIBS) fdolpenny$(EXEEXT): $(fdolpenny_OBJECTS) $(fdolpenny_DEPENDENCIES) $(EXTRA_fdolpenny_DEPENDENCIES) @rm -f fdolpenny$(EXEEXT) $(LINK) $(fdolpenny_OBJECTS) $(fdolpenny_LDADD) $(LIBS) fdrawgram$(EXEEXT): $(fdrawgram_OBJECTS) $(fdrawgram_DEPENDENCIES) $(EXTRA_fdrawgram_DEPENDENCIES) @rm -f fdrawgram$(EXEEXT) $(LINK) $(fdrawgram_OBJECTS) $(fdrawgram_LDADD) $(LIBS) fdrawtree$(EXEEXT): $(fdrawtree_OBJECTS) $(fdrawtree_DEPENDENCIES) $(EXTRA_fdrawtree_DEPENDENCIES) @rm -f fdrawtree$(EXEEXT) $(LINK) $(fdrawtree_OBJECTS) $(fdrawtree_LDADD) $(LIBS) ffactor$(EXEEXT): $(ffactor_OBJECTS) $(ffactor_DEPENDENCIES) $(EXTRA_ffactor_DEPENDENCIES) @rm -f ffactor$(EXEEXT) $(LINK) $(ffactor_OBJECTS) $(ffactor_LDADD) $(LIBS) ffitch$(EXEEXT): $(ffitch_OBJECTS) $(ffitch_DEPENDENCIES) 
$(EXTRA_ffitch_DEPENDENCIES) @rm -f ffitch$(EXEEXT) $(LINK) $(ffitch_OBJECTS) $(ffitch_LDADD) $(LIBS) ffreqboot$(EXEEXT): $(ffreqboot_OBJECTS) $(ffreqboot_DEPENDENCIES) $(EXTRA_ffreqboot_DEPENDENCIES) @rm -f ffreqboot$(EXEEXT) $(LINK) $(ffreqboot_OBJECTS) $(ffreqboot_LDADD) $(LIBS) fgendist$(EXEEXT): $(fgendist_OBJECTS) $(fgendist_DEPENDENCIES) $(EXTRA_fgendist_DEPENDENCIES) @rm -f fgendist$(EXEEXT) $(LINK) $(fgendist_OBJECTS) $(fgendist_LDADD) $(LIBS) fkitsch$(EXEEXT): $(fkitsch_OBJECTS) $(fkitsch_DEPENDENCIES) $(EXTRA_fkitsch_DEPENDENCIES) @rm -f fkitsch$(EXEEXT) $(LINK) $(fkitsch_OBJECTS) $(fkitsch_LDADD) $(LIBS) fmix$(EXEEXT): $(fmix_OBJECTS) $(fmix_DEPENDENCIES) $(EXTRA_fmix_DEPENDENCIES) @rm -f fmix$(EXEEXT) $(LINK) $(fmix_OBJECTS) $(fmix_LDADD) $(LIBS) fmove$(EXEEXT): $(fmove_OBJECTS) $(fmove_DEPENDENCIES) $(EXTRA_fmove_DEPENDENCIES) @rm -f fmove$(EXEEXT) $(LINK) $(fmove_OBJECTS) $(fmove_LDADD) $(LIBS) fneighbor$(EXEEXT): $(fneighbor_OBJECTS) $(fneighbor_DEPENDENCIES) $(EXTRA_fneighbor_DEPENDENCIES) @rm -f fneighbor$(EXEEXT) $(LINK) $(fneighbor_OBJECTS) $(fneighbor_LDADD) $(LIBS) fpars$(EXEEXT): $(fpars_OBJECTS) $(fpars_DEPENDENCIES) $(EXTRA_fpars_DEPENDENCIES) @rm -f fpars$(EXEEXT) $(LINK) $(fpars_OBJECTS) $(fpars_LDADD) $(LIBS) fpenny$(EXEEXT): $(fpenny_OBJECTS) $(fpenny_DEPENDENCIES) $(EXTRA_fpenny_DEPENDENCIES) @rm -f fpenny$(EXEEXT) $(LINK) $(fpenny_OBJECTS) $(fpenny_LDADD) $(LIBS) fproml$(EXEEXT): $(fproml_OBJECTS) $(fproml_DEPENDENCIES) $(EXTRA_fproml_DEPENDENCIES) @rm -f fproml$(EXEEXT) $(LINK) $(fproml_OBJECTS) $(fproml_LDADD) $(LIBS) fpromlk$(EXEEXT): $(fpromlk_OBJECTS) $(fpromlk_DEPENDENCIES) $(EXTRA_fpromlk_DEPENDENCIES) @rm -f fpromlk$(EXEEXT) $(LINK) $(fpromlk_OBJECTS) $(fpromlk_LDADD) $(LIBS) fprotdist$(EXEEXT): $(fprotdist_OBJECTS) $(fprotdist_DEPENDENCIES) $(EXTRA_fprotdist_DEPENDENCIES) @rm -f fprotdist$(EXEEXT) $(LINK) $(fprotdist_OBJECTS) $(fprotdist_LDADD) $(LIBS) fprotpars$(EXEEXT): $(fprotpars_OBJECTS) $(fprotpars_DEPENDENCIES) $(EXTRA_fprotpars_DEPENDENCIES) @rm -f fprotpars$(EXEEXT) $(LINK) $(fprotpars_OBJECTS) $(fprotpars_LDADD) $(LIBS) frestboot$(EXEEXT): $(frestboot_OBJECTS) $(frestboot_DEPENDENCIES) $(EXTRA_frestboot_DEPENDENCIES) @rm -f frestboot$(EXEEXT) $(LINK) $(frestboot_OBJECTS) $(frestboot_LDADD) $(LIBS) frestdist$(EXEEXT): $(frestdist_OBJECTS) $(frestdist_DEPENDENCIES) $(EXTRA_frestdist_DEPENDENCIES) @rm -f frestdist$(EXEEXT) $(LINK) $(frestdist_OBJECTS) $(frestdist_LDADD) $(LIBS) frestml$(EXEEXT): $(frestml_OBJECTS) $(frestml_DEPENDENCIES) $(EXTRA_frestml_DEPENDENCIES) @rm -f frestml$(EXEEXT) $(LINK) $(frestml_OBJECTS) $(frestml_LDADD) $(LIBS) fretree$(EXEEXT): $(fretree_OBJECTS) $(fretree_DEPENDENCIES) $(EXTRA_fretree_DEPENDENCIES) @rm -f fretree$(EXEEXT) $(LINK) $(fretree_OBJECTS) $(fretree_LDADD) $(LIBS) fseqboot$(EXEEXT): $(fseqboot_OBJECTS) $(fseqboot_DEPENDENCIES) $(EXTRA_fseqboot_DEPENDENCIES) @rm -f fseqboot$(EXEEXT) $(LINK) $(fseqboot_OBJECTS) $(fseqboot_LDADD) $(LIBS) fseqbootall$(EXEEXT): $(fseqbootall_OBJECTS) $(fseqbootall_DEPENDENCIES) $(EXTRA_fseqbootall_DEPENDENCIES) @rm -f fseqbootall$(EXEEXT) $(LINK) $(fseqbootall_OBJECTS) $(fseqbootall_LDADD) $(LIBS) ftreedist$(EXEEXT): $(ftreedist_OBJECTS) $(ftreedist_DEPENDENCIES) $(EXTRA_ftreedist_DEPENDENCIES) @rm -f ftreedist$(EXEEXT) $(LINK) $(ftreedist_OBJECTS) $(ftreedist_LDADD) $(LIBS) ftreedistpair$(EXEEXT): $(ftreedistpair_OBJECTS) $(ftreedistpair_DEPENDENCIES) $(EXTRA_ftreedistpair_DEPENDENCIES) @rm -f ftreedistpair$(EXEEXT) $(LINK) $(ftreedistpair_OBJECTS) $(ftreedistpair_LDADD) 
$(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clique.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cons.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/consense.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cont.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/contml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/contrast.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/disc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/discboot.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/discrete.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnacomp.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnadist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnainvar.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnaml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnamlk.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnamove.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnapars.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dnapenny.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dollo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dollop.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dolmove.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dolpenny.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/draw.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/draw2.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/drawgram.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/drawtree.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/factor.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fitch.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freqboot.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gendist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kitsch.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mix.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mlclock.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/move.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/moves.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neighbor.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pars.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/penny.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phylip.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/printree.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/promlk.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/protdist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/protpars.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/restboot.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/restdist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/restml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/retree.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seq.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/seqboot.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seqbootall.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/treedist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/treedistpair.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wagner.Po@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c $< .c.obj: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: $(HEADERS) $(SOURCES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: 
$(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(PROGRAMS) config.h installdirs: for dir in "$(DESTDIR)$(bindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
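# The clean targets here follow the usual Automake hierarchy, each level
# removing strictly more than the one before it:
#
#   mostlyclean       object files only (*.$(OBJEXT), *.lo)
#   clean             mostlyclean plus the built fXXX programs and .libs
#   distclean         clean plus configure output (Makefile, config.h,
#                     stamp-h1) and the $(DEPDIR) dependency files
#   maintainer-clean  distclean plus anything that needs maintainer tools
#                     (autoconf/automake) to regenerate
#
# For example, "make distclean" in src/ leaves only what shipped in the
# distribution, so a fresh ./configure is required before building again.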
clean: clean-am clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-binPROGRAMS install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-binPROGRAMS .MAKE: all install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ clean-generic clean-libtool cscopelist ctags distclean \ distclean-compile distclean-generic distclean-hdr \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-binPROGRAMS \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-binPROGRAMS # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/src/dnapars.c0000664000175000017500000012645211616234204012662 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6 (c) Copyright 1993-2002 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ #define MAXNUMTREES 1000000 /* bigger than number of user trees can be */ extern sequence y; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void reallocchars(void); void doinit(void); void makeweights(void); void doinput(void); void initdnaparsnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void evaluate(node *); void tryadd(node *, node *, node *); void addpreorder(node *, node *, node *); void trydescendants(node *, node *, node *, node *, boolean); void trylocal(node *, node *); void trylocal2(node *, node *, node *); void tryrearr(node *, boolean *); void repreorder(node *, boolean *); void rearrange(node **); void describe(void); void dnapars_coordinates(node *, double, long *, double *); void dnapars_printree(void); void globrearrange(void); void grandrearr(void); void maketree(void); void freerest(void); void load_tree(long treei); /* function prototypes */ #endif Char infilename[FNMLNGTH], intreename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; char basechar[32]="ACMGRSVTWYHKDBNO???????????????"; node *root; long chars, col, msets, ith, njumble, jumb, maxtrees, numtrees; /* chars = number of sites in actual sequences */ long inseed, inseed0; double threshold; boolean jumble, usertree, thresh, weights, thorough, rearrfirst, trout, progress, stepbox, ancseq, mulsets, justwts, firstset, mulf, multf; steptr oldweight; longer seed; pointarray treenode; /* pointers to all nodes in tree */ long *enterorder; long *zeros; /* local variables for Pascal maketree, propagated globally for C version: */ long minwhich; double like, minsteps, bestyet, bestlike, bstlike2; boolean lastrearr, recompute; double nsteps[maxuser]; long **fsteps; node *there, *oldnufork; long *place; bestelm *bestrees; long *threshwt; baseptr nothing; gbases *garbage; node *temp, *temp1, *temp2, *tempsum, *temprm, *tempadd, *tempf, *tmp, *tmp1, *tmp2, *tmp3, *tmprm, *tmpadd; boolean *names; node *grbg; char *progname; void emboss_getoptions(char *pgm, int argc, char *argv[]) { jumble = false; njumble = 1; outgrno = 1; outgropt = false; thresh = false; thorough = true; transvp = false; rearrfirst = false; maxtrees = 10000; trout = true; usertree = false; weights = false; mulsets = false; printdata = false; progress = true; treeprint = true; stepbox = false; ancseq = false; dotdiff = true; msets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees){ numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; } numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; msets = numseqs; } else if (numwts > 1) { mulsets = true; msets = numwts; justwts = true; } progress = ajAcdGetBoolean("progress"); printdata = ajAcdGetBoolean("printdata"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); if(!usertree) { thorough = ajAcdGetToggle("thorough"); if(thorough) rearrfirst = ajAcdGetBoolean("rearrange"); maxtrees = ajAcdGetInt("maxtrees"); njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = 
ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; thresh = ajAcdGetToggle("dothreshold"); if(thresh) threshold = ajAcdGetFloat("threshold"); stepbox = ajAcdGetBoolean("stepbox"); ancseq = ajAcdGetBoolean("ancseq"); transvp = ajAcdGetBoolean("transversion"); if (ancseq || printdata) ajAcdGetBoolean("dotdiff"); embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nDNA parsimony algorithm, version %s\n\n",VERSION); if (transvp) fprintf(outfile, "Transversion parsimony\n\n"); } /* getoptions */ void allocrest() { long i; y = (Char **)Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *)Malloc(chars*sizeof(Char)); bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm)); for (i = 1; i <= maxtrees; i++) bestrees[i - 1].btree = (long *)Malloc(nonodes*sizeof(long)); nayme = (naym *)Malloc(spp*sizeof(naym)); enterorder = (long *)Malloc(spp*sizeof(long)); place = (long *)Malloc(nonodes*sizeof(long)); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (long *)Malloc(chars*sizeof(long)); ally = (long *)Malloc(chars*sizeof(long)); location = (long *)Malloc(chars*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &chars, &nonodes, 1); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars); alloctree(&treenode, nonodes, usertree); } /* doinit */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= chars; i++) { alias[i - 1] = i; oldweight[i - 1] = weight[i - 1]; ally[i - 1] = i; } sitesort(chars, weight); sitecombine(chars); sitescrunch(chars); endsite = 0; for (i = 1; i <= chars; i++) { if (ally[i - 1] == i) endsite++; } for (i = 1; i <= endsite; i++) location[alias[i - 1] - 1] = i; if (!thresh) threshold = spp; threshwt = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) { weight[i] *= 10; threshwt[i] = (long)(threshold * weight[i] + 0.5); } zeros = (long *)Malloc(endsite*sizeof(long)); for (i = 0; i < endsite; i++) zeros[i] = 0; } /* makeweights */ void doinput() { /* reads the input data */ long i; if (justwts) { if (firstset) seq_inputdata(seqsets[0],chars); for (i = 0; i < chars; i++) weight[i] = 1; inputweightsstr(phyloweights->Str[0], chars, weight, &weights); if (justwts) { fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); if (progress) printf("\nWeights set # %ld:\n\n", ith); } if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } else { if (!firstset){ samenumspseq(seqsets[ith-1], &chars, ith); reallocchars(); } seq_inputdata(seqsets[ith-1], chars); for (i = 0; i < chars; i++) weight[i] = 1; if (weights) { inputweightsstr(phyloweights->Str[0], chars, weight, &weights); if (printdata) printweights(outfile, 0, chars, weight, "Sites"); } } makeweights(); makevalues(treenode, zeros, usertree); if (!usertree) { allocnode(&temp, zeros, endsite); allocnode(&temp1, zeros, endsite); allocnode(&temp2, zeros, endsite); allocnode(&tempsum, zeros, endsite); allocnode(&temprm, zeros, endsite); allocnode(&tempadd, zeros, endsite); allocnode(&tempf, 
zeros, endsite); allocnode(&tmp, zeros, endsite); allocnode(&tmp1, zeros, endsite); allocnode(&tmp2, zeros, endsite); allocnode(&tmp3, zeros, endsite); allocnode(&tmprm, zeros, endsite); allocnode(&tmpadd, zeros, endsite); } } /* doinput */ void initdnaparsnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnutreenode(grbg, p, nodei, endsite, zeros); treenode[nodei - 1] = *p; break; case nonbottom: gnutreenode(grbg, p, nodei, endsite, zeros); break; case tip: match_names_to_data (str, treenode, p, spp); break; case length: /* if there is a length, read it and discard value */ processlength(&valyew, &divisor, ch, &minusread, treestr, parens); break; default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/ break; } } /* initdnaparsnode */ void evaluate(node *r) { /* determines the number of steps needed for a tree. this is the minimum number of steps needed to evolve sequences on this tree */ long i, steps; long term; double sum; sum = 0.0; for (i = 0; i < endsite; i++) { steps = r->numsteps[i]; if ((long)steps <= threshwt[i]) term = steps; else term = threshwt[i]; sum += (double)term; if (usertree && which <= maxuser) fsteps[which - 1][i] = term; } if (usertree && which <= maxuser) { nsteps[which - 1] = sum; if (which == 1) { minwhich = 1; minsteps = sum; } else if (sum < minsteps) { minwhich = which; minsteps = sum; } } like = -sum; } /* evaluate */ void tryadd(node *p, node *item, node *nufork) { /* temporarily adds one fork and one tip to the tree. if the location where they are added yields greater "likelihood" than other locations tested up to that time, then keeps that location as there */ long pos; double belowsum, parentsum; boolean found, collapse, changethere, trysave; if (!p->tip) { memcpy(temp->base, p->base, endsite*sizeof(long)); memcpy(temp->numsteps, p->numsteps, endsite*sizeof(long)); memcpy(temp->numnuc, p->numnuc, endsite*sizeof(nucarray)); temp->numdesc = p->numdesc + 1; if (p->back) { multifillin(temp, tempadd, 1); sumnsteps2(tempsum, temp, p->back, 0, endsite, threshwt); } else { multisumnsteps(temp, tempadd, 0, endsite, threshwt); tempsum->sumsteps = temp->sumsteps; } if (tempsum->sumsteps <= -bestyet) { if (p->back) sumnsteps2(tempsum, temp, p->back, endsite+1, endsite, threshwt); else { multisumnsteps(temp, temp1, endsite+1, endsite, threshwt); tempsum->sumsteps = temp->sumsteps; } } p->sumsteps = tempsum->sumsteps; } if (p == root) sumnsteps2(temp, item, p, 0, endsite, threshwt); else { sumnsteps(temp1, item, p, 0, endsite); sumnsteps2(temp, temp1, p->back, 0, endsite, threshwt); } if (temp->sumsteps <= -bestyet) { if (p == root) sumnsteps2(temp, item, p, endsite+1, endsite, threshwt); else { sumnsteps(temp1, item, p, endsite+1, endsite); sumnsteps2(temp, temp1, p->back, endsite+1, endsite, threshwt); } } belowsum = temp->sumsteps; multf = false; like = -belowsum; if (!p->tip && belowsum >= p->sumsteps) { multf = true; like = -p->sumsteps; } trysave = true; if (!multf && p != root) { parentsum = treenode[p->back->index - 1]->sumsteps; if (belowsum >= parentsum) trysave = false; } if (lastrearr) { changethere = true; if (like >= bstlike2 && trysave) { if (like > bstlike2) found = false; else { addnsave(p, item, nufork, &root, &grbg, multf, treenode, place, zeros); pos = 0; findtree(&found, &pos, nextree, place, bestrees); } if 
(!found) { collapse = collapsible(item, p, temp, temp1, temp2, tempsum, temprm, tmpadd, multf, root, zeros, treenode); if (!thorough) changethere = !collapse; if (thorough || !collapse || like > bstlike2 || (nextree == 1)) { if (like > bstlike2) { addnsave(p, item, nufork, &root, &grbg, multf, treenode, place, zeros); bestlike = bstlike2 = like; addbestever(&pos, &nextree, maxtrees, collapse, place, bestrees); } else addtiedtree(pos, &nextree, maxtrees, collapse, place, bestrees); } } } if (like >= bestyet) { if (like > bstlike2) bstlike2 = like; if (changethere && trysave) { bestyet = like; there = p; mulf = multf; } } } else if ((like > bestyet) || (like >= bestyet && trysave)) { bestyet = like; there = p; mulf = multf; } } /* tryadd */ void addpreorder(node *p, node *item, node *nufork) { /* traverses a n-ary tree, calling function tryadd at a node before calling tryadd at its descendants */ node *q; if (p == NULL) return; tryadd(p, item, nufork); if (!p->tip) { q = p->next; while (q != p) { addpreorder(q->back, item, nufork); q = q->next; } } } /* addpreorder */ void trydescendants(node *item, node *forknode, node *parent, node *parentback, boolean trybelow) { /* tries rearrangements at parent and below parent's descendants */ node *q, *tempblw; boolean bestever=0, belowbetter, multf=0, saved, trysave; double parentsum=0, belowsum; memcpy(temp->base, parent->base, endsite*sizeof(long)); memcpy(temp->numsteps, parent->numsteps, endsite*sizeof(long)); memcpy(temp->numnuc, parent->numnuc, endsite*sizeof(nucarray)); temp->numdesc = parent->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, parentback, temp, 0, endsite, threshwt); belowbetter = true; if (lastrearr) { parentsum = tempsum->sumsteps; if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, parent, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = parent; mulf = true; } } } else if (-tempsum->sumsteps >= like) { there = parent; mulf = true; like = -tempsum->sumsteps; } if (trybelow) { sumnsteps(temp, parent, tempadd, 0, endsite); sumnsteps2(tempsum, temp, parentback, 0, endsite, threshwt); if (lastrearr) { belowsum = tempsum->sumsteps; if (-tempsum->sumsteps >= bstlike2 && belowbetter && (forknode->numdesc > 2 || (forknode->numdesc == 2 && parent->back->index != forknode->index))) { trysave = false; memcpy(temp->base, parentback->base, endsite*sizeof(long)); memcpy(temp->numsteps, parentback->numsteps, endsite*sizeof(long)); memcpy(temp->numnuc, parentback->numnuc, endsite*sizeof(nucarray)); temp->numdesc = parentback->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, parent, temp, 0, endsite, threshwt); if (-tempsum->sumsteps < bstlike2) { multf = false; bestever = false; trysave = true; } if (-belowsum > bstlike2) { multf = false; bestever = true; trysave = true; } if (trysave) { if (treenode[parent->index - 1] != parent) tempblw = parent->back; else tempblw = parent; savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -belowsum; there = tempblw; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { if (treenode[parent->index - 1] != 
parent) tempblw = parent->back; else tempblw = parent; there = tempblw; mulf = false; } } } q = parent->next; while (q != parent) { if (q->back && q->back != item) { memcpy(temp1->base, q->base, endsite*sizeof(long)); memcpy(temp1->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp1->numnuc, q->numnuc, endsite*sizeof(nucarray)); temp1->numdesc = q->numdesc; multifillin(temp1, parentback, 0); if (lastrearr) belowbetter = (-parentsum < bstlike2); if (!q->back->tip) { memcpy(temp->base, q->back->base, endsite*sizeof(long)); memcpy(temp->numsteps, q->back->numsteps, endsite*sizeof(long)); memcpy(temp->numnuc, q->back->numnuc, endsite*sizeof(nucarray)); temp->numdesc = q->back->numdesc + 1; multifillin(temp, tempadd, 1); sumnsteps2(tempsum, temp1, temp, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = true; } } } else if (-tempsum->sumsteps >= like) { like = -tempsum->sumsteps; there = q->back; mulf = true; } } sumnsteps(temp, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp, temp1, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { trysave = false; multf = false; if (belowbetter) { bestever = false; trysave = true; } if (-tempsum->sumsteps > bstlike2) { bestever = true; trysave = true; } if (trysave) { if (treenode[q->back->index - 1] != q->back) tempblw = q; else tempblw = q->back; savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = tempblw; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { if (treenode[q->back->index - 1] != q->back) tempblw = q; else tempblw = q->back; there = tempblw; mulf = false; } } } q = q->next; } } /* trydescendants */ void trylocal(node *item, node *forknode) { /* rearranges below forknode, below descendants of forknode when there are more than 2 descendants, then unroots the back of forknode and rearranges on its descendants */ node *q; boolean bestever, multf, saved; memcpy(temprm->base, zeros, endsite*sizeof(long)); memcpy(temprm->numsteps, zeros, endsite*sizeof(long)); memcpy(temprm->oldbase, item->base, endsite*sizeof(long)); memcpy(temprm->oldnumsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempf->base, forknode->base, endsite*sizeof(long)); memcpy(tempf->numsteps, forknode->numsteps, endsite*sizeof(long)); memcpy(tempf->numnuc, forknode->numnuc, endsite*sizeof(nucarray)); tempf->numdesc = forknode->numdesc - 1; multifillin(tempf, temprm, -1); if (!forknode->back) { sumnsteps2(tempsum, tempf, tempadd, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { bestever = true; multf = false; savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = forknode; mulf = false; } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = forknode; mulf = false; } 
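/* A hedged aside for readers, not part of the original source: in the
 * comparisons above and below, "likelihood" is simply the negated,
 * threshold-capped weighted step count, so a test such as
 * -tempsum->sumsteps > bstlike2 asks whether this placement needs fewer
 * steps than the best found so far.  The compiled-out sketch restates the
 * rule used by evaluate(); sketch_sum, site and steps are hypothetical
 * local names. */
#if 0
  {
    double sketch_sum = 0.0;
    long site;
    for (site = 0; site < endsite; site++) {
      long steps = root->numsteps[site];      /* weighted steps at this site */
      sketch_sum += (double)((steps <= threshwt[site])
                               ? steps : threshwt[site]);
    }
    /* like = -sketch_sum : a larger value always means fewer steps */
  }
#endif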
} } else { sumnsteps(temp, tempf, tempadd, 0, endsite); sumnsteps2(tempsum, temp, forknode->back, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { bestever = true; multf = false; savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = forknode; mulf = false; } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = forknode; mulf = false; } } trydescendants(item, forknode, forknode->back, tempf, false); } q = forknode->next; while (q != forknode) { if (q->back != item) { memcpy(temp2->base, q->base, endsite*sizeof(long)); memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp2->numnuc, q->numnuc, endsite*sizeof(nucarray)); temp2->numdesc = q->numdesc - 1; multifillin(temp2, temprm, -1); if (!q->back->tip) { trydescendants(item, forknode, q->back, temp2, true); } else { sumnsteps(temp1, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps > bstlike2) { multf = false; bestever = true; savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = false; } } } else if ((-tempsum->sumsteps) > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = q->back; mulf = false; } } } } q = q->next; } } /* trylocal */ void trylocal2(node *item, node *forknode, node *other) { /* rearranges below forknode, below descendants of forknode when there are more than 2 descendants, then unroots the back of forknode and rearranges on its descendants. 
Used if forknode has binary descendants */ node *q; boolean bestever=0, multf, saved, belowbetter, trysave; memcpy(tempf->base, other->base, endsite*sizeof(long)); memcpy(tempf->numsteps, other->numsteps, endsite*sizeof(long)); memcpy(tempf->oldbase, forknode->base, endsite*sizeof(long)); memcpy(tempf->oldnumsteps, forknode->numsteps, endsite*sizeof(long)); tempf->numdesc = other->numdesc; if (forknode->back) trydescendants(item, forknode, forknode->back, tempf, false); if (!other->tip) { memcpy(temp->base, other->base, endsite*sizeof(long)); memcpy(temp->numsteps, other->numsteps, endsite*sizeof(long)); memcpy(temp->numnuc, other->numnuc, endsite*sizeof(nucarray)); temp->numdesc = other->numdesc + 1; multifillin(temp, tempadd, 1); if (forknode->back) sumnsteps2(tempsum, forknode->back, temp, 0, endsite, threshwt); else sumnsteps2(tempsum, NULL, temp, 0, endsite, threshwt); belowbetter = true; if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { belowbetter = false; bestever = false; multf = true; if (-tempsum->sumsteps > bstlike2) bestever = true; savelocrearr(item, forknode, other, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = other; mulf = true; } } } else if (-tempsum->sumsteps >= like) { there = other; mulf = true; like = -tempsum->sumsteps; } if (forknode->back) { memcpy(temprm->base, forknode->back->base, endsite*sizeof(long)); memcpy(temprm->numsteps, forknode->back->numsteps, endsite*sizeof(long)); } else { memcpy(temprm->base, zeros, endsite*sizeof(long)); memcpy(temprm->numsteps, zeros, endsite*sizeof(long)); } memcpy(temprm->oldbase, other->back->base, endsite*sizeof(long)); memcpy(temprm->oldnumsteps, other->back->numsteps, endsite*sizeof(long)); q = other->next; while (q != other) { memcpy(temp2->base, q->base, endsite*sizeof(long)); memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long)); memcpy(temp2->numnuc, q->numnuc, endsite*sizeof(nucarray)); if (forknode->back) { temp2->numdesc = q->numdesc; multifillin(temp2, temprm, 0); } else { temp2->numdesc = q->numdesc - 1; multifillin(temp2, temprm, -1); } if (!q->back->tip) trydescendants(item, forknode, q->back, temp2, true); else { sumnsteps(temp1, q->back, tempadd, 0, endsite); sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt); if (lastrearr) { if (-tempsum->sumsteps >= bstlike2) { trysave = false; multf = false; if (belowbetter) { bestever = false; trysave = true; } if (-tempsum->sumsteps > bstlike2) { bestever = true; trysave = true; } if (trysave) { savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm, tmpadd, &root, maxtrees, &nextree, multf, bestever, &saved, place, bestrees, treenode, &grbg, zeros); if (saved) { like = bstlike2 = -tempsum->sumsteps; there = q->back; mulf = false; } } } } else if (-tempsum->sumsteps > like) { like = -tempsum->sumsteps; if (-tempsum->sumsteps > bestyet) { there = q->back; mulf = false; } } } q = q->next; } } } /* trylocal2 */ void tryrearr(node *p, boolean *success) { /* evaluates one rearrangement of the tree. if the new tree has greater "likelihood" than the old one sets success = TRUE and keeps the new tree. 
otherwise, restores the old tree */ node *forknode, *newfork, *other, *oldthere; double oldlike; boolean oldmulf; if (p->back == NULL) return; forknode = treenode[p->back->index - 1]; if (!forknode->back && forknode->numdesc <= 2 && alltips(forknode, p)) return; oldlike = bestyet; like = -10.0 * spp * chars; memcpy(tempadd->base, p->base, endsite*sizeof(long)); memcpy(tempadd->numsteps, p->numsteps, endsite*sizeof(long)); memcpy(tempadd->oldbase, zeros, endsite*sizeof(long)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); if (forknode->numdesc > 2) { oldthere = there = forknode; oldmulf = mulf = true; trylocal(p, forknode); } else { findbelow(&other, p, forknode); oldthere = there = other; oldmulf = mulf = false; trylocal2(p, forknode, other); } if ((like <= oldlike) || (there == oldthere && mulf == oldmulf)) return; recompute = true; re_move(p, &forknode, &root, recompute, treenode, &grbg, zeros); if (mulf) add(there, p, NULL, &root, recompute, treenode, &grbg, zeros); else { if (forknode->numdesc > 0) getnufork(&newfork, &grbg, treenode, zeros); else newfork = forknode; add(there, p, newfork, &root, recompute, treenode, &grbg, zeros); } if (like > oldlike + LIKE_EPSILON) { *success = true; bestyet = like; } } /* tryrearr */ void repreorder(node *p, boolean *success) { /* traverses a binary tree, calling PROCEDURE tryrearr at a node before calling tryrearr at its descendants */ node *q, *this; if (p == NULL) return; if (!p->visited) { tryrearr(p, success); p->visited = true; } if (!p->tip) { q = p; while (q->next != p) { this = q->next->back; repreorder(q->next->back,success); if (q->next->back == this) q = q->next; } } } /* repreorder */ void rearrange(node **r) { /* traverses the tree (preorder), finding any local rearrangement which decreases the number of steps. 
if traversal succeeds in increasing the tree's "likelihood", PROCEDURE rearrange runs traversal again */ boolean success=true; while (success) { success = false; clearvisited(treenode); repreorder(*r, &success); } } /* rearrange */ void describe() { /* prints ancestors, steps and table of numbers of steps in each site */ if (treeprint) { fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10.0); fprintf(outfile, "\n between and length\n"); fprintf(outfile, " ------- --- ------\n"); printbranchlengths(root); } if (stepbox) writesteps(chars, weights, oldweight, root); if (ancseq) { hypstates(chars, root, treenode, &garbage, basechar); putc('\n', outfile); } putc('\n', outfile); if (trout) { col = 0; treeout3(root, nextree, &col, root); } } /* describe */ void dnapars_coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; double xx; if (p == NULL) return; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { xx = q->v; if (xx > 100.0) xx = 100.0; dnapars_coordinates(q->back, lengthsum + xx, tipy,tipmax); q = q->next; } while (p != q); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if ((p == root) || count_sibs(p) > 2) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* dnapars_coordinates */ void dnapars_printree() { /* prints out diagram of the tree2 */ long tipy; double scale, tipmax; long i; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; dnapars_coordinates(root, 0.0, &tipy, &tipmax); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline3(i, scale, root); putc('\n', outfile); } /* dnapars_printree */ void globrearrange() { /* does global rearrangements */ long j; double gotlike; boolean frommulti; node *item, *nufork; recompute = true; do { printf(" "); gotlike = bestlike = bstlike2; /* order matters here ! 
*/ for (j = 0; j < nonodes; j++) { bestyet = -10.0 * spp * chars; if (j < spp) item = treenode[enterorder[j] -1]; else item = treenode[j]; if ((item != root) && ((j < spp) || ((j >= spp) && (item->numdesc > 0))) && !((item->back->index == root->index) && (root->numdesc == 2) && alltips(root, item))) { re_move(item, &nufork, &root, recompute, treenode, &grbg, zeros); frommulti = (nufork->numdesc > 0); clearcollapse(treenode); there = root; memcpy(tempadd->base, item->base, endsite*sizeof(long)); memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempadd->oldbase, zeros, endsite*sizeof(long)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); if (frommulti){ oldnufork = nufork; getnufork(&nufork, &grbg, treenode, zeros); } addpreorder(root, item, nufork); if (frommulti) oldnufork = NULL; if (!mulf) add(there, item, nufork, &root, recompute, treenode, &grbg, zeros); else add(there, item, NULL, &root, recompute, treenode, &grbg, zeros); } if (progress) { if (j % ((nonodes / 72) + 1) == 0) putchar('.'); fflush(stdout); } } if (progress) { putchar('\n'); #ifdef WIN32 phyFillScreenColor(); #endif } } while (bestlike > gotlike); } /* globrearrange */ void load_tree(long treei) { /* restores a tree from bestrees */ long j, nextnode; boolean recompute = false; node *dummy; for (j = spp - 1; j >= 1; j--) re_move(treenode[j], &dummy, &root, recompute, treenode, &grbg, zeros); root = treenode[0]; recompute = true; add(treenode[0], treenode[1], treenode[spp], &root, recompute, treenode, &grbg, zeros); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[treei].btree[j - 1] > 0) add(treenode[bestrees[treei].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], &root, recompute, treenode, &grbg, zeros); else add(treenode[treenode[-bestrees[treei].btree[j-1]-1]->back->index-1], treenode[j - 1], NULL, &root, recompute, treenode, &grbg, zeros); } } void grandrearr() { /* calls global rearrangement on best trees */ long treei; boolean done; done = false; do { treei = findunrearranged(bestrees, nextree, true); if (treei < 0) done = true; else bestrees[treei].gloreange = true; if (!done) { load_tree(treei); globrearrange(); done = rearrfirst; } } while (!done); } /* grandrearr */ void maketree() { /* constructs a binary tree from the pointers in treenode. 
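
   For orientation, an added and simplified outline (not verbatim code;
   species[i] and "attach" are shorthand for the treenode/enterorder
   bookkeeping used below):

       add the first two species to form the initial tree;
       for (i = 3; i <= spp; i++) {
           addpreorder(root, species[i], nufork);   find the best attachment
           add(there, species[i], ...);             attach it there
           rearrange(&root);                        local rearrangements
       }
       globrearrange() and grandrearr();            global rearrangements
       report the tied trees saved in bestrees (up to maxtrees of them);
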
adds each node at location which yields highest "likelihood" then rearranges the tree for greatest "likelihood" */ long i, j, nextnode; boolean done, firsttree, goteof, haslengths; node *item, *nufork, *dummy; pointarray nodep; char* treestr; numtrees = 0; if (!usertree) { for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); recompute = true; root = treenode[enterorder[0] - 1]; add(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], treenode[spp], &root, recompute, treenode, &grbg, zeros); if (progress) { printf("Adding species:\n"); writename(0, 2, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = false; oldnufork = NULL; for (i = 3; i <= spp; i++) { bestyet = -10.0 * spp * chars; item = treenode[enterorder[i - 1] - 1]; getnufork(&nufork, &grbg, treenode, zeros); there = root; memcpy(tempadd->base, item->base, endsite*sizeof(long)); memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long)); memcpy(tempadd->oldbase, zeros, endsite*sizeof(long)); memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long)); addpreorder(root, item, nufork); if (!mulf) add(there, item, nufork, &root, recompute, treenode, &grbg, zeros); else add(there, item, NULL, &root, recompute, treenode, &grbg, zeros); like = bestyet; rearrange(&root); if (progress) { writename(i - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } lastrearr = (i == spp); if (lastrearr) { bestlike = bestyet; if (jumb == 1) { bstlike2 = bestlike; nextree = 1; initbestrees(bestrees, maxtrees, true); initbestrees(bestrees, maxtrees, false); } if (progress) { printf("\nDoing global rearrangements"); if (rearrfirst) printf(" on the first of the trees tied for best\n"); else printf(" on all trees tied for best\n"); printf(" !"); for (j = 0; j < nonodes; j++) if (j % ((nonodes / 72) + 1) == 0) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } globrearrange(); } } done = false; while (!done && findunrearranged(bestrees, nextree, true) >= 0) { grandrearr(); done = rearrfirst; } if (progress) putchar('\n'); recompute = false; for (i = spp - 1; i >= 1; i--) re_move(treenode[i], &dummy, &root, recompute, treenode, &grbg, zeros); if (jumb == njumble) { collapsebestrees(&root, &grbg, treenode, bestrees, place, zeros, chars, recompute, progress); if (treeprint) { putc('\n', outfile); if (nextree == 2) fprintf(outfile, "One most parsimonious tree found:\n"); else fprintf(outfile, "%6ld trees in all found\n", nextree - 1); } if (nextree > maxtrees + 1) { if (treeprint) fprintf(outfile, "here are the first %4ld of them\n", (long)maxtrees); nextree = maxtrees + 1; } if (treeprint) putc('\n', outfile); for (i = 0; i <= (nextree - 2); i++) { root = treenode[0]; add(treenode[0], treenode[1], treenode[spp], &root, recompute, treenode, &grbg, zeros); nextnode = spp + 2; for (j = 3; j <= spp; j++) { if (bestrees[i].btree[j - 1] > 0) add(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], treenode[nextnode++ - 1], &root, recompute, treenode, &grbg, zeros); else add(treenode[treenode[-bestrees[i].btree[j - 1]-1]->back->index-1], treenode[j - 1], NULL, &root, recompute, treenode, &grbg, zeros); } reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); treelength(root, chars, treenode); dnapars_printree(); describe(); for (j = 1; j < spp; j++) re_move(treenode[j], &dummy, &root, recompute, treenode, &grbg, zeros); } } } else { if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); if (numtrees > MAXNUMTREES) { printf("\nERROR: number of input 
trees is read incorrectly from %s\n", intreename); embExitBad(); } if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n"); } fsteps = (long **)Malloc(maxuser*sizeof(long *)); for (j = 1; j <= maxuser; j++) fsteps[j - 1] = (long *)Malloc(endsite*sizeof(long)); if (trout) fprintf(outtree, "%ld\n", numtrees); nodep = NULL; which = 1; while (which <= numtrees) { firsttree = true; nextnode = 0; haslengths = true; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, treenode, &goteof, &firsttree, nodep, &nextnode, &haslengths, &grbg, initdnaparsnode,false,nonodes); if (treeprint) fprintf(outfile, "\n\n"); if (outgropt) reroot(treenode[outgrno - 1], root); postorder(root); evaluate(root); treelength(root, chars, treenode); dnapars_printree(); describe(); if (which < numtrees) gdispose(root, &grbg, treenode); which++; } FClose(intree); putc('\n', outfile); if (numtrees > 1 && chars > 1 ) standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed); for (j = 1; j <= maxuser; j++) free(fsteps[j - 1]); free(fsteps); } if (jumb == njumble) { if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) { printf("\nTree"); if ((usertree && numtrees > 1) || (!usertree && nextree != 2)) printf("s"); printf(" also written onto file \"%s\"\n", outtreename); } } } } /* maketree */ void reallocchars() { /* The amount of chars can change between runs this function reallocates all the variables whose size depends on the amount of chars */ long i; for (i=0; i < spp; i++){ free(y[i]); y[i] = (Char *)Malloc(chars*sizeof(Char)); } free(weight); free(oldweight); free(alias); free(ally); free(location); weight = (long *)Malloc(chars*sizeof(long)); oldweight = (long *)Malloc(chars*sizeof(long)); alias = (long *)Malloc(chars*sizeof(long)); ally = (long *)Malloc(chars*sizeof(long)); location = (long *)Malloc(chars*sizeof(long)); } void freerest() { /* free variables that are allocated each data set */ long i; if (!usertree) { freenode(&temp); freenode(&temp1); freenode(&temp2); freenode(&tempsum); freenode(&temprm); freenode(&tempadd); freenode(&tempf); freenode(&tmp); freenode(&tmp1); freenode(&tmp2); freenode(&tmp3); freenode(&tmprm); freenode(&tmpadd); } for (i = 0; i < spp; i++) free(y[i]); free(y); for (i = 1; i <= maxtrees; i++) free(bestrees[i - 1].btree); free(bestrees); free(nayme); free(enterorder); free(place); free(weight); free(oldweight); free(alias); free(ally); free(location); freegrbg(&grbg); if (ancseq) freegarbage(&garbage); free(threshwt); free(zeros); freenodes(nonodes, treenode); } /* freerest */ int main(int argc, Char *argv[]) { /* DNA parsimony by uphill search */ /* reads in spp, chars, and the data. 
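
   For orientation, an added and simplified outline of the driver loop
   (memory allocation, progress reporting and cleanup omitted):

       doinit();
       for (ith = 1; ith <= msets; ith++) {       each data set or weight set
           doinput();
           for (jumb = 1; jumb <= njumble; jumb++)
               maketree();                        one search per jumble order
       }
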
Then calls maketree to construct the tree */ #ifdef MAC argc = 1; /* macsetup("Dnapars",""); */ argv[0] = "Dnapars"; #endif init(argc, argv); emboss_getoptions("fdnapars",argc,argv); progname = argv[0]; ibmpc = IBMCRT; ansi = ANSICRT; firstset = true; garbage = NULL; grbg = NULL; doinit(); for (ith = 1; ith <= msets; ith++) { if (!(justwts && !firstset)) allocrest(); if (msets > 1 && !justwts) { fprintf(outfile, "\nData set # %ld:\n\n", ith); if (progress) printf("\nData set # %ld:\n\n", ith); } doinput(); if (ith == 1) firstset = false; for (jumb = 1; jumb <= njumble; jumb++) maketree(); if (!justwts) freerest(); } freetree(nonodes, treenode); FClose(infile); FClose(outfile); if (weights || justwts) FClose(weightfile); if (trout) FClose(outtree); if (usertree) FClose(intree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif if (progress) printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* DNA parsimony by uphill search */ PHYLIPNEW-3.69.650/src/dnaml.c0000664000175000017500000021164611616234204012325 00000000000000#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, Dan Fineman, and Patrick Colacurcio. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ typedef struct valrec { double rat, ratxi, ratxv, orig_zz, z1, y1, z1zz, z1yy, xiz1, xiy1xv; double *ww, *zz, *wwzz, *vvzz; } valrec; typedef long vall[maxcategs]; typedef double contribarr[maxcategs]; AjPSeqset* seqsets = NULL; AjPPhyloProp phyloratecat = NULL; AjPPhyloProp phyloweights = NULL; AjPPhyloTree* phylotrees; ajint numseqs; ajint numwts; #ifndef OLDC /* function prototypes */ void dnamlcopy(tree *, tree *, long, long); //void getoptions(void); void emboss_getoptions(char *pgm, int argc, char *argv[]); void allocrest(void); void doinit(void); void inputoptions(void); void makeweights(void); void getinput(void); void initmemrates(void); void inittable_for_usertree(char *); void inittable(void); double evaluate(node *, boolean); void alloc_nvd (long, nuview_data *); void free_nvd (nuview_data *); void nuview(node *); void slopecurv(node *, double, double *, double *, double *); void makenewv(node *); void update(node *); void smooth(node *); void insert_(node *, node *, boolean); void dnaml_re_move(node **, node **); void buildnewtip(long, tree *); void buildsimpletree(tree *); void addtraverse(node *, node *, boolean); void rearrange(node *, node *); void initdnamlnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void dnaml_coordinates(node *, double, long *, double *); void dnaml_printree(void); void sigma(node *, double *, double *, double *); void describe(node *); void reconstr(node *, long); void rectrav(node *, long, long); void summarize(void); void dnaml_treeout(node *); void inittravtree(node *); void treevaluate(void); void maketree(void); void clean_up(void); void reallocsites(void); void globrearrange(void) ; void dnaml_unroot_here(node* root, node** nodep, long nonodes); void dnaml_unroot(node* p, node** nodep, long nonodes); void freetable(void); void alloclrsaves(void); void resetlrsaves(void); void freelrsaves(void); /* function prototypes */ #endif /* local rearrangements need to save views. 
created globally so that * * reallocation of the same variable is unnecessary */ node **lrsaves; long oldendsite; double fracchange; long rcategs; boolean haslengths; Char infilename[FNMLNGTH], intreename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH]; const char* outfilename; const char* outtreename; AjPFile embossoutfile; AjPFile embossouttree; double *rate, *rrate, *probcat; long nonodes2, sites, weightsum, categs, datasets, ith, njumble, jumb; long parens, outgrno; boolean freqsfrom, global, jumble, weights, trout, usertree, ctgry, rctgry, auto_, hypstate, ttr, progress, mulsets, justwts, firstset, improve, smoothit, polishing, lngths, gama, invar,inserting=false; tree curtree, bestree, bestree2, priortree; node *qwhere, *grbg, *addwhere; double xi, xv, ttratio, ttratio0, freqa, freqc, freqg, freqt, freqr, freqy, freqar, freqcy, freqgr, freqty, cv, alpha, lambda, invarfrac, bestyet; long *enterorder, inseed, inseed0; steptr aliasweight; contribarr *contribution, like, nulike, clai; double **term, **slopeterm, **curveterm; longer seed; Char* progname; char basechar[16]="acmgrsvtwyhkdbn"; /* Local variables for maketree, propagated globally for c version: */ long k, nextsp, numtrees, maxwhich, mx, mx0, mx1, shimotrees; double dummy, maxlogl; boolean succeeded, smoothed; double **l0gf; double *l0gl; valrec ***tbl; Char ch, ch2; long col; vall *mp=NULL; void dnamlcopy(tree *a, tree *b, long nonodes, long categs) { /* copies tree a to tree b*/ /* assumes bifurcation (OK) */ long i, j; node *p, *q; for (i = 0; i < spp; i++) { copynode(a->nodep[i], b->nodep[i], categs); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { copynode(p, q, categs); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; b->start = a->start; /* start used in dnaml only */ b->root = a->root; /* root used in dnamlk only */ } /* dnamlcopy plc*/ void emboss_getoptions(char *pgm, int argc, char *argv[]) { AjPStr gammamethod = NULL; ajint i; AjPFloat basefreq; AjPFloat hmmrates; AjPFloat hmmprob; AjPFloat arrayval; AjBool rough; double probsum=0.0; ctgry = false; rctgry = false; categs = 1; rcategs = 1; auto_ = false; freqsfrom = true; gama = false; global = false; hypstate = false; improve = false; invar = false; jumble = false; njumble = 1; lngths = false; lambda = 1.0; outgrno = 1; outgropt = false; trout = true; ttratio = 2.0; ttr = false; usertree = false; weights = false; printdata = false; progress = true; treeprint = true; invarfrac = 0.0; cv = 1.0; mulsets = false; datasets = 1; embInitPV(pgm, argc, argv, "PHYLIPNEW",VERSION); seqsets = ajAcdGetSeqsetall("sequence"); numseqs = 0; while (seqsets[numseqs]) numseqs++; phylotrees = ajAcdGetTree("intreefile"); if (phylotrees) { numtrees = 0; while (phylotrees[numtrees]) numtrees++; usertree = true; lngths = ajAcdGetBoolean("lengths"); 
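/* A brief aside, not original code: the AJAX accessors used here
 * (ajAcdGetSeqsetall, ajAcdGetTree) return NULL-terminated arrays, which
 * is why numseqs and numtrees are obtained with bare while loops.  The
 * compiled-out sketch below restates that counting idiom; sketch_n is a
 * hypothetical name. */
#if 0
  {
    ajint sketch_n = 0;              /* hypothetical local counter       */
    while (phylotrees[sketch_n])     /* stop at the terminating NULL     */
      sketch_n++;
    /* sketch_n now equals the numtrees computed a few lines above */
  }
#endif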
} numwts = 0; phyloweights = ajAcdGetProperties("weights"); if (phyloweights) { weights = true; numwts = ajPhyloPropGetSize(phyloweights); } if (numseqs > 1) { mulsets = true; datasets = numseqs; } else if (numwts > 1) { mulsets = true; datasets = numwts; justwts = true; } categs = ajAcdGetInt("ncategories"); if (categs > 1) { ctgry = true; rate = (double *) Malloc(categs * sizeof(double)); arrayval = ajAcdGetArray("rate"); emboss_initcategs(arrayval, categs, rate); } else{ rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } phyloratecat = ajAcdGetProperties("categories"); gammamethod = ajAcdGetListSingle("gammatype"); if(ajStrMatchC(gammamethod, "n")) { rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); rrate[0] = 1.0; probcat[0] = 1.0; } else { rctgry = true; auto_ = ajAcdGetBoolean("adjsite"); if(auto_) { lambda = ajAcdGetFloat("lambda"); lambda = 1 / lambda; } } if(ajStrMatchC(gammamethod, "g")) { gama = true; rcategs = ajAcdGetInt("ngammacat"); cv = ajAcdGetFloat("gammacoefficient"); alpha = 1.0 / (cv*cv); initmemrates(); initgammacat(rcategs, alpha, rrate, probcat); } else if(ajStrMatchC(gammamethod, "i")) { invar = true; rcategs = ajAcdGetInt("ninvarcat"); cv = ajAcdGetFloat("invarcoefficient"); alpha = 1.0 / (cv*cv); invarfrac = ajAcdGetFloat("invarfrac"); initmemrates(); initgammacat(rcategs-1, alpha, rrate, probcat); for (i=0; i < rcategs-1 ; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else if(ajStrMatchC(gammamethod, "h")) { rcategs = ajAcdGetInt("nhmmcategories"); initmemrates(); hmmrates = ajAcdGetArray("hmmrates"); emboss_initcategs(hmmrates, rcategs,rrate); hmmprob = ajAcdGetArray("hmmprobabilities"); for (i=0; i < rcategs; i++){ probcat[i] = ajFloatGet(hmmprob, i); probsum += probcat[i]; } } outgrno = ajAcdGetInt("outgrno"); if(outgrno != 0) outgropt = true; else outgrno = 1; ttratio = ajAcdGetFloat("ttratio"); if(!usertree) { global = ajAcdGetBoolean("global"); rough = ajAcdGetBoolean("rough"); if(!rough) improve = true; njumble = ajAcdGetInt("njumble"); if(njumble >0) { inseed = ajAcdGetInt("seed"); jumble = true; emboss_initseed(inseed, &inseed0, seed); } else njumble = 1; } if((mulsets) && (!jumble)) { jumble = true; inseed = ajAcdGetInt("seed"); emboss_initseed(inseed, &inseed0, seed); } printdata = ajAcdGetBoolean("printdata"); progress = ajAcdGetBoolean("progress"); treeprint = ajAcdGetBoolean("treeprint"); trout = ajAcdGetToggle("trout"); hypstate = ajAcdGetBoolean("hypstate"); freqsfrom = ajAcdGetToggle("freqsfrom"); if(!freqsfrom) { basefreq = ajAcdGetArray("basefreq"); freqa = ajFloatGet(basefreq, 0); freqc = ajFloatGet(basefreq, 1); freqg = ajFloatGet(basefreq, 2); freqt = ajFloatGet(basefreq, 3); } embossoutfile = ajAcdGetOutfile("outfile"); emboss_openfile(embossoutfile, &outfile, &outfilename); if(trout) { embossouttree = ajAcdGetOutfile("outtreefile"); emboss_openfile(embossouttree, &outtree, &outtreename); } fprintf(outfile, "\nNucleic acid sequence Maximum Likelihood"); fprintf(outfile, " method, version %s\n\n",VERSION); printf("\n mulsets: %s",(mulsets ? "true" : "false")); printf("\n datasets : %ld",(datasets)); printf("\n rctgry : %s",(rctgry ? "true" : "false")); printf("\n gama : %s",(gama ? "true" : "false")); printf("\n invar : %s",(invar ? "true" : "false")); printf("\n numwts : %d",(numwts)); printf("\n numseqs : %d",(numseqs)); printf("\n\n ctgry: %s",(ctgry ? 
"true" : "false")); printf("\n categs : %ld",(categs)); printf("\n rcategs : %ld",(rcategs)); printf("\n auto_: %s",(auto_ ? "true" : "false")); printf("\n freqsfrom : %s",(freqsfrom ? "true" : "false")); printf("\n global : %s",(global ? "true" : "false")); printf("\n hypstate : %s",(hypstate ? "true" : "false")); printf("\n improve : %s",(improve ? "true" : "false")); printf("\n invar : %s",(invar ? "true" : "false")); printf("\n jumble : %s",(jumble ? "true" : "false")); printf("\n njumble : %ld",(njumble)); printf("\n lngths : %s",(lngths ? "true" : "false")); printf("\n lambda : %f",(lambda)); printf("\n cv : %f",(cv)); printf("\n freqa : %f",(freqa)); printf("\n freqc : %f",(freqc)); printf("\n freqg : %f",(freqg)); printf("\n freqt : %f",(freqt)); printf("\n outgrno : %ld",(outgrno)); printf("\n outgropt: %s",(outgropt ? "true" : "false")); printf("\n trout : %s",(trout ? "true" : "false")); printf("\n ttratio : %f",(ttratio)); printf("\n ttr : %s",(ttr ? "true" : "false")); printf("\n usertree : %s",(usertree ? "true" : "false")); printf("\n weights: %s",(weights ? "true" : "false")); printf("\n printdata : %s",(printdata ? "true" : "false")); printf("\n progress : %s",(progress ? "true" : "false")); printf("\n treeprint: %s",(treeprint ? "true" : "false")); printf("\n interleaved : %s \n\n",(interleaved ? "true" : "false")); } /* emboss_getoptions */ void initmemrates(void) { probcat = (double *) Malloc(rcategs * sizeof(double)); rrate = (double *) Malloc(rcategs * sizeof(double)); } void reallocsites(void) { long i; for (i=0; i < spp; i++) { free(y[i]); y[i] = (Char *) Malloc(sites*sizeof(Char)); } free(category); free(weight); free(alias); free(ally); free(location); free(aliasweight); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); } void allocrest(void) { long i; y = (Char **) Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); nayme = (naym *) Malloc(spp*sizeof(naym));; enterorder = (long *) Malloc(spp*sizeof(long)); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); } /* allocrest */ void doinit(void) { /* initializes variables */ inputnumbersseq(seqsets[0], &spp, &sites, &nonodes2, 2); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); alloctree(&curtree.nodep, nonodes2, usertree); allocrest(); if (usertree) return; alloctree(&bestree.nodep, nonodes2, 0); alloctree(&priortree.nodep, nonodes2, 0); if (njumble <= 1) return; alloctree(&bestree2.nodep, nonodes2, 0); } /* doinit */ void inputoptions(void) { long i; if (!firstset && !justwts) { samenumspseq(seqsets[ith-1], &sites, ith); reallocsites(); } for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; if (justwts || weights) inputweightsstr(phyloweights->Str[ith-1], sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if (ctgry && categs > 1) { inputcategsstr(phyloratecat->Str[0], 0, sites, category, categs, "DnaML"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) 
printweights(outfile, 0, sites, weight, "Sites"); } /* inputoptions */ void makeweights(void) { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2 (sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { location[alias[i - 1] - 1] = i; ally[alias[i - 1] - 1] = alias[i - 1]; } term = (double **) Malloc( endsite * sizeof(double *)); for (i = 0; i < endsite; i++) term[i] = (double *) Malloc( rcategs * sizeof(double)); slopeterm = (double **) Malloc( endsite * sizeof(double *)); for (i = 0; i < endsite; i++) slopeterm[i] = (double *) Malloc( rcategs * sizeof(double)); curveterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) curveterm[i] = (double *) Malloc( rcategs * sizeof(double)); mp = (vall *) Malloc( sites*sizeof(vall)); contribution = (contribarr *) Malloc( endsite*sizeof(contribarr)); } /* makeweights */ void getinput(void) { /* reads the input data */ inputoptions(); if (!freqsfrom) getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); if (!justwts || firstset) seq_inputdata(seqsets[ith-1],sites); if ( !firstset ) oldendsite = endsite; makeweights(); if ( firstset ) alloclrsaves(); else resetlrsaves(); setuptree2(&curtree); if (!usertree) { setuptree2(&bestree); setuptree2(&priortree); if (njumble > 1) setuptree2(&bestree2); } allocx(nonodes2, rcategs, curtree.nodep, usertree); if (!usertree) { allocx(nonodes2, rcategs, bestree.nodep, 0); allocx(nonodes2, rcategs, priortree.nodep, 0); if (njumble > 1) allocx(nonodes2, rcategs, bestree2.nodep, 0); } makevalues2(rcategs, curtree.nodep, endsite, spp, y, alias); if (freqsfrom) { empiricalfreqs(&freqa, &freqc, &freqg, &freqt, aliasweight, curtree.nodep); getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); } if (!justwts || firstset) fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n\n", ttratio); } /* getinput */ void inittable_for_usertree(char* treestr) { /* If there's a user tree, then the ww/zz/wwzz/vvzz elements need to be allocated appropriately. */ long num_comma; long i, j; /* First, figure out the largest possible furcation, i.e. the number of commas plus one */ countcomma(treestr, &num_comma); num_comma++; for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { /* Free the stuff allocated assuming bifurcations */ free (tbl[i][j]->ww); free (tbl[i][j]->zz); free (tbl[i][j]->wwzz); free (tbl[i][j]->vvzz); /* Then allocate for worst-case multifurcations */ tbl[i][j]->ww = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( num_comma * sizeof (double)); } } } /* inittable_for_usertree */ void freetable(void) { long i, j; for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { free(tbl[i][j]->ww); free(tbl[i][j]->zz); free(tbl[i][j]->wwzz); free(tbl[i][j]->vvzz); } } for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) free(tbl[i][j]); free(tbl[i]); } free(tbl); } void inittable(void) { /* Define a lookup table. 
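
   (An added note for clarity.)  For every pair of an HMM rate category i
   and a user-assigned site category j, the entry tbl[i][j] stores

       rat   = rrate[i] * rate[j]
       ratxi = rat * xi              the transition component
       ratxv = rat * xv              the transversion component

   and, unless user-supplied branch lengths are being used, all three are
   divided by the weighted mean rate over sites so that the expected rate
   of change per site is 1.
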
Precompute values and print them out in tables */ long i, j; double sumrates; tbl = (valrec ***) Malloc(rcategs * sizeof(valrec **)); for (i = 0; i < rcategs; i++) { tbl[i] = (valrec **) Malloc(categs*sizeof(valrec *)); for (j = 0; j < categs; j++) tbl[i][j] = (valrec *) Malloc(sizeof(valrec)); } for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { tbl[i][j]->rat = rrate[i]*rate[j]; tbl[i][j]->ratxi = tbl[i][j]->rat * xi; tbl[i][j]->ratxv = tbl[i][j]->rat * xv; /* Allocate assuming bifurcations, will be changed later if necessary (i.e. there's a user tree) */ tbl[i][j]->ww = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( 2 * sizeof (double)); } } if (!lngths) { /* restandardize rates */ sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]->rat; } sumrates /= (double)sites; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->rat /= sumrates; tbl[i][j]->ratxi /= sumrates; tbl[i][j]->ratxv /= sumrates; } } if(jumb > 1) return; if (rcategs > 1) { if (gama) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, "Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } fprintf(outfile, "\nState in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (i = 0; i < categs; i++) fprintf(outfile, "%9ld%16.3f\n", i+1, rate[i]); } if ((rcategs > 1) || (categs >> 1)) fprintf(outfile, "\n\n"); } /* inittable */ double evaluate(node *p, boolean saveit) { contribarr tterm; double sum, sum2, sumc, y, lz, y1, z1zz, z1yy, prod12, prod1, prod2, prod3, sumterm, lterm; long i, j, k, lai; node *q; sitelike x1, x2; sum = 0.0; q = p->back; if ( p->initialized == false && p->tip == false) nuview(p); if ( q->initialized == false && q->tip == false) nuview(q); y = p->v; lz = -y; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->orig_zz = exp(tbl[i][j]->ratxi * lz); tbl[i][j]->z1 = exp(tbl[i][j]->ratxv * lz); tbl[i][j]->z1zz = tbl[i][j]->z1 * tbl[i][j]->orig_zz; tbl[i][j]->z1yy = tbl[i][j]->z1 - tbl[i][j]->z1zz; } for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { if (y > 0.0) { y1 = 1.0 - tbl[j][k]->z1; z1zz = tbl[j][k]->z1zz; z1yy = tbl[j][k]->z1yy; } else { y1 = 0.0; z1zz = 1.0; z1yy = 0.0; } memcpy(x1, p->x[i][j], sizeof(sitelike)); prod1 = freqa * x1[0] + freqc * x1[(long)C - (long)A] + freqg * x1[(long)G - (long)A] + freqt * x1[(long)T - (long)A]; memcpy(x2, q->x[i][j], sizeof(sitelike)); prod2 = freqa * x2[0] + freqc * x2[(long)C - (long)A] + freqg * x2[(long)G - (long)A] + freqt * x2[(long)T - (long)A]; prod3 = (x1[0] * freqa + x1[(long)G - (long)A] * freqg) * (x2[0] * freqar + x2[(long)G - (long)A] * freqgr) + (x1[(long)C - (long)A] 
* freqc + x1[(long)T - (long)A] * freqt) * (x2[(long)C - (long)A] * freqcy + x2[(long)T - (long)A] * freqty); prod12 = freqa * x1[0] * x2[0] + freqc * x1[(long)C - (long)A] * x2[(long)C - (long)A] + freqg * x1[(long)G - (long)A] * x2[(long)G - (long)A] + freqt * x1[(long)T - (long)A] * x2[(long)T - (long)A]; tterm[j] = z1zz * prod12 + z1yy * prod3 + y1 * prod1 * prod2; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, rcategs*sizeof(double)); if (saveit && !auto_ && usertree && (which <= shimotrees)) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, rcategs*sizeof(double)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * like[i]; sum += log(sum2); curtree.likelihood = sum; if (!saveit || auto_ || !usertree) return sum; if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum; } /* evaluate */ void alloc_nvd (long num_sibs, nuview_data *local_nvd) { /* Allocate blocks of memory appropriate for the number of siblings a given node has */ local_nvd->yy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->wwzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vvzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sum = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->xx = (sitelike *) Malloc( num_sibs * sizeof (sitelike)); } /* alloc_nvd */ void free_nvd (nuview_data *local_nvd) { /* The natural complement to the alloc version */ free (local_nvd->yy); free (local_nvd->wwzz); free (local_nvd->vvzz); free (local_nvd->vzsumr); free (local_nvd->vzsumy); free (local_nvd->sum); free (local_nvd->sumr); free (local_nvd->sumy); free (local_nvd->xx); } /* free_nvd */ void nuview(node *p) { long i, j, k, l, num_sibs, sib_index; nuview_data *local_nvd = NULL; node *sib_ptr, *sib_back_ptr; sitelike p_xx; double lw; double correction; double maxx; /* Figure out how many siblings the current node has */ num_sibs = count_sibs (p); /* Recursive calls, should be called for all children */ sib_ptr = p; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (!sib_back_ptr->tip && !sib_back_ptr->initialized) nuview (sib_back_ptr); } /* Allocate the structure and blocks therein for variables used in this function */ local_nvd = (nuview_data *) Malloc( sizeof (nuview_data)); alloc_nvd (num_sibs, local_nvd); /* Loop 1: makes assignments to tbl based on some combination of what's already in tbl and the children's value of v */ sib_ptr = p; for (sib_index=0; sib_index < 
num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; lw = - (sib_back_ptr->v); for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->ww[sib_index] = exp(tbl[i][j]->ratxi * lw); tbl[i][j]->zz[sib_index] = exp(tbl[i][j]->ratxv * lw); tbl[i][j]->wwzz[sib_index] = tbl[i][j]->ww[sib_index] * tbl[i][j]->zz[sib_index]; tbl[i][j]->vvzz[sib_index] = (1.0 - tbl[i][j]->ww[sib_index]) * tbl[i][j]->zz[sib_index]; } } /* Loop 2: */ for (i = 0; i < endsite; i++) { correction = 0; maxx = 0; k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { /* Loop 2.1 */ sib_ptr = p; for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if ( j == 0 ) correction += sib_back_ptr->underflows[i]; local_nvd->wwzz[sib_index] = tbl[j][k]->wwzz[sib_index]; local_nvd->vvzz[sib_index] = tbl[j][k]->vvzz[sib_index]; local_nvd->yy[sib_index] = 1.0 - tbl[j][k]->zz[sib_index]; memcpy(local_nvd->xx[sib_index], sib_back_ptr->x[i][j], sizeof(sitelike)); } /* Loop 2.2 */ for (sib_index=0; sib_index < num_sibs; sib_index++) { local_nvd->sum[sib_index] = local_nvd->yy[sib_index] * (freqa * local_nvd->xx[sib_index][(long)A] + freqc * local_nvd->xx[sib_index][(long)C] + freqg * local_nvd->xx[sib_index][(long)G] + freqt * local_nvd->xx[sib_index][(long)T]); local_nvd->sumr[sib_index] = freqar * local_nvd->xx[sib_index][(long)A] + freqgr * local_nvd->xx[sib_index][(long)G]; local_nvd->sumy[sib_index] = freqcy * local_nvd->xx[sib_index][(long)C] + freqty * local_nvd->xx[sib_index][(long)T]; local_nvd->vzsumr[sib_index] = local_nvd->vvzz[sib_index] * local_nvd->sumr[sib_index]; local_nvd->vzsumy[sib_index] = local_nvd->vvzz[sib_index] * local_nvd->sumy[sib_index]; } /* Initialize to one, multiply incremental values for every sibling a node has */ p_xx[(long)A] = 1 ; p_xx[(long)C] = 1 ; p_xx[(long)G] = 1 ; p_xx[(long)T] = 1 ; for (sib_index=0; sib_index < num_sibs; sib_index++) { p_xx[(long)A] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)A] + local_nvd->vzsumr[sib_index]; p_xx[(long)C] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)C] + local_nvd->vzsumy[sib_index]; p_xx[(long)G] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)G] + local_nvd->vzsumr[sib_index]; p_xx[(long)T] *= local_nvd->sum[sib_index] + local_nvd->wwzz[sib_index] * local_nvd->xx[sib_index][(long)T] + local_nvd->vzsumy[sib_index]; } for ( l = 0 ; l < ((long)T - (long)A + 1); l++ ) { if ( p_xx[l] > maxx ) maxx = p_xx[l]; } /* And the final point of this whole function: */ memcpy(p->x[i][j], p_xx, sizeof(sitelike)); } p->underflows[i] = 0; if ( maxx < MIN_DOUBLE) fix_x(p,i,maxx,rcategs); p->underflows[i] += correction; } p->initialized = true; free_nvd (local_nvd); free (local_nvd); } /* nuview */ void slopecurv(node *p,double y,double *like,double *slope,double *curve) { /* compute log likelihood, slope and curvature at node p */ long i, j, k, lai; double sum, sumc, sumterm, lterm, sumcs, sumcc, sum2, slope2, curve2, temp; double lz, zz, z1, zzs, z1s, zzc, z1c, aa, bb, cc, prod1, prod2, prod12, prod3; contribarr thelike, nulike, nuslope, nucurve, theslope, thecurve, clai, cslai, cclai; node *q; sitelike x1, x2; q = p->back; sum = 0.0; lz = -y; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->orig_zz = exp(tbl[i][j]->rat * lz); tbl[i][j]->z1 = exp(tbl[i][j]->ratxv * lz); } for (i = 0; i < endsite; i++) { k = 
category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { if (y > 0.0) { zz = tbl[j][k]->orig_zz; z1 = tbl[j][k]->z1; } else { zz = 1.0; z1 = 1.0; } zzs = -tbl[j][k]->rat * zz ; z1s = -tbl[j][k]->ratxv * z1 ; temp = tbl[j][k]->rat; zzc = temp * temp * zz; temp = tbl[j][k]->ratxv; z1c = temp * temp * z1; memcpy(x1, p->x[i][j], sizeof(sitelike)); prod1 = freqa * x1[0] + freqc * x1[(long)C - (long)A] + freqg * x1[(long)G - (long)A] + freqt * x1[(long)T - (long)A]; memcpy(x2, q->x[i][j], sizeof(sitelike)); prod2 = freqa * x2[0] + freqc * x2[(long)C - (long)A] + freqg * x2[(long)G - (long)A] + freqt * x2[(long)T - (long)A]; prod3 = (x1[0] * freqa + x1[(long)G - (long)A] * freqg) * (x2[0] * freqar + x2[(long)G - (long)A] * freqgr) + (x1[(long)C - (long)A] * freqc + x1[(long)T - (long)A] * freqt) * (x2[(long)C - (long)A] * freqcy + x2[(long)T - (long)A] * freqty); prod12 = freqa * x1[0] * x2[0] + freqc * x1[(long)C - (long)A] * x2[(long)C - (long)A] + freqg * x1[(long)G - (long)A] * x2[(long)G - (long)A] + freqt * x1[(long)T - (long)A] * x2[(long)T - (long)A]; aa = prod12 - prod3; bb = prod3 - prod1*prod2; cc = prod1 * prod2; term[i][j] = zz * aa + z1 * bb + cc; slopeterm[i][j] = zzs * aa + z1s * bb; curveterm[i][j] = zzc * aa + z1c * bb; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * term[i][j]; lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) { term[i][j] = term[i][j] / sumterm; slopeterm[i][j] = slopeterm[i][j] / sumterm; curveterm[i][j] = curveterm[i][j] / sumterm; } sum += aliasweight[i] * lterm; } for (i = 0; i < rcategs; i++) { thelike[i] = 1.0; theslope[i] = 0.0; thecurve[i] = 0.0; } for (i = 0; i < sites; i++) { sumc = 0.0; sumcs = 0.0; sumcc = 0.0; for (k = 0; k < rcategs; k++) { sumc += probcat[k] * thelike[k]; sumcs += probcat[k] * theslope[k]; sumcc += probcat[k] * thecurve[k]; } sumc *= lambda; sumcs *= lambda; sumcc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, term[lai - 1], rcategs*sizeof(double)); memcpy(cslai, slopeterm[lai - 1], rcategs*sizeof(double)); memcpy(cclai, curveterm[lai - 1], rcategs*sizeof(double)); if (weight[i] > 1) { for (j = 0; j < rcategs; j++) { if (clai[j] > 0.0) clai[j] = exp(weight[i]*log(clai[j])); else clai[j] = 0.0; if (cslai[j] > 0.0) cslai[j] = exp(weight[i]*log(cslai[j])); else cslai[j] = 0.0; if (cclai[j] > 0.0) cclai[j] = exp(weight[i]*log(cclai[j])); else cclai[j] = 0.0; } } for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc) * clai[j]; nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs) * clai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cslai[j]; nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc) * clai[j] + 2.0 * ((1.0 - lambda) * theslope[j] + sumcs) * cslai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cclai[j]; } } else { for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc); nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs); nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc); } } memcpy(thelike, nulike, rcategs*sizeof(double)); memcpy(theslope, nuslope, rcategs*sizeof(double)); memcpy(thecurve, nucurve, rcategs*sizeof(double)); } sum2 = 0.0; slope2 = 0.0; curve2 = 0.0; for (i = 0; i < rcategs; i++) { sum2 += probcat[i] * thelike[i]; slope2 += probcat[i] * theslope[i]; curve2 += probcat[i] * thecurve[i]; } sum += log(sum2); (*like) = sum; (*slope) = slope2 / sum2; /* Expressed in terms of *slope to prevent overflow */ (*curve) = curve2 / sum2 - *slope * *slope; } /* 
slopecurv */ void makenewv(node *p) { /* Newton-Raphson algorithm improvement of a branch length */ long it, ite; double y, yold=0, yorig, like, slope, curve, oldlike=0; boolean done, firsttime, better; node *q; q = p->back; y = p->v; yorig = y; done = false; firsttime = true; it = 1; ite = 0; while ((it < iterations) && (ite < 20) && (!done)) { slopecurv (p, y, &like, &slope, &curve); better = false; if (firsttime) { /* if no older value of y to compare with */ yold = y; oldlike = like; firsttime = false; better = true; } else { if (like > oldlike) { /* update the value of yold if it was better */ yold = y; oldlike = like; better = true; it++; } } if (better) { y = y + slope/fabs(curve); /* Newton-Raphson, forced uphill-wards */ if (y < epsilon) y = epsilon; } else { if (fabs(y - yold) < epsilon) ite = 20; y = (y + 19*yold) / 20.0; /* retract 95% of way back */ } ite++; done = fabs(y-yold) < 0.1*epsilon; } smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); p->v = yold; /* the last one that had better likelihood */ q->v = yold; curtree.likelihood = oldlike; } /* makenewv */ void update(node *p) { long num_sibs, i; node* sib_ptr; if (!p->tip && !p->initialized) nuview(p); if (!p->back->tip && !p->back->initialized) nuview(p->back); if ((!usertree) || (usertree && !lngths) || p->iter) { makenewv(p); if ( smoothit ) { inittrav(p); inittrav(p->back); } else { if (inserting) { num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_ptr->initialized = false; } } } } } /* update */ void smooth(node *p) { long i, num_sibs; node *sib_ptr; smoothed = false; update (p); if (p->tip) return; num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; if (polishing || (smoothit && !smoothed)) { smooth(sib_ptr->back); } } } /* smooth */ void insert_(node *p, node *q, boolean dooinit) { /* Insert q near p */ /* assumes bifurcation (OK) */ long i; node *r; r = p->next->next; hookup(r, q->back); hookup(p->next, q); q->v = 0.5 * q->v; q->back->v = q->v; r->v = q->v; r->back->v = r->v; p->initialized = false; if (dooinit) { inittrav(p); inittrav(p->back); } i = 1; inserting = true; while (i <= smoothings) { smooth (p); if ( !p->tip ) { smooth(p->next); smooth(p->next->next); } i++; } inserting = false; } /* insert_ */ void dnaml_re_move(node **p, node **q) { /* remove p and record in q where it was */ long i; /* assumes bifurcation (OK) */ *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); (*p)->next->back = NULL; (*p)->next->next->back = NULL; (*q)->v += (*q)->back->v; (*q)->back->v = (*q)->v; if ( smoothit ) { inittrav((*q)); inittrav((*q)->back); } if ( smoothit ) { for ( i = 0 ; i < smoothings ; i++ ) { smooth(*q); smooth((*q)->back); } } else smooth(*q); } /* dnaml_re_move */ void buildnewtip(long m, tree *tr) { node *p; p = tr->nodep[nextsp + spp - 3]; hookup(tr->nodep[m - 1], p); p->v = initialv; p->back->v = initialv; } /* buildnewtip */ void buildsimpletree(tree *tr) { hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); tr->nodep[enterorder[0] - 1]->v = 0.1; tr->nodep[enterorder[0] - 1]->back->v = 0.1; tr->nodep[enterorder[1] - 1]->v = 0.1; tr->nodep[enterorder[1] - 1]->back->v = 0.1; buildnewtip(enterorder[2], tr); insert_(tr->nodep[enterorder[2] - 1]->back, tr->nodep[enterorder[0] - 1], false); } /* buildsimpletree2 */ void addtraverse(node *p, node *q, boolean contin) { /* try adding p at q, proceed recursively through tree */ long i, num_sibs; double like, vsave = 0; node 
*qback = NULL, *sib_ptr; if (!smoothit) { vsave = q->v; qback = q->back; } insert_(p, q, smoothit); like = evaluate(p, false); if (like > bestyet + LIKE_EPSILON || bestyet == UNDEFINED) { bestyet = like; if (smoothit) { dnamlcopy(&curtree, &bestree, nonodes2, rcategs); addwhere = q; } else qwhere = q; succeeded = true; } if (smoothit) dnamlcopy(&priortree, &curtree, nonodes2, rcategs); else { hookup (q, qback); q->v = vsave; q->back->v = vsave; curtree.likelihood = bestyet; } if (!q->tip && contin) { num_sibs = count_sibs (q); if (q == curtree.start) num_sibs++; sib_ptr = q; for (i=0; i < num_sibs; i++) { addtraverse(p, sib_ptr->next->back, contin); sib_ptr = sib_ptr->next; } } } /* addtraverse */ void freelrsaves(void) { long i,j; for ( i = 0 ; i < NLRSAVES ; i++ ) { for (j = 0; j < oldendsite; j++) free(lrsaves[i]->x[j]); free(lrsaves[i]->x); free(lrsaves[i]->underflows); free(lrsaves[i]); } free(lrsaves); } void resetlrsaves(void) { freelrsaves(); alloclrsaves(); } void alloclrsaves(void) { long i,j; lrsaves = Malloc(NLRSAVES * sizeof(node*)); for ( i = 0 ; i < NLRSAVES ; i++ ) { lrsaves[i] = Malloc(sizeof(node)); lrsaves[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike)); lrsaves[i]->underflows = Malloc(endsite * sizeof (double)); for (j = 0; j < endsite; j++) lrsaves[i]->x[j] = (ratelike)Malloc(rcategs*sizeof(sitelike)); } } /* alloclrsaves */ void globrearrange(void) { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,l,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; alloctree(&globtree.nodep,nonodes2,0); alloctree(&oldtree.nodep,nonodes2,0); setuptree2(&globtree); setuptree2(&oldtree); allocx(nonodes2, rcategs, globtree.nodep, 0); allocx(nonodes2, rcategs, oldtree.nodep, 0); dnamlcopy(&curtree,&globtree,nonodes2,rcategs); dnamlcopy(&curtree,&oldtree,nonodes2,rcategs); bestyet = curtree.likelihood; for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { dnaml_re_move(&sib_ptr,&where); dnamlcopy(&curtree,&priortree,nonodes2,rcategs); qwhere = where; if (where->tip) { dnamlcopy(&oldtree,&curtree,nonodes2,rcategs); dnamlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr=sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true); if ( !smoothit ) { if (succeeded && qwhere != where && qwhere != where->back) { insert_(sib_ptr,qwhere,true); smoothit = true; for (l = 1; l<=smoothings; l++) { smooth (where); smooth (where->back); } smoothit = false; success = true; dnamlcopy(&curtree,&globtree,nonodes2,rcategs); dnamlcopy(&priortree,&curtree,nonodes2,rcategs); } } else if ( addwhere && where != addwhere && where->back != addwhere && bestyet > globtree.likelihood) { dnamlcopy(&bestree,&globtree,nonodes2,rcategs); success = true; } sib_ptr2 = sib_ptr2->next; } dnamlcopy(&oldtree,&curtree,nonodes2,rcategs); dnamlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr = sib_ptr->next; } } dnamlcopy(&globtree,&curtree,nonodes2,rcategs); dnamlcopy(&globtree,&bestree,nonodes2,rcategs); if (success && globtree.likelihood > oldbestyet) { succeeded = true; } else { succeeded = false; } bestyet = globtree.likelihood; freex(nonodes2, globtree.nodep); freex(nonodes2, oldtree.nodep); freetree2(globtree.nodep, nonodes2); freetree2(oldtree.nodep, 
nonodes2); } /* globrearrange */ void rearrange(node *p, node *pp) { /* rearranges the tree locally moving pp around near p */ /* assumes bifurcation (OK) */ long i, num_sibs; node *q, *r, *sib_ptr; node *rnb = NULL, *rnnb = NULL; if (!p->tip && !p->back->tip) { curtree.likelihood = bestyet; if (p->back->next != pp) r = p->back->next; else r = p->back->next->next; /* assumes bifurcation, that's ok */ if (!smoothit) { rnb = r->next->back; rnnb = r->next->next->back; copynode(r,lrsaves[0],rcategs); copynode(r->next,lrsaves[1],rcategs); copynode(r->next->next,lrsaves[2],rcategs); copynode(p->next,lrsaves[3],rcategs); copynode(p->next->next,lrsaves[4],rcategs); } else dnamlcopy(&curtree, &bestree, nonodes2, rcategs); dnaml_re_move(&r, &q); nuview(p->next); nuview(p->next->next); if (smoothit) dnamlcopy(&curtree, &priortree, nonodes2, rcategs); else qwhere = q; num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; addtraverse(r, sib_ptr->back, false); } if (smoothit) dnamlcopy(&bestree, &curtree, nonodes2, rcategs); else { if (qwhere == q) { hookup(rnb,r->next); hookup(rnnb,r->next->next); copynode(lrsaves[0],r,rcategs); copynode(lrsaves[1],r->next,rcategs); copynode(lrsaves[2],r->next->next,rcategs); copynode(lrsaves[3],p->next,rcategs); copynode(lrsaves[4],p->next->next,rcategs); rnb->v = r->next->v; rnnb->v = r->next->next->v; r->back->v = r->v; curtree.likelihood = bestyet; } else { insert_(r, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth (r); smooth (r->back); } smoothit = false; } } } if (!p->tip) { num_sibs = count_sibs (p); if (p == curtree.start) num_sibs++; sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; rearrange(sib_ptr->back, p); } } } /* rearrange */ void initdnamlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, char** treestr) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; malloc_pheno((*p), endsite, rcategs); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); malloc_pheno(*p, endsite, rcategs); (*p)->index = nodei; break; case tip: match_names_to_data (str, nodep, p, spp); break; case iter: (*p)->initialized = false; (*p)->v = initialv; (*p)->iter = true; if ((*p)->back != NULL){ (*p)->back->iter = true; (*p)->back->v = initialv; (*p)->back->initialized = false; } break; case length: processlength(&valyew, &divisor, ch, &minusread, treestr, parens); (*p)->v = valyew / divisor / fracchange; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hsnolength: haslengths = false; break; default: /* cases hslength, treewt, unittrwt */ break; /* should never occur */ } } /* initdnamlnode */ void dnaml_coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; double xx; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { xx = fracchange * q->v; if (xx > 100.0) xx = 100.0; dnaml_coordinates(q->back, lengthsum + xx, tipy,tipmax); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); first = p->next->back; q = p; while (q->next != p) q = 
q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if (p == curtree.start) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* dnaml_coordinates */ void dnaml_printree(void) { /* prints out diagram of the tree2 */ long tipy; double scale, tipmax; long i; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; dnaml_coordinates(curtree.start, 0.0, &tipy, &tipmax); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline2(i, scale, curtree); putc('\n', outfile); } /* dnaml_printree */ void sigma(node *p, double *sumlr, double *s1, double *s2) { /* compute standard deviation */ double tt, aa, like, slope, curv; slopecurv (p, p->v, &like, &slope, &curv); tt = p->v; p->v = epsilon; p->back->v = epsilon; aa = evaluate(p, false); p->v = tt; p->back->v = tt; (*sumlr) = evaluate(p, false) - aa; if (curv < -epsilon) { (*s1) = p->v + (-slope - sqrt(slope * slope - 3.841 * curv)) / curv; (*s2) = p->v + (-slope + sqrt(slope * slope - 3.841 * curv)) / curv; } else { (*s1) = -1.0; (*s2) = -1.0; } } /* sigma */ void describe(node *p) { /* print out information for one branch */ long i, num_sibs; node *q, *sib_ptr; double sumlr, sigma1, sigma2; if (!p->tip && !p->initialized) nuview(p); if (!p->back->tip && !p->back->initialized) nuview(p->back); q = p->back; if (q->tip) { fprintf(outfile, " "); for (i = 0; i < nmlngth; i++) putc(nayme[q->index-1][i], outfile); fprintf(outfile, " "); } else fprintf(outfile, " %4ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, "%15.5f", q->v * fracchange); if (!usertree || (usertree && !lngths) || p->iter) { sigma(q, &sumlr, &sigma1, &sigma2); if (sigma1 <= sigma2) fprintf(outfile, " ( zero, infinity)"); else { fprintf(outfile, " ("); if (sigma2 <= 0.0) fprintf(outfile, " zero"); else fprintf(outfile, "%9.5f", sigma2 * fracchange); fprintf(outfile, ",%12.5f", sigma1 * fracchange); putc(')', outfile); } if (sumlr > 1.9205) fprintf(outfile, " *"); if (sumlr > 2.995) putc('*', outfile); } putc('\n', outfile); if (!p->tip) { num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } } } /* describe */ void reconstr(node *p, long n) { /* reconstruct and print out base at site n+1 at node p */ long i, j, k, m, first, second, num_sibs; double f, sum, xx[4]; node *q; if ((ally[n] == 0) || (location[ally[n]-1] == 0)) putc('.', outfile); else { j = location[ally[n]-1] - 1; for (i = 0; i < 4; i++) { f = p->x[j][mx-1][i]; num_sibs = count_sibs(p); q = p; for (k = 0; k < num_sibs; k++) { q = q->next; f *= q->x[j][mx-1][i]; } f = sqrt(f); xx[i] = f; } xx[0] *= freqa; xx[1] *= freqc; xx[2] *= freqg; xx[3] *= freqt; sum = xx[0]+xx[1]+xx[2]+xx[3]; for (i = 0; i < 4; i++) xx[i] /= sum; first = 0; for (i = 1; i < 4; i++) if (xx [i] > xx[first]) first = i; if (first == 0) second = 1; else second = 0; for (i = 0; i < 4; i++) if ((i != first) && (xx[i] > xx[second])) second = i; m = 1 << first; if (xx[first] < 0.4999995) m = m + (1 << second); if (xx[first] > 0.95) putc(toupper((int)basechar[m - 1]), outfile); else putc(basechar[m - 1], outfile); if (rctgry && rcategs > 1) mx = mp[n][mx - 1]; else mx = 1; } } /* reconstr */ void rectrav(node *p, long m, long n) { /* print out segment of reconstructed sequence for one branch */ long i; node *q; putc(' ', outfile); if 
(p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, " "); mx = mx0; for (i = m; i <= n; i++) { if ((i % 10 == 0) && (i != m)) putc(' ', outfile); if (p->tip) putc(y[p->index-1][i], outfile); else reconstr(p, i); } putc('\n', outfile); if (!p->tip) { for ( q = p->next; q != p; q = q->next ) rectrav(q->back, m, n); } mx1 = mx; } /* rectrav */ void summarize(void) { /* print out branch length information and node numbers */ long i, j, mm=0, num_sibs; double mode, sum; double like[maxcategs], nulike[maxcategs]; double **marginal; node *sib_ptr; if (!treeprint) return; fprintf(outfile, "\nremember: "); if (outgropt) fprintf(outfile, "(although rooted by outgroup) "); fprintf(outfile, "this is an unrooted tree!\n\n"); fprintf(outfile, "Ln Likelihood = %11.5f\n", curtree.likelihood); fprintf(outfile, "\n Between And Length"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " Approx. Confidence Limits"); fprintf(outfile, "\n"); fprintf(outfile, " ------- --- ------"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " ------- ---------- ------"); fprintf(outfile, "\n\n"); for (i = spp; i < nonodes2; i++) { /* So this works with arbitrary multifurcations */ if (curtree.nodep[i]) { num_sibs = count_sibs (curtree.nodep[i]); sib_ptr = curtree.nodep[i]; for (j = 0; j < num_sibs; j++) { sib_ptr->initialized = false; sib_ptr = sib_ptr->next; } } } describe(curtree.start->back); /* So this works with arbitrary multifurcations */ num_sibs = count_sibs (curtree.start); sib_ptr = curtree.start; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } fprintf(outfile, "\n"); if (!(usertree && lngths && haslengths)) { fprintf(outfile, " * = significantly positive, P < 0.05\n"); fprintf(outfile, " ** = significantly positive, P < 0.01\n\n"); } dummy = evaluate(curtree.start, false); if (rctgry && rcategs > 1) { for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; mp[i][j] = j + 1; for (k = 1; k <= rcategs; k++) { if (k != j + 1) { if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { nulike[j] = lambda * probcat[k - 1] * like[k - 1]; mp[i][j] = k; } } } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); } mode = 0.0; mx = 1; for (i = 1; i <= rcategs; i++) { if (probcat[i - 1] * like[i - 1] > mode) { mx = i; mode = probcat[i - 1] * like[i - 1]; } } mx0 = mx; fprintf(outfile, "Combination of categories that contributes the most to the likelihood:\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= sites; i++) { fprintf(outfile, "%ld", mx); if (i % 10 == 0) putc(' ', outfile); if (i % 60 == 0 && i != sites) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } mx = mp[i - 1][mx - 1]; } fprintf(outfile, "\n\n"); marginal = (double **) Malloc( sites*sizeof(double *)); for (i = 0; i < sites; i++) marginal[i] = (double *) Malloc( rcategs*sizeof(double)); for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; 
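/* ---------------------------------------------------------------------- */
/* Editor's aside (not part of the original PHYLIP source): a minimal,
   self-contained sketch of the per-site smoothing step that the
   surrounding loops implement for the autocorrelated-rates HMM.  With
   probability lambda the rate category of the next site is redrawn from
   the prior probcat[]; otherwise it is carried over, and the result is
   renormalised.  The function name hmm_rate_step and the parameter ncat
   are hypothetical; the recurrence is the algebraic simplification of
   (1 - lambda + lambda*probcat[j])*like[j] plus the sum over k != j of
   lambda*probcat[k]*like[k], as computed above.  Multiplying in the
   per-site contribution term, as the original code does next, is left
   out of the sketch. */
static void hmm_rate_step(const double *like, const double *probcat,
                          double lambda, long ncat, double *nulike)
{
  long j;
  double pool = 0.0, sum = 0.0;

  for (j = 0; j < ncat; j++)        /* prior-weighted mass available       */
    pool += probcat[j] * like[j];   /* if the category is redrawn          */
  for (j = 0; j < ncat; j++) {
    nulike[j] = (1.0 - lambda) * like[j] + lambda * pool;
    sum += nulike[j];
  }
  for (j = 0; j < ncat; j++)        /* renormalise, as the code above does */
    nulike[j] /= sum;
}
/* ---------------------------------------------------------------------- */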
} if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) { nulike[j] /= sum; marginal[i][j] = nulike[j]; } memcpy(like, nulike, rcategs * sizeof(double)); } for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = 0; i < sites; i++) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } marginal[i][j] *= like[j] * probcat[j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); sum = 0.0; for (j = 0; j < rcategs; j++) sum += marginal[i][j]; for (j = 0; j < rcategs; j++) marginal[i][j] /= sum; } fprintf(outfile, "Most probable category at each site if > 0.95" " probability (\".\" otherwise)\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 0; i < sites; i++) { mm = 0; sum = 0.0; for (j = 0; j < rcategs; j++) if (marginal[i][j] > sum) { sum = marginal[i][j]; mm = j; } if (sum >= 0.95) fprintf(outfile, "%ld", mm+1); else putc('.', outfile); if ((i+1) % 60 == 0) { if (i != 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } } else if ((i+1) % 10 == 0) putc(' ', outfile); } putc('\n', outfile); for (i = 0; i < sites; i++) free(marginal[i]); free(marginal); } putc('\n', outfile); if (hypstate) { fprintf(outfile, "Probable sequences at interior nodes:\n\n"); fprintf(outfile, " node "); for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) putc(' ', outfile); fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); if (!rctgry || (rcategs == 1)) mx0 = 1; for (i = 0; i < sites; i += 60) { k = i + 59; if (k >= sites) k = sites - 1; rectrav(curtree.start, i, k); rectrav(curtree.start->back, i, k); putc('\n', outfile); mx0 = mx1; } } } /* summarize */ void dnaml_treeout(node *p) { /* write out file with representation of final tree2 */ /* Only works for bifurcations! 
*/ long i, n, w; Char c; double x; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index-1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index-1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { putc('(', outtree); col++; dnaml_treeout(p->next->back); putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } dnaml_treeout(p->next->next->back); if (p == curtree.start) { putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } dnaml_treeout(p->back); } putc(')', outtree); col++; } x = p->v * fracchange; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree.start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } } /* dnaml_treeout */ void inittravtree(node *p) { /* traverse tree to set initialized and v to initial values */ node *q; p->initialized = false; p->back->initialized = false; if ( usertree && (!lngths || p->iter) ) { p->v = initialv; p->back->v = initialv; } if ( !p->tip ) { q = p->next; while ( q != p ) { inittravtree(q->back); q = q->next; } } } /* inittravtree */ void treevaluate(void) { /* evaluate a user tree */ long i; inittravtree(curtree.start); polishing = true; smoothit = true; for (i = 1; i <= smoothings * 4; i++) smooth (curtree.start); dummy = evaluate(curtree.start, true); } /* treevaluate */ void dnaml_unroot(node* root, node** nodep, long nonodes) { node *p,*r,*q; double newl; long i; long numsibs; numsibs = count_sibs(root); if ( numsibs > 2 ) { q = root; r = root; while (!(q->next == root)) q = q->next; q->next = root->next; for(i=0 ; i < endsite ; i++){ free(r->x[i]); r->x[i] = NULL; } free(r->x); r->x = NULL; chuck(&grbg, r); curtree.nodep[spp] = q; } else { /* Bifurcating root - remove entire root fork */ /* Join oldlen on each side of root */ newl = root->next->oldlen + root->next->next->oldlen; root->next->back->oldlen = newl; root->next->next->back->oldlen = newl; /* Join v on each side of root */ newl = root->next->v + root->next->next->v; root->next->back->v = newl; root->next->next->back->v = newl; /* Connect root's children */ root->next->back->back=root->next->next->back; root->next->next->back->back = root->next->back; /* Move nodep entries down one and set indices */ for ( i = spp; i < nonodes-1; i++ ) { p = nodep[i+1]; nodep[i] = p; nodep[i+1] = NULL; if ( nodep[i] == NULL ) /* This may happen in a multifurcating intree */ break; do { p->index = i+1; p = p->next; } while (p != nodep[i]); } /* Free protx arrays from old root */ for(i=0 ; i < endsite ; i++){ free(root->x[i]); free(root->next->x[i]); free(root->next->next->x[i]); root->x[i] = NULL; root->next->x[i] = NULL; root->next->next->x[i] = NULL; } free(root->x); free(root->next->x); free(root->next->next->x); chuck(&grbg,root->next->next); chuck(&grbg,root->next); chuck(&grbg,root); } } void maketree(void) { long i, j; boolean dummy_first, goteof; pointarray dummy_treenode=NULL; long nextnode; node *root; char* treestr; inittable(); if (usertree) { treestr = ajStrGetuniquePtr(&phylotrees[0]->Tree); inittable_for_usertree (treestr); if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) emboss_initseed(inseed, &inseed0, seed); l0gl = (double *) Malloc(shimotrees * sizeof(double)); l0gf = (double **) Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *) Malloc(endsite * 
sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } which = 1; /* This taken out of tree read, used to be [spp-1], but referring to [0] produces output identical to what the pre-modified dnaml produced. */ while (which <= numtrees) { /* These initializations required each time through the loop since multiple trees require re-initialization */ haslengths = true; nextnode = 0; dummy_first = true; goteof = false; treestr = ajStrGetuniquePtr(&phylotrees[which-1]->Tree); treeread(&treestr, &root, dummy_treenode, &goteof, &dummy_first, curtree.nodep, &nextnode, &haslengths, &grbg, initdnamlnode, false, nonodes2); dnaml_unroot(root, curtree.nodep, nonodes2); if (goteof && (which <= numtrees)) { /* if we hit the end of the file prematurely */ printf ("\n"); printf ("ERROR: trees missing at end of file.\n"); printf ("\tExpected number of trees:\t\t%ld\n", numtrees); printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); exxit(-1); } curtree.start = curtree.nodep[0]->back; if ( outgropt ) curtree.start = curtree.nodep[outgrno - 1]->back; treevaluate(); dnaml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } if(which < numtrees){ freex_notip(nextnode, curtree.nodep); gdispose(curtree.start, &grbg, curtree.nodep); } else nonodes2 = nextnode; which++; } FClose(intree); putc('\n', outfile); if (!auto_ && numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite-1, maxlogl, l0gl, l0gf, aliasweight, seed); } else { /* If there's no input user tree, */ for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); if (progress) { printf("\nAdding species:\n"); writename(0, 3, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } nextsp = 3; polishing = false; smoothit = improve; buildsimpletree(&curtree); curtree.start = curtree.nodep[enterorder[0] - 1]->back; nextsp = 4; while (nextsp <= spp) { buildnewtip(enterorder[nextsp - 1], &curtree); bestyet = UNDEFINED; if (smoothit) dnamlcopy(&curtree, &priortree, nonodes2, rcategs); addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.start, true); if (smoothit) dnamlcopy(&bestree, &curtree, nonodes2, rcategs); else { insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth (curtree.start); smooth (curtree.start->back); } smoothit = false; bestyet = curtree.likelihood; } if (progress) { writename(nextsp - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (global && nextsp == spp && progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = spp ; j < nonodes2 ; j++) if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); } succeeded = true; while (succeeded) { succeeded = false; if (global && nextsp == spp && progress) { printf(" "); fflush(stdout); } if (global && nextsp == spp) globrearrange(); else rearrange(curtree.start, curtree.start->back); if (global && nextsp == spp && progress) putchar('\n'); } nextsp++; } if ( !smoothit) dnamlcopy(&curtree, &bestree, nonodes2, rcategs); if (global && progress) { putchar('\n'); fflush(stdout); } if (njumble > 1) { if (jumb == 1) dnamlcopy(&bestree, &bestree2, nonodes2, rcategs); else if (bestree2.likelihood < bestree.likelihood) dnamlcopy(&bestree, &bestree2, nonodes2, rcategs); } if (jumb == njumble) { if (njumble > 1) dnamlcopy(&bestree2, &curtree, nonodes2, rcategs); curtree.start = 
curtree.nodep[outgrno - 1]->back; for (i = 0; i < nonodes2; i++) { if (i < spp) curtree.nodep[i]->initialized = false; else { curtree.nodep[i]->initialized = false; curtree.nodep[i]->next->initialized = false; curtree.nodep[i]->next->next->initialized = false; } } treevaluate(); dnaml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } } } if (usertree) { free(l0gl); for (i=0; i < shimotrees; i++) free(l0gf[i]); free(l0gf); } freetable(); if (jumb < njumble) return; free(contribution); free(mp); for (i=0; i < endsite; i++) free(term[i]); free(term); for (i=0; i < endsite; i++) free(slopeterm[i]); free(slopeterm); for (i=0; i < endsite; i++) free(curveterm[i]); free(curveterm); freex(nonodes2, curtree.nodep); if (!usertree) { freex(nonodes2, bestree.nodep); freex(nonodes2, priortree.nodep); if (njumble > 1) freex(nonodes2, bestree2.nodep); } if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } } /* maketree */ void clean_up(void) { /* Free and/or close stuff */ long i; free (rrate); free (probcat); free (rate); /* Seems to require freeing every time... */ for (i = 0; i < spp; i++) { free (y[i]); } free (y); free (nayme); free (enterorder); free (category); free (weight); free (alias); free (ally); free (location); free (aliasweight); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif } /* clean_up */ int main(int argc, Char *argv[]) { /* DNA Maximum Likelihood */ #ifdef MAC argc = 1; /* macsetup("DnaML",""); */ argv[0] = "DnaML"; #endif init(argc,argv); emboss_getoptions("fdnaml", argc, argv); progname = argv[0]; firstset = true; ibmpc = IBMCRT; ansi = ANSICRT; grbg = NULL; doinit(); ttratio0 = ttratio; for (ith = 1; ith <= datasets; ith++) { if (datasets > 1) { fprintf(outfile, "Data set # %ld:\n", ith); printf("\nData set # %ld:\n", ith); } ttratio = ttratio0; getinput(); if (ith == 1) firstset = false; if (usertree) maketree(); else for (jumb = 1; jumb <= njumble; jumb++) maketree(); } clean_up(); printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif embExit(); return 0; } /* DNA Maximum Likelihood */ PHYLIPNEW-3.69.650/ltmain.sh0000644000175000017500000105152212171071672012116 00000000000000 # libtool (GNU libtool) 2.4.2 # Written by Gordon Matzigkeit , 1996 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, # 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, # or obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Usage: $progname [OPTION]... [MODE-ARG]... # # Provide generalized library-building support services. # # --config show all configuration variables # --debug enable verbose shell tracing # -n, --dry-run display commands without modifying any files # --features display basic configuration information and exit # --mode=MODE use operation mode MODE # --preserve-dup-deps don't remove duplicate dependency libraries # --quiet, --silent don't print informational messages # --no-quiet, --no-silent # print informational messages (default) # --no-warn don't display warning messages # --tag=TAG use configuration variables from tag TAG # -v, --verbose print more informational messages than default # --no-verbose don't print the extra informational messages # --version print version information # -h, --help, --help-all print short, long, or detailed help message # # MODE must be one of the following: # # clean remove files from the build directory # compile compile a source file into a libtool object # execute automatically set library path, then run a program # finish complete the installation of libtool libraries # install install libraries or executables # link create a library or an executable # uninstall remove libraries from an installed directory # # MODE-ARGS vary depending on the MODE. When passed as first option, # `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that. # Try `$progname --help --mode=MODE' for a more detailed description of MODE. # # When reporting a bug, please describe a test case to reproduce it and # include the following information: # # host-triplet: $host # shell: $SHELL # compiler: $LTCC # compiler flags: $LTCFLAGS # linker: $LD (gnu? $with_gnu_ld) # $progname: (GNU libtool) 2.4.2 # automake: $automake_version # autoconf: $autoconf_version # # Report bugs to . # GNU libtool home page: . # General help using GNU software: . PROGRAM=libtool PACKAGE=libtool VERSION=2.4.2 TIMESTAMP="" package_revision=1.3337 # Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $1 _LTECHO_EOF' } # NLS nuisances: We save the old values to restore during execute mode. lt_user_locale= lt_safe_locale= for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES do eval "if test \"\${$lt_var+set}\" = set; then save_$lt_var=\$$lt_var $lt_var=C export $lt_var lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\" lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\" fi" done LC_ALL=C LANGUAGE=C export LANGUAGE LC_ALL $lt_unset CDPATH # Work around backward compatibility issue on IRIX 6.5. 
On IRIX 6.4+, sh # is ksh but when the shell is invoked as "sh" and the current value of # the _XPG environment variable is not equal to 1 (one), the special # positional parameter $0, within a function call, is the name of the # function. progpath="$0" : ${CP="cp -f"} test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'} : ${MAKE="make"} : ${MKDIR="mkdir"} : ${MV="mv -f"} : ${RM="rm -f"} : ${SHELL="${CONFIG_SHELL-/bin/sh}"} : ${Xsed="$SED -e 1s/^X//"} # Global variables: EXIT_SUCCESS=0 EXIT_FAILURE=1 EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. exit_status=$EXIT_SUCCESS # Make sure IFS has a sensible default lt_nl=' ' IFS=" $lt_nl" dirname="s,/[^/]*$,," basename="s,^.*/,," # func_dirname file append nondir_replacement # Compute the dirname of FILE. If nonempty, add APPEND to the result, # otherwise set result to NONDIR_REPLACEMENT. func_dirname () { func_dirname_result=`$ECHO "${1}" | $SED "$dirname"` if test "X$func_dirname_result" = "X${1}"; then func_dirname_result="${3}" else func_dirname_result="$func_dirname_result${2}" fi } # func_dirname may be replaced by extended shell implementation # func_basename file func_basename () { func_basename_result=`$ECHO "${1}" | $SED "$basename"` } # func_basename may be replaced by extended shell implementation # func_dirname_and_basename file append nondir_replacement # perform func_basename and func_dirname in a single function # call: # dirname: Compute the dirname of FILE. If nonempty, # add APPEND to the result, otherwise set result # to NONDIR_REPLACEMENT. # value returned in "$func_dirname_result" # basename: Compute filename of FILE. # value retuned in "$func_basename_result" # Implementation must be kept synchronized with func_dirname # and func_basename. For efficiency, we do not delegate to # those functions but instead duplicate the functionality here. func_dirname_and_basename () { # Extract subdirectory from the argument. func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"` if test "X$func_dirname_result" = "X${1}"; then func_dirname_result="${3}" else func_dirname_result="$func_dirname_result${2}" fi func_basename_result=`$ECHO "${1}" | $SED -e "$basename"` } # func_dirname_and_basename may be replaced by extended shell implementation # func_stripname prefix suffix name # strip PREFIX and SUFFIX off of NAME. # PREFIX and SUFFIX must not contain globbing or regex special # characters, hashes, percent signs, but SUFFIX may contain a leading # dot (in which case that matches only a dot). # func_strip_suffix prefix name func_stripname () { case ${2} in .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; esac } # func_stripname may be replaced by extended shell implementation # These SED scripts presuppose an absolute path with a trailing slash. pathcar='s,^/\([^/]*\).*$,\1,' pathcdr='s,^/[^/]*,,' removedotparts=':dotsl s@/\./@/@g t dotsl s,/\.$,/,' collapseslashes='s@/\{1,\}@/@g' finalslash='s,/*$,/,' # func_normal_abspath PATH # Remove doubled-up and trailing slashes, "." path components, # and cancel out any ".." path components in PATH after making # it an absolute path. # value returned in "$func_normal_abspath_result" func_normal_abspath () { # Start from root dir and reassemble the path. 
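# Editor's aside (not part of the original libtool script): the shell code
# below cancels "." and ".." path components with sed fragments and a
# component-by-component loop.  The same normalisation is easier to see in a
# conventional language; the following C sketch is illustrative only, the
# name normalize_abspath and the fixed buffer sizes are assumptions, and it
# presumes the input is already an absolute path.

#include <string.h>

/* Collapse duplicate slashes, drop "." components and cancel ".." against
   the previous component; out must hold at least strlen(in)+2 bytes. */
static void normalize_abspath(const char *in, char *out)
{
  char buf[1024];
  char *starts[256];            /* where each kept component begins in out */
  long depth = 0;
  char *tok;

  strncpy(buf, in, sizeof buf - 1);
  buf[sizeof buf - 1] = '\0';
  out[0] = '\0';
  for (tok = strtok(buf, "/"); tok != NULL; tok = strtok(NULL, "/")) {
    if (strcmp(tok, ".") == 0)
      continue;                          /* "." contributes nothing        */
    if (strcmp(tok, "..") == 0) {        /* ".." cancels the previous part */
      if (depth > 0)
        *starts[--depth] = '\0';
      continue;
    }
    if (depth < 256) {
      starts[depth++] = out + strlen(out);
      strcat(out, "/");
      strcat(out, tok);
    }
  }
  if (out[0] == '\0')                    /* everything cancelled: root     */
    strcpy(out, "/");
}
/* e.g. normalize_abspath("/a/./b//c/../d", result) yields "/a/b/d". */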
func_normal_abspath_result= func_normal_abspath_tpath=$1 func_normal_abspath_altnamespace= case $func_normal_abspath_tpath in "") # Empty path, that just means $cwd. func_stripname '' '/' "`pwd`" func_normal_abspath_result=$func_stripname_result return ;; # The next three entries are used to spot a run of precisely # two leading slashes without using negated character classes; # we take advantage of case's first-match behaviour. ///*) # Unusual form of absolute path, do nothing. ;; //*) # Not necessarily an ordinary path; POSIX reserves leading '//' # and for example Cygwin uses it to access remote file shares # over CIFS/SMB, so we conserve a leading double slash if found. func_normal_abspath_altnamespace=/ ;; /*) # Absolute path, do nothing. ;; *) # Relative path, prepend $cwd. func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath ;; esac # Cancel out all the simple stuff to save iterations. We also want # the path to end with a slash for ease of parsing, so make sure # there is one (and only one) here. func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"` while :; do # Processed it all yet? if test "$func_normal_abspath_tpath" = / ; then # If we ascended to the root using ".." the result may be empty now. if test -z "$func_normal_abspath_result" ; then func_normal_abspath_result=/ fi break fi func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$pathcar"` func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$pathcdr"` # Figure out what to do with it case $func_normal_abspath_tcomponent in "") # Trailing empty path component, ignore it. ;; ..) # Parent dir; strip last assembled component from result. func_dirname "$func_normal_abspath_result" func_normal_abspath_result=$func_dirname_result ;; *) # Actual path component, append it. func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent ;; esac done # Restore leading double-slash if one was found on entry. func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result } # func_relative_path SRCDIR DSTDIR # generates a relative path from SRCDIR to DSTDIR, with a trailing # slash if non-empty, suitable for immediately appending a filename # without needing to append a separator. # value returned in "$func_relative_path_result" func_relative_path () { func_relative_path_result= func_normal_abspath "$1" func_relative_path_tlibdir=$func_normal_abspath_result func_normal_abspath "$2" func_relative_path_tbindir=$func_normal_abspath_result # Ascend the tree starting from libdir while :; do # check if we have found a prefix of bindir case $func_relative_path_tbindir in $func_relative_path_tlibdir) # found an exact match func_relative_path_tcancelled= break ;; $func_relative_path_tlibdir*) # found a matching prefix func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" func_relative_path_tcancelled=$func_stripname_result if test -z "$func_relative_path_result"; then func_relative_path_result=. fi break ;; *) func_dirname $func_relative_path_tlibdir func_relative_path_tlibdir=${func_dirname_result} if test "x$func_relative_path_tlibdir" = x ; then # Have to descend all the way to the root! func_relative_path_result=../$func_relative_path_result func_relative_path_tcancelled=$func_relative_path_tbindir break fi func_relative_path_result=../$func_relative_path_result ;; esac done # Now calculate path; take care to avoid doubling-up slashes. 
func_stripname '' '/' "$func_relative_path_result" func_relative_path_result=$func_stripname_result func_stripname '/' '/' "$func_relative_path_tcancelled" if test "x$func_stripname_result" != x ; then func_relative_path_result=${func_relative_path_result}/${func_stripname_result} fi # Normalisation. If bindir is libdir, return empty string, # else relative path ending with a slash; either way, target # file name can be directly appended. if test ! -z "$func_relative_path_result"; then func_stripname './' '' "$func_relative_path_result/" func_relative_path_result=$func_stripname_result fi } # The name of this program: func_dirname_and_basename "$progpath" progname=$func_basename_result # Make sure we have an absolute path for reexecution: case $progpath in [\\/]*|[A-Za-z]:\\*) ;; *[\\/]*) progdir=$func_dirname_result progdir=`cd "$progdir" && pwd` progpath="$progdir/$progname" ;; *) save_IFS="$IFS" IFS=${PATH_SEPARATOR-:} for progdir in $PATH; do IFS="$save_IFS" test -x "$progdir/$progname" && break done IFS="$save_IFS" test -n "$progdir" || progdir=`pwd` progpath="$progdir/$progname" ;; esac # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. Xsed="${SED}"' -e 1s/^X//' sed_quote_subst='s/\([`"$\\]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\(["`\\]\)/\\\1/g' # Sed substitution that turns a string into a regex matching for the # string literally. sed_make_literal_regex='s,[].[^$\\*\/],\\&,g' # Sed substitution that converts a w32 file name or path # which contains forward slashes, into one that contains # (escaped) backslashes. A very naive implementation. lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' # Re-`\' parameter expansions in output of double_quote_subst that were # `\'-ed in input to the same. If an odd number of `\' preceded a '$' # in input to double_quote_subst, that '$' was protected from expansion. # Since each input `\' is now two `\'s, look for any number of runs of # four `\'s followed by two `\'s and then a '$'. `\' that '$'. bs='\\' bs2='\\\\' bs4='\\\\\\\\' dollar='\$' sed_double_backslash="\ s/$bs4/&\\ /g s/^$bs2$dollar/$bs&/ s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g s/\n//g" # Standard options: opt_dry_run=false opt_help=false opt_quiet=false opt_verbose=false opt_warning=: # func_echo arg... # Echo program name prefixed message, along with the current mode # name if it has been set yet. func_echo () { $ECHO "$progname: ${opt_mode+$opt_mode: }$*" } # func_verbose arg... # Echo program name prefixed message in verbose mode only. func_verbose () { $opt_verbose && func_echo ${1+"$@"} # A bug in bash halts the script if the last line of a function # fails when set -e is in force, so we need another command to # work around that: : } # func_echo_all arg... # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "$*" } # func_error arg... # Echo program name prefixed message to standard error. func_error () { $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2 } # func_warning arg... # Echo program name prefixed warning message to standard error. func_warning () { $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2 # bash bug again: : } # func_fatal_error arg... # Echo program name prefixed message to standard error, and exit. func_fatal_error () { func_error ${1+"$@"} exit $EXIT_FAILURE } # func_fatal_help arg... 
# Echo program name prefixed message to standard error, followed by # a help hint, and exit. func_fatal_help () { func_error ${1+"$@"} func_fatal_error "$help" } help="Try \`$progname --help' for more information." ## default # func_grep expression filename # Check whether EXPRESSION matches any line of FILENAME, without output. func_grep () { $GREP "$1" "$2" >/dev/null 2>&1 } # func_mkdir_p directory-path # Make sure the entire path to DIRECTORY-PATH is available. func_mkdir_p () { my_directory_path="$1" my_dir_list= if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then # Protect directory names starting with `-' case $my_directory_path in -*) my_directory_path="./$my_directory_path" ;; esac # While some portion of DIR does not yet exist... while test ! -d "$my_directory_path"; do # ...make a list in topmost first order. Use a colon delimited # list incase some portion of path contains whitespace. my_dir_list="$my_directory_path:$my_dir_list" # If the last portion added has no slash in it, the list is done case $my_directory_path in */*) ;; *) break ;; esac # ...otherwise throw away the child directory and loop my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"` done my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'` save_mkdir_p_IFS="$IFS"; IFS=':' for my_dir in $my_dir_list; do IFS="$save_mkdir_p_IFS" # mkdir can fail with a `File exist' error if two processes # try to create one of the directories concurrently. Don't # stop in that case! $MKDIR "$my_dir" 2>/dev/null || : done IFS="$save_mkdir_p_IFS" # Bail out if we (or some other process) failed to create a directory. test -d "$my_directory_path" || \ func_fatal_error "Failed to create \`$1'" fi } # func_mktempdir [string] # Make a temporary directory that won't clash with other running # libtool processes, and avoids race conditions if possible. If # given, STRING is the basename for that directory. func_mktempdir () { my_template="${TMPDIR-/tmp}/${1-$progname}" if test "$opt_dry_run" = ":"; then # Return a directory name, but don't create it in dry-run mode my_tmpdir="${my_template}-$$" else # If mktemp works, use that first and foremost my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null` if test ! -d "$my_tmpdir"; then # Failing that, at least try and use $RANDOM to avoid a race my_tmpdir="${my_template}-${RANDOM-0}$$" save_mktempdir_umask=`umask` umask 0077 $MKDIR "$my_tmpdir" umask $save_mktempdir_umask fi # If we're not in dry-run mode, bomb out on failure test -d "$my_tmpdir" || \ func_fatal_error "cannot create temporary directory \`$my_tmpdir'" fi $ECHO "$my_tmpdir" } # func_quote_for_eval arg # Aesthetically quote ARG to be evaled later. # This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT # is double-quoted, suitable for a subsequent eval, whereas # FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters # which are still active within double quotes backslashified. func_quote_for_eval () { case $1 in *[\\\`\"\$]*) func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;; *) func_quote_for_eval_unquoted_result="$1" ;; esac case $func_quote_for_eval_unquoted_result in # Double-quote args containing shell metacharacters to delay # word splitting, command substitution and and variable # expansion for a subsequent eval. # Many Bourne shells cannot handle close brackets correctly # in scan sets, so we specify it separately. 
*[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\"" ;; *) func_quote_for_eval_result="$func_quote_for_eval_unquoted_result" esac } # func_quote_for_expand arg # Aesthetically quote ARG to be evaled later; same as above, # but do not quote variable references. func_quote_for_expand () { case $1 in *[\\\`\"]*) my_arg=`$ECHO "$1" | $SED \ -e "$double_quote_subst" -e "$sed_double_backslash"` ;; *) my_arg="$1" ;; esac case $my_arg in # Double-quote args containing shell metacharacters to delay # word splitting and command substitution for a subsequent eval. # Many Bourne shells cannot handle close brackets correctly # in scan sets, so we specify it separately. *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") my_arg="\"$my_arg\"" ;; esac func_quote_for_expand_result="$my_arg" } # func_show_eval cmd [fail_exp] # Unless opt_silent is true, then output CMD. Then, if opt_dryrun is # not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP # is given, then evaluate it. func_show_eval () { my_cmd="$1" my_fail_exp="${2-:}" ${opt_silent-false} || { func_quote_for_expand "$my_cmd" eval "func_echo $func_quote_for_expand_result" } if ${opt_dry_run-false}; then :; else eval "$my_cmd" my_status=$? if test "$my_status" -eq 0; then :; else eval "(exit $my_status); $my_fail_exp" fi fi } # func_show_eval_locale cmd [fail_exp] # Unless opt_silent is true, then output CMD. Then, if opt_dryrun is # not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP # is given, then evaluate it. Use the saved locale for evaluation. func_show_eval_locale () { my_cmd="$1" my_fail_exp="${2-:}" ${opt_silent-false} || { func_quote_for_expand "$my_cmd" eval "func_echo $func_quote_for_expand_result" } if ${opt_dry_run-false}; then :; else eval "$lt_user_locale $my_cmd" my_status=$? eval "$lt_safe_locale" if test "$my_status" -eq 0; then :; else eval "(exit $my_status); $my_fail_exp" fi fi } # func_tr_sh # Turn $1 into a string suitable for a shell variable name. # Result is stored in $func_tr_sh_result. All characters # not in the set a-zA-Z0-9_ are replaced with '_'. Further, # if $1 begins with a digit, a '_' is prepended as well. func_tr_sh () { case $1 in [0-9]* | *[!a-zA-Z0-9_]*) func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'` ;; * ) func_tr_sh_result=$1 ;; esac } # func_version # Echo version message to standard output and exit. func_version () { $opt_debug $SED -n '/(C)/!b go :more /\./!{ N s/\n# / / b more } :go /^# '$PROGRAM' (GNU /,/# warranty; / { s/^# // s/^# *$// s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/ p }' < "$progpath" exit $? } # func_usage # Echo short help message to standard output and exit. func_usage () { $opt_debug $SED -n '/^# Usage:/,/^# *.*--help/ { s/^# // s/^# *$// s/\$progname/'$progname'/ p }' < "$progpath" echo $ECHO "run \`$progname --help | more' for full usage" exit $? } # func_help [NOEXIT] # Echo long help message to standard output and exit, # unless 'noexit' is passed as argument. 
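# Editor's aside (not part of the original libtool script): func_usage and
# func_help work by re-reading this script with sed and printing its own
# leading comment block, from the "# Usage:" line through the bug-report
# line.  A hedged C sketch of that self-documenting-header idea follows; the
# function name print_embedded_help and the exact marker strings are
# assumptions, not part of libtool.

#include <stdio.h>
#include <string.h>

static void print_embedded_help(const char *path)
{
  FILE *fp = fopen(path, "r");
  char line[1024];
  int printing = 0;

  if (fp == NULL)
    return;
  while (fgets(line, sizeof line, fp) != NULL) {
    if (!printing && strncmp(line, "# Usage:", 8) == 0)
      printing = 1;                       /* start of the embedded block    */
    if (printing) {
      if (strncmp(line, "# ", 2) == 0)
        fputs(line + 2, stdout);          /* strip the comment prefix       */
      else if (line[0] == '#')
        fputs(line + 1, stdout);
      else
        break;                            /* first non-comment line ends it */
      if (strstr(line, "Report bugs to") != NULL)
        break;                            /* end marker, as in func_help    */
    }
  }
  fclose(fp);
}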
func_help () { $opt_debug $SED -n '/^# Usage:/,/# Report bugs to/ { :print s/^# // s/^# *$// s*\$progname*'$progname'* s*\$host*'"$host"'* s*\$SHELL*'"$SHELL"'* s*\$LTCC*'"$LTCC"'* s*\$LTCFLAGS*'"$LTCFLAGS"'* s*\$LD*'"$LD"'* s/\$with_gnu_ld/'"$with_gnu_ld"'/ s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/ s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/ p d } /^# .* home page:/b print /^# General help using/b print ' < "$progpath" ret=$? if test -z "$1"; then exit $ret fi } # func_missing_arg argname # Echo program name prefixed message to standard error and set global # exit_cmd. func_missing_arg () { $opt_debug func_error "missing argument for $1." exit_cmd=exit } # func_split_short_opt shortopt # Set func_split_short_opt_name and func_split_short_opt_arg shell # variables after splitting SHORTOPT after the 2nd character. func_split_short_opt () { my_sed_short_opt='1s/^\(..\).*$/\1/;q' my_sed_short_rest='1s/^..\(.*\)$/\1/;q' func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"` func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"` } # func_split_short_opt may be replaced by extended shell implementation # func_split_long_opt longopt # Set func_split_long_opt_name and func_split_long_opt_arg shell # variables after splitting LONGOPT at the `=' sign. func_split_long_opt () { my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q' my_sed_long_arg='1s/^--[^=]*=//' func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"` func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"` } # func_split_long_opt may be replaced by extended shell implementation exit_cmd=: magic="%%%MAGIC variable%%%" magic_exe="%%%MAGIC EXE variable%%%" # Global variables. nonopt= preserve_args= lo2o="s/\\.lo\$/.${objext}/" o2lo="s/\\.${objext}\$/.lo/" extracted_archives= extracted_serial=0 # If this variable is set in any of the actions, the command in it # will be execed at the end. This prevents here-documents from being # left over by shells. exec_cmd= # func_append var value # Append VALUE to the end of shell variable VAR. func_append () { eval "${1}=\$${1}\${2}" } # func_append may be replaced by extended shell implementation # func_append_quoted var value # Quote VALUE and append to the end of shell variable VAR, separated # by a space. func_append_quoted () { func_quote_for_eval "${2}" eval "${1}=\$${1}\\ \$func_quote_for_eval_result" } # func_append_quoted may be replaced by extended shell implementation # func_arith arithmetic-term... func_arith () { func_arith_result=`expr "${@}"` } # func_arith may be replaced by extended shell implementation # func_len string # STRING may not start with a hyphen. func_len () { func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len` } # func_len may be replaced by extended shell implementation # func_lo2o object func_lo2o () { func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"` } # func_lo2o may be replaced by extended shell implementation # func_xform libobj-or-source func_xform () { func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'` } # func_xform may be replaced by extended shell implementation # func_fatal_configuration arg... # Echo program name prefixed message to standard error, followed by # a configuration failure hint, and exit. func_fatal_configuration () { func_error ${1+"$@"} func_error "See the $PACKAGE documentation for more information." func_fatal_error "Fatal configuration error." 
} # func_config # Display the configuration for all the tags in this script. func_config () { re_begincf='^# ### BEGIN LIBTOOL' re_endcf='^# ### END LIBTOOL' # Default configuration. $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" # Now print the configurations for the tags. for tagname in $taglist; do $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" done exit $? } # func_features # Display the features supported by this script. func_features () { echo "host: $host" if test "$build_libtool_libs" = yes; then echo "enable shared libraries" else echo "disable shared libraries" fi if test "$build_old_libs" = yes; then echo "enable static libraries" else echo "disable static libraries" fi exit $? } # func_enable_tag tagname # Verify that TAGNAME is valid, and either flag an error and exit, or # enable the TAGNAME tag. We also add TAGNAME to the global $taglist # variable here. func_enable_tag () { # Global variable: tagname="$1" re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" sed_extractcf="/$re_begincf/,/$re_endcf/p" # Validate tagname. case $tagname in *[!-_A-Za-z0-9,/]*) func_fatal_error "invalid tag name: $tagname" ;; esac # Don't test for the "default" C tag, as we know it's # there but not specially marked. case $tagname in CC) ;; *) if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then taglist="$taglist $tagname" # Evaluate the configuration. Be careful to quote the path # and the sed script, to avoid splitting on whitespace, but # also don't use non-portable quotes within backquotes within # quotes we have to do it in 2 steps: extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` eval "$extractedcf" else func_error "ignoring unknown tag $tagname" fi ;; esac } # func_check_version_match # Ensure that we are using m4 macros, and libtool script from the same # release of libtool. func_check_version_match () { if test "$package_revision" != "$macro_revision"; then if test "$VERSION" != "$macro_version"; then if test -z "$macro_version"; then cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, but the $progname: definition of this LT_INIT comes from an older release. $progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION $progname: and run autoconf again. _LT_EOF else cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, but the $progname: definition of this LT_INIT comes from $PACKAGE $macro_version. $progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION $progname: and run autoconf again. _LT_EOF fi else cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, $progname: but the definition of this LT_INIT comes from revision $macro_revision. $progname: You should recreate aclocal.m4 with macros from revision $package_revision $progname: of $PACKAGE $VERSION and run autoconf again. 
_LT_EOF fi exit $EXIT_MISMATCH fi } # Shorthand for --mode=foo, only valid as the first argument case $1 in clean|clea|cle|cl) shift; set dummy --mode clean ${1+"$@"}; shift ;; compile|compil|compi|comp|com|co|c) shift; set dummy --mode compile ${1+"$@"}; shift ;; execute|execut|execu|exec|exe|ex|e) shift; set dummy --mode execute ${1+"$@"}; shift ;; finish|finis|fini|fin|fi|f) shift; set dummy --mode finish ${1+"$@"}; shift ;; install|instal|insta|inst|ins|in|i) shift; set dummy --mode install ${1+"$@"}; shift ;; link|lin|li|l) shift; set dummy --mode link ${1+"$@"}; shift ;; uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) shift; set dummy --mode uninstall ${1+"$@"}; shift ;; esac # Option defaults: opt_debug=: opt_dry_run=false opt_config=false opt_preserve_dup_deps=false opt_features=false opt_finish=false opt_help=false opt_help_all=false opt_silent=: opt_warning=: opt_verbose=: opt_silent=false opt_verbose=false # Parse options once, thoroughly. This comes as soon as possible in the # script to make things like `--version' happen as quickly as we can. { # this just eases exit handling while test $# -gt 0; do opt="$1" shift case $opt in --debug|-x) opt_debug='set -x' func_echo "enabling shell trace mode" $opt_debug ;; --dry-run|--dryrun|-n) opt_dry_run=: ;; --config) opt_config=: func_config ;; --dlopen|-dlopen) optarg="$1" opt_dlopen="${opt_dlopen+$opt_dlopen }$optarg" shift ;; --preserve-dup-deps) opt_preserve_dup_deps=: ;; --features) opt_features=: func_features ;; --finish) opt_finish=: set dummy --mode finish ${1+"$@"}; shift ;; --help) opt_help=: ;; --help-all) opt_help_all=: opt_help=': help-all' ;; --mode) test $# = 0 && func_missing_arg $opt && break optarg="$1" opt_mode="$optarg" case $optarg in # Valid mode arguments: clean|compile|execute|finish|install|link|relink|uninstall) ;; # Catch anything else as an error *) func_error "invalid argument for $opt" exit_cmd=exit break ;; esac shift ;; --no-silent|--no-quiet) opt_silent=false func_append preserve_args " $opt" ;; --no-warning|--no-warn) opt_warning=false func_append preserve_args " $opt" ;; --no-verbose) opt_verbose=false func_append preserve_args " $opt" ;; --silent|--quiet) opt_silent=: func_append preserve_args " $opt" opt_verbose=false ;; --verbose|-v) opt_verbose=: func_append preserve_args " $opt" opt_silent=false ;; --tag) test $# = 0 && func_missing_arg $opt && break optarg="$1" opt_tag="$optarg" func_append preserve_args " $opt $optarg" func_enable_tag "$optarg" shift ;; -\?|-h) func_usage ;; --help) func_help ;; --version) func_version ;; # Separate optargs to long options: --*=*) func_split_long_opt "$opt" set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"} shift ;; # Separate non-argument short options: -\?*|-h*|-n*|-v*) func_split_short_opt "$opt" set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"} shift ;; --) break ;; -*) func_fatal_help "unrecognized option \`$opt'" ;; *) set dummy "$opt" ${1+"$@"}; shift; break ;; esac done # Validate options: # save first non-option argument if test "$#" -gt 0; then nonopt="$opt" shift fi # preserve --debug test "$opt_debug" = : || func_append preserve_args " --debug" case $host in *cygwin* | *mingw* | *pw32* | *cegcc*) # don't eliminate duplications in $postdeps and $predeps opt_duplicate_compiler_generated_deps=: ;; *) opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps ;; esac $opt_help || { # Sanity checks first: func_check_version_match if test "$build_libtool_libs" != yes && test "$build_old_libs" 
!= yes; then func_fatal_configuration "not configured to build any kind of library" fi # Darwin sucks eval std_shrext=\"$shrext_cmds\" # Only execute mode is allowed to have -dlopen flags. if test -n "$opt_dlopen" && test "$opt_mode" != execute; then func_error "unrecognized option \`-dlopen'" $ECHO "$help" 1>&2 exit $EXIT_FAILURE fi # Change the help message to a mode-specific one. generic_help="$help" help="Try \`$progname --help --mode=$opt_mode' for more information." } # Bail if the options were screwed $exit_cmd $EXIT_FAILURE } ## ----------- ## ## Main. ## ## ----------- ## # func_lalib_p file # True iff FILE is a libtool `.la' library or `.lo' object file. # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_lalib_p () { test -f "$1" && $SED -e 4q "$1" 2>/dev/null \ | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 } # func_lalib_unsafe_p file # True iff FILE is a libtool `.la' library or `.lo' object file. # This function implements the same check as func_lalib_p without # resorting to external programs. To this end, it redirects stdin and # closes it afterwards, without saving the original file descriptor. # As a safety measure, use it only where a negative result would be # fatal anyway. Works if `file' does not exist. func_lalib_unsafe_p () { lalib_p=no if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then for lalib_p_l in 1 2 3 4 do read lalib_p_line case "$lalib_p_line" in \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; esac done exec 0<&5 5<&- fi test "$lalib_p" = yes } # func_ltwrapper_script_p file # True iff FILE is a libtool wrapper script # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_script_p () { func_lalib_p "$1" } # func_ltwrapper_executable_p file # True iff FILE is a libtool wrapper executable # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_executable_p () { func_ltwrapper_exec_suffix= case $1 in *.exe) ;; *) func_ltwrapper_exec_suffix=.exe ;; esac $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 } # func_ltwrapper_scriptname file # Assumes file is an ltwrapper_executable # uses $file to determine the appropriate filename for a # temporary ltwrapper_script. func_ltwrapper_scriptname () { func_dirname_and_basename "$1" "" "." func_stripname '' '.exe' "$func_basename_result" func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper" } # func_ltwrapper_p file # True iff FILE is a libtool wrapper script or wrapper executable # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_p () { func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" } # func_execute_cmds commands fail_cmd # Execute tilde-delimited COMMANDS. # If FAIL_CMD is given, eval that upon failure. # FAIL_CMD may read-access the current command in variable CMD! func_execute_cmds () { $opt_debug save_ifs=$IFS; IFS='~' for cmd in $1; do IFS=$save_ifs eval cmd=\"$cmd\" func_show_eval "$cmd" "${2-:}" done IFS=$save_ifs } # func_source file # Source FILE, adding directory component if necessary. # Note that it is not necessary on cygwin/mingw to append a dot to # FILE even if both FILE and FILE.exe exist: automatic-append-.exe # behavior happens only for exec(3), not for open(2)! Also, sourcing # `FILE.' does not work on cygwin managed mounts. func_source () { $opt_debug case $1 in */* | *\\*) . 
"$1" ;; *) . "./$1" ;; esac } # func_resolve_sysroot PATH # Replace a leading = in PATH with a sysroot. Store the result into # func_resolve_sysroot_result func_resolve_sysroot () { func_resolve_sysroot_result=$1 case $func_resolve_sysroot_result in =*) func_stripname '=' '' "$func_resolve_sysroot_result" func_resolve_sysroot_result=$lt_sysroot$func_stripname_result ;; esac } # func_replace_sysroot PATH # If PATH begins with the sysroot, replace it with = and # store the result into func_replace_sysroot_result. func_replace_sysroot () { case "$lt_sysroot:$1" in ?*:"$lt_sysroot"*) func_stripname "$lt_sysroot" '' "$1" func_replace_sysroot_result="=$func_stripname_result" ;; *) # Including no sysroot. func_replace_sysroot_result=$1 ;; esac } # func_infer_tag arg # Infer tagged configuration to use if any are available and # if one wasn't chosen via the "--tag" command line option. # Only attempt this if the compiler in the base compile # command doesn't match the default compiler. # arg is usually of the form 'gcc ...' func_infer_tag () { $opt_debug if test -n "$available_tags" && test -z "$tagname"; then CC_quoted= for arg in $CC; do func_append_quoted CC_quoted "$arg" done CC_expanded=`func_echo_all $CC` CC_quoted_expanded=`func_echo_all $CC_quoted` case $@ in # Blanks in the command may have been stripped by the calling shell, # but not from the CC environment variable when configure was run. " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; # Blanks at the start of $base_compile will cause this to fail # if we don't check for them as well. *) for z in $available_tags; do if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then # Evaluate the configuration. eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" CC_quoted= for arg in $CC; do # Double-quote args containing other shell metacharacters. func_append_quoted CC_quoted "$arg" done CC_expanded=`func_echo_all $CC` CC_quoted_expanded=`func_echo_all $CC_quoted` case "$@ " in " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) # The compiler in the base compile command matches # the one in the tagged configuration. # Assume this is the tagged configuration we want. tagname=$z break ;; esac fi done # If $tagname still isn't set, then no tagged configuration # was found and let the user know that the "--tag" command # line option must be used. if test -z "$tagname"; then func_echo "unable to infer tagged configuration" func_fatal_error "specify a tag with \`--tag'" # else # func_verbose "using $tagname tagged configuration" fi ;; esac fi } # func_write_libtool_object output_name pic_name nonpic_name # Create a libtool object file (analogous to a ".la" file), # but don't create it if we're doing a dry run. func_write_libtool_object () { write_libobj=${1} if test "$build_libtool_libs" = yes; then write_lobj=\'${2}\' else write_lobj=none fi if test "$build_old_libs" = yes; then write_oldobj=\'${3}\' else write_oldobj=none fi $opt_dry_run || { cat >${write_libobj}T </dev/null` if test "$?" 
-eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | $SED -e "$lt_sed_naive_backslashify"` else func_convert_core_file_wine_to_w32_result= fi fi } # end: func_convert_core_file_wine_to_w32 # func_convert_core_path_wine_to_w32 ARG # Helper function used by path conversion functions when $build is *nix, and # $host is mingw, cygwin, or some other w32 environment. Relies on a correctly # configured wine environment available, with the winepath program in $build's # $PATH. Assumes ARG has no leading or trailing path separator characters. # # ARG is path to be converted from $build format to win32. # Result is available in $func_convert_core_path_wine_to_w32_result. # Unconvertible file (directory) names in ARG are skipped; if no directory names # are convertible, then the result may be empty. func_convert_core_path_wine_to_w32 () { $opt_debug # unfortunately, winepath doesn't convert paths, only file names func_convert_core_path_wine_to_w32_result="" if test -n "$1"; then oldIFS=$IFS IFS=: for func_convert_core_path_wine_to_w32_f in $1; do IFS=$oldIFS func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" if test -n "$func_convert_core_file_wine_to_w32_result" ; then if test -z "$func_convert_core_path_wine_to_w32_result"; then func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result" else func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" fi fi done IFS=$oldIFS fi } # end: func_convert_core_path_wine_to_w32 # func_cygpath ARGS... # Wrapper around calling the cygpath program via LT_CYGPATH. This is used when # when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) # $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or # (2), returns the Cygwin file name or path in func_cygpath_result (input # file name or path is assumed to be in w32 format, as previously converted # from $build's *nix or MSYS format). In case (3), returns the w32 file name # or path in func_cygpath_result (input file name or path is assumed to be in # Cygwin format). Returns an empty string on error. # # ARGS are passed to cygpath, with the last one being the file name or path to # be converted. # # Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH # environment variable; do not put it in $PATH. func_cygpath () { $opt_debug if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` if test "$?" -ne 0; then # on failure, ensure result is empty func_cygpath_result= fi else func_cygpath_result= func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'" fi } #end: func_cygpath # func_convert_core_msys_to_w32 ARG # Convert file name or path ARG from MSYS format to w32 format. Return # result in func_convert_core_msys_to_w32_result. func_convert_core_msys_to_w32 () { $opt_debug # awkward: cmd appends spaces to result func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"` } #end: func_convert_core_msys_to_w32 # func_convert_file_check ARG1 ARG2 # Verify that ARG1 (a file name in $build format) was converted to $host # format in ARG2. Otherwise, emit an error message, but continue (resetting # func_to_host_file_result to ARG1). 
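# Illustrative example (hypothetical file name): after a failed conversion,
#   func_convert_file_check '/build/libfoo.la' ''
# would emit the "Could not determine host file name ..." errors and fall
# back to func_to_host_file_result='/build/libfoo.la'.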
func_convert_file_check () { $opt_debug if test -z "$2" && test -n "$1" ; then func_error "Could not determine host file name corresponding to" func_error " \`$1'" func_error "Continuing, but uninstalled executables may not work." # Fallback: func_to_host_file_result="$1" fi } # end func_convert_file_check # func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH # Verify that FROM_PATH (a path in $build format) was converted to $host # format in TO_PATH. Otherwise, emit an error message, but continue, resetting # func_to_host_file_result to a simplistic fallback value (see below). func_convert_path_check () { $opt_debug if test -z "$4" && test -n "$3"; then func_error "Could not determine the host path corresponding to" func_error " \`$3'" func_error "Continuing, but uninstalled executables may not work." # Fallback. This is a deliberately simplistic "conversion" and # should not be "improved". See libtool.info. if test "x$1" != "x$2"; then lt_replace_pathsep_chars="s|$1|$2|g" func_to_host_path_result=`echo "$3" | $SED -e "$lt_replace_pathsep_chars"` else func_to_host_path_result="$3" fi fi } # end func_convert_path_check # func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG # Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT # and appending REPL if ORIG matches BACKPAT. func_convert_path_front_back_pathsep () { $opt_debug case $4 in $1 ) func_to_host_path_result="$3$func_to_host_path_result" ;; esac case $4 in $2 ) func_append func_to_host_path_result "$3" ;; esac } # end func_convert_path_front_back_pathsep ################################################## # $build to $host FILE NAME CONVERSION FUNCTIONS # ################################################## # invoked via `$to_host_file_cmd ARG' # # In each case, ARG is the path to be converted from $build to $host format. # Result will be available in $func_to_host_file_result. # func_to_host_file ARG # Converts the file name ARG from $build format to $host format. Return result # in func_to_host_file_result. func_to_host_file () { $opt_debug $to_host_file_cmd "$1" } # end func_to_host_file # func_to_tool_file ARG LAZY # converts the file name ARG from $build format to toolchain format. Return # result in func_to_tool_file_result. If the conversion in use is listed # in (the comma separated) LAZY, no conversion takes place. func_to_tool_file () { $opt_debug case ,$2, in *,"$to_tool_file_cmd",*) func_to_tool_file_result=$1 ;; *) $to_tool_file_cmd "$1" func_to_tool_file_result=$func_to_host_file_result ;; esac } # end func_to_tool_file # func_convert_file_noop ARG # Copy ARG to func_to_host_file_result. func_convert_file_noop () { func_to_host_file_result="$1" } # end func_convert_file_noop # func_convert_file_msys_to_w32 ARG # Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic # conversion to w32 is not available inside the cwrapper. Returns result in # func_to_host_file_result. func_convert_file_msys_to_w32 () { $opt_debug func_to_host_file_result="$1" if test -n "$1"; then func_convert_core_msys_to_w32 "$1" func_to_host_file_result="$func_convert_core_msys_to_w32_result" fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_msys_to_w32 # func_convert_file_cygwin_to_w32 ARG # Convert file name ARG from Cygwin to w32 format. Returns result in # func_to_host_file_result. 
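# Illustrative example (hypothetical path; the actual drive prefix depends
# on where Cygwin is installed): on a Cygwin $build,
#   func_convert_file_cygwin_to_w32 /usr/local/lib/libfoo.la
# runs `cygpath -m' and might leave something like
#   func_to_host_file_result='C:/cygwin/usr/local/lib/libfoo.la'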
func_convert_file_cygwin_to_w32 () { $opt_debug func_to_host_file_result="$1" if test -n "$1"; then # because $build is cygwin, we call "the" cygpath in $PATH; no need to use # LT_CYGPATH in this case. func_to_host_file_result=`cygpath -m "$1"` fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_cygwin_to_w32 # func_convert_file_nix_to_w32 ARG # Convert file name ARG from *nix to w32 format. Requires a wine environment # and a working winepath. Returns result in func_to_host_file_result. func_convert_file_nix_to_w32 () { $opt_debug func_to_host_file_result="$1" if test -n "$1"; then func_convert_core_file_wine_to_w32 "$1" func_to_host_file_result="$func_convert_core_file_wine_to_w32_result" fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_nix_to_w32 # func_convert_file_msys_to_cygwin ARG # Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. # Returns result in func_to_host_file_result. func_convert_file_msys_to_cygwin () { $opt_debug func_to_host_file_result="$1" if test -n "$1"; then func_convert_core_msys_to_w32 "$1" func_cygpath -u "$func_convert_core_msys_to_w32_result" func_to_host_file_result="$func_cygpath_result" fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_msys_to_cygwin # func_convert_file_nix_to_cygwin ARG # Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed # in a wine environment, working winepath, and LT_CYGPATH set. Returns result # in func_to_host_file_result. func_convert_file_nix_to_cygwin () { $opt_debug func_to_host_file_result="$1" if test -n "$1"; then # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. func_convert_core_file_wine_to_w32 "$1" func_cygpath -u "$func_convert_core_file_wine_to_w32_result" func_to_host_file_result="$func_cygpath_result" fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_nix_to_cygwin ############################################# # $build to $host PATH CONVERSION FUNCTIONS # ############################################# # invoked via `$to_host_path_cmd ARG' # # In each case, ARG is the path to be converted from $build to $host format. # The result will be available in $func_to_host_path_result. # # Path separators are also converted from $build format to $host format. If # ARG begins or ends with a path separator character, it is preserved (but # converted to $host format) on output. # # All path conversion functions are named using the following convention: # file name conversion function : func_convert_file_X_to_Y () # path conversion function : func_convert_path_X_to_Y () # where, for any given $build/$host combination the 'X_to_Y' value is the # same. If conversion functions are added for new $build/$host combinations, # the two new functions must follow this pattern, or func_init_to_host_path_cmd # will break. # func_init_to_host_path_cmd # Ensures that function "pointer" variable $to_host_path_cmd is set to the # appropriate value, based on the value of $to_host_file_cmd. to_host_path_cmd= func_init_to_host_path_cmd () { $opt_debug if test -z "$to_host_path_cmd"; then func_stripname 'func_convert_file_' '' "$to_host_file_cmd" to_host_path_cmd="func_convert_path_${func_stripname_result}" fi } # func_to_host_path ARG # Converts the path ARG from $build format to $host format. Return result # in func_to_host_path_result. 
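# Illustrative example (hypothetical paths): with an MSYS $build and a
# mingw $host, to_host_path_cmd resolves to func_convert_path_msys_to_w32,
# so
#   func_to_host_path "/c/deps/lib:/c/extra/lib"
# might leave
#   func_to_host_path_result='c:\deps\lib;c:\extra\lib'
# i.e. the net effect is that each component is converted and the `:'
# separators become `;'.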
func_to_host_path () { $opt_debug func_init_to_host_path_cmd $to_host_path_cmd "$1" } # end func_to_host_path # func_convert_path_noop ARG # Copy ARG to func_to_host_path_result. func_convert_path_noop () { func_to_host_path_result="$1" } # end func_convert_path_noop # func_convert_path_msys_to_w32 ARG # Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic # conversion to w32 is not available inside the cwrapper. Returns result in # func_to_host_path_result. func_convert_path_msys_to_w32 () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # Remove leading and trailing path separator characters from ARG. MSYS # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; # and winepath ignores them completely. func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" func_to_host_path_result="$func_convert_core_msys_to_w32_result" func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_msys_to_w32 # func_convert_path_cygwin_to_w32 ARG # Convert path ARG from Cygwin to w32 format. Returns result in # func_to_host_file_result. func_convert_path_cygwin_to_w32 () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_cygwin_to_w32 # func_convert_path_nix_to_w32 ARG # Convert path ARG from *nix to w32 format. Requires a wine environment and # a working winepath. Returns result in func_to_host_file_result. func_convert_path_nix_to_w32 () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" func_to_host_path_result="$func_convert_core_path_wine_to_w32_result" func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_nix_to_w32 # func_convert_path_msys_to_cygwin ARG # Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. # Returns result in func_to_host_file_result. func_convert_path_msys_to_cygwin () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_msys_to_w32_result" func_to_host_path_result="$func_cygpath_result" func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_msys_to_cygwin # func_convert_path_nix_to_cygwin ARG # Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a # a wine environment, working winepath, and LT_CYGPATH set. Returns result in # func_to_host_file_result. func_convert_path_nix_to_cygwin () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # Remove leading and trailing path separator characters from # ARG. 
msys behavior is inconsistent here, cygpath turns them # into '.;' and ';.', and winepath ignores them completely. func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" func_to_host_path_result="$func_cygpath_result" func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_nix_to_cygwin # func_mode_compile arg... func_mode_compile () { $opt_debug # Get the compilation command and the source file. base_compile= srcfile="$nonopt" # always keep a non-empty value in "srcfile" suppress_opt=yes suppress_output= arg_mode=normal libobj= later= pie_flag= for arg do case $arg_mode in arg ) # do not "continue". Instead, add this to base_compile lastarg="$arg" arg_mode=normal ;; target ) libobj="$arg" arg_mode=normal continue ;; normal ) # Accept any command-line options. case $arg in -o) test -n "$libobj" && \ func_fatal_error "you cannot specify \`-o' more than once" arg_mode=target continue ;; -pie | -fpie | -fPIE) func_append pie_flag " $arg" continue ;; -shared | -static | -prefer-pic | -prefer-non-pic) func_append later " $arg" continue ;; -no-suppress) suppress_opt=no continue ;; -Xcompiler) arg_mode=arg # the next one goes into the "base_compile" arg list continue # The current "srcfile" will either be retained or ;; # replaced later. I would guess that would be a bug. -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result lastarg= save_ifs="$IFS"; IFS=',' for arg in $args; do IFS="$save_ifs" func_append_quoted lastarg "$arg" done IFS="$save_ifs" func_stripname ' ' '' "$lastarg" lastarg=$func_stripname_result # Add the arguments to base_compile. func_append base_compile " $lastarg" continue ;; *) # Accept the current argument as the source file. # The previous "srcfile" becomes the current argument. # lastarg="$srcfile" srcfile="$arg" ;; esac # case $arg ;; esac # case $arg_mode # Aesthetically quote the previous argument. func_append_quoted base_compile "$lastarg" done # for arg case $arg_mode in arg) func_fatal_error "you must specify an argument for -Xcompile" ;; target) func_fatal_error "you must specify a target with \`-o'" ;; *) # Get the name of the library object. test -z "$libobj" && { func_basename "$srcfile" libobj="$func_basename_result" } ;; esac # Recognize several different file suffixes. # If the user specifies -o file.o, it is replaced with file.lo case $libobj in *.[cCFSifmso] | \ *.ada | *.adb | *.ads | *.asm | \ *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) func_xform "$libobj" libobj=$func_xform_result ;; esac case $libobj in *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; *) func_fatal_error "cannot determine name of library object from \`$libobj'" ;; esac func_infer_tag $base_compile for arg in $later; do case $arg in -shared) test "$build_libtool_libs" != yes && \ func_fatal_configuration "can not build a shared library" build_old_libs=no continue ;; -static) build_libtool_libs=no build_old_libs=yes continue ;; -prefer-pic) pic_mode=yes continue ;; -prefer-non-pic) pic_mode=no continue ;; esac done func_quote_for_eval "$libobj" test "X$libobj" != "X$func_quote_for_eval_result" \ && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ && func_warning "libobj name \`$libobj' may not contain shell special characters." 
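# Illustrative sketch (hypothetical file names): for a base compile command
# ending in `my_app.c', the suffix rules above give libobj `my_app.lo' and,
# via func_lo2o, obj `my_app.o'; with the usual $objdir of `.libs' the PIC
# object built below is placed in `.libs/my_app.o' while the non-PIC object
# stays as `my_app.o', and `my_app.lo' records both names.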
func_dirname_and_basename "$obj" "/" "" objname="$func_basename_result" xdir="$func_dirname_result" lobj=${xdir}$objdir/$objname test -z "$base_compile" && \ func_fatal_help "you must specify a compilation command" # Delete any leftover library objects. if test "$build_old_libs" = yes; then removelist="$obj $lobj $libobj ${libobj}T" else removelist="$lobj $libobj ${libobj}T" fi # On Cygwin there's no "real" PIC flag so we must build both object types case $host_os in cygwin* | mingw* | pw32* | os2* | cegcc*) pic_mode=default ;; esac if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then # non-PIC code in shared libraries is not supported pic_mode=default fi # Calculate the filename of the output object if compiler does # not support -o with -c if test "$compiler_c_o" = no; then output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext} lockfile="$output_obj.lock" else output_obj= need_locks=no lockfile= fi # Lock this critical section if it is needed # We use this script file to make the link, it avoids creating a new file if test "$need_locks" = yes; then until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do func_echo "Waiting for $lockfile to be removed" sleep 2 done elif test "$need_locks" = warn; then if test -f "$lockfile"; then $ECHO "\ *** ERROR, $lockfile exists and contains: `cat $lockfile 2>/dev/null` This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi func_append removelist " $output_obj" $ECHO "$srcfile" > "$lockfile" fi $opt_dry_run || $RM $removelist func_append removelist " $lockfile" trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 srcfile=$func_to_tool_file_result func_quote_for_eval "$srcfile" qsrcfile=$func_quote_for_eval_result # Only build a PIC object if we are building libtool libraries. if test "$build_libtool_libs" = yes; then # Without this assignment, base_compile gets emptied. fbsd_hideous_sh_bug=$base_compile if test "$pic_mode" != no; then command="$base_compile $qsrcfile $pic_flag" else # Don't build PIC code command="$base_compile $qsrcfile" fi func_mkdir_p "$xdir$objdir" if test -z "$output_obj"; then # Place PIC objects in $objdir func_append command " -o $lobj" fi func_show_eval_locale "$command" \ 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' if test "$need_locks" = warn && test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then $ECHO "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." 
$opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi # Just move the object if needed, then go on to compile the next one if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then func_show_eval '$MV "$output_obj" "$lobj"' \ 'error=$?; $opt_dry_run || $RM $removelist; exit $error' fi # Allow error messages only from the first compilation. if test "$suppress_opt" = yes; then suppress_output=' >/dev/null 2>&1' fi fi # Only build a position-dependent object if we build old libraries. if test "$build_old_libs" = yes; then if test "$pic_mode" != yes; then # Don't build PIC code command="$base_compile $qsrcfile$pie_flag" else command="$base_compile $qsrcfile $pic_flag" fi if test "$compiler_c_o" = yes; then func_append command " -o $obj" fi # Suppress compiler output if we already did a PIC compilation. func_append command "$suppress_output" func_show_eval_locale "$command" \ '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' if test "$need_locks" = warn && test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then $ECHO "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi # Just move the object if needed if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then func_show_eval '$MV "$output_obj" "$obj"' \ 'error=$?; $opt_dry_run || $RM $removelist; exit $error' fi fi $opt_dry_run || { func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" # Unlock the critical section if it was locked if test "$need_locks" != no; then removelist=$lockfile $RM "$lockfile" fi } exit $EXIT_SUCCESS } $opt_help || { test "$opt_mode" = compile && func_mode_compile ${1+"$@"} } func_mode_help () { # We need to display help for each of the modes. case $opt_mode in "") # Generic help is extracted from the usage comments # at the start of this file. func_help ;; clean) $ECHO \ "Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... Remove files from the build directory. RM is the name of the program to use to delete files associated with each FILE (typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed to RM. If FILE is a libtool library, object or program, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; compile) $ECHO \ "Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE Compile a source file into a libtool library object. This mode accepts the following additional options: -o OUTPUT-FILE set the output file name to OUTPUT-FILE -no-suppress do not suppress compiler output for multiple passes -prefer-pic try to build PIC objects only -prefer-non-pic try to build non-PIC objects only -shared do not build a \`.o' file suitable for static linking -static only build a \`.o' file suitable for static linking -Wc,FLAG pass FLAG directly to the compiler COMPILE-COMMAND is a command to be used in creating a \`standard' object file from the given SOURCEFILE. The output file name is determined by removing the directory component from SOURCEFILE, then substituting the C source code suffix \`.c' with the library object suffix, \`.lo'." 
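# A typical invocation of the mode described above might look like
# (illustrative; file and flag names are hypothetical):
#   $progname --mode=compile gcc -I. -O2 -c my_app.c -o my_app.lo
# which compiles my_app.c into the libtool object my_app.lo together with
# the corresponding PIC/non-PIC `.o' files, as configured.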
;; execute) $ECHO \ "Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... Automatically set library path, then run a program. This mode accepts the following additional options: -dlopen FILE add the directory containing FILE to the library path This mode sets the library path environment variable according to \`-dlopen' flags. If any of the ARGS are libtool executable wrappers, then they are translated into their corresponding uninstalled binary, and any of their required library directories are added to the library path. Then, COMMAND is executed, with ARGS as arguments." ;; finish) $ECHO \ "Usage: $progname [OPTION]... --mode=finish [LIBDIR]... Complete the installation of libtool libraries. Each LIBDIR is a directory that contains libtool libraries. The commands that this mode executes may require superuser privileges. Use the \`--dry-run' option if you just want to see what would be executed." ;; install) $ECHO \ "Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... Install executables or libraries. INSTALL-COMMAND is the installation command. The first component should be either the \`install' or \`cp' program. The following components of INSTALL-COMMAND are treated specially: -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation The rest of the components are interpreted as arguments to that command (only BSD-compatible install options are recognized)." ;; link) $ECHO \ "Usage: $progname [OPTION]... --mode=link LINK-COMMAND... Link object files or libraries together to form another library, or to create an executable program. LINK-COMMAND is a command using the C compiler that you would use to create a program from several object files. The following components of LINK-COMMAND are treated specially: -all-static do not do any dynamic linking at all -avoid-version do not add a version suffix if possible -bindir BINDIR specify path to binaries directory (for systems where libraries must be found in the PATH setting at runtime) -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) -export-symbols SYMFILE try to export only the symbols listed in SYMFILE -export-symbols-regex REGEX try to export only the symbols matching REGEX -LLIBDIR search LIBDIR for required installed libraries -lNAME OUTPUT-FILE requires the installed library libNAME -module build a library that can dlopened -no-fast-install disable the fast-install mode -no-install link a not-installable executable -no-undefined declare that a library does not refer to external symbols -o OUTPUT-FILE create OUTPUT-FILE from the specified objects -objectlist FILE Use a list of object files found in FILE to specify objects -precious-files-regex REGEX don't remove output files matching REGEX -release RELEASE specify package release information -rpath LIBDIR the created library will eventually be installed in LIBDIR -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries -shared only do dynamic linking of libtool libraries -shrext SUFFIX override the standard shared library file extension -static do not do any dynamic linking of uninstalled libtool libraries -static-libtool-libs do not do any dynamic linking of libtool libraries -version-info CURRENT[:REVISION[:AGE]] specify library version info [each variable defaults to 0] -weak LIBNAME declare that the target provides the LIBNAME interface -Wc,FLAG -Xcompiler FLAG pass 
linker-specific FLAG directly to the compiler -Wl,FLAG -Xlinker FLAG pass linker-specific FLAG directly to the linker -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) All other options (arguments beginning with \`-') are ignored. Every other argument is treated as a filename. Files ending in \`.la' are treated as uninstalled libtool libraries, other files are standard or library object files. If the OUTPUT-FILE ends in \`.la', then a libtool library is created, only library objects (\`.lo' files) may be specified, and \`-rpath' is required, except when creating a convenience library. If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created using \`ar' and \`ranlib', or on Windows using \`lib'. If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file is created, otherwise an executable program is created." ;; uninstall) $ECHO \ "Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... Remove libraries from an installation directory. RM is the name of the program to use to delete files associated with each FILE (typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed to RM. If FILE is a libtool library, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; *) func_fatal_help "invalid operation mode \`$opt_mode'" ;; esac echo $ECHO "Try \`$progname --help' for more information about other modes." } # Now that we've collected a possible --mode arg, show help if necessary if $opt_help; then if test "$opt_help" = :; then func_mode_help else { func_help noexit for opt_mode in compile link execute install finish uninstall clean; do func_mode_help done } | sed -n '1p; 2,$s/^Usage:/ or: /p' { func_help noexit for opt_mode in compile link execute install finish uninstall clean; do echo func_mode_help done } | sed '1d /^When reporting/,/^Report/{ H d } $x /information about other modes/d /more detailed .*MODE/d s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' fi exit $? fi # func_mode_execute arg... func_mode_execute () { $opt_debug # The first argument is the command name. cmd="$nonopt" test -z "$cmd" && \ func_fatal_help "you must specify a COMMAND" # Handle -dlopen flags immediately. for file in $opt_dlopen; do test -f "$file" \ || func_fatal_help "\`$file' is not a file" dir= case $file in *.la) func_resolve_sysroot "$file" file=$func_resolve_sysroot_result # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$file" \ || func_fatal_help "\`$lib' is not a valid libtool archive" # Read the libtool library. dlname= library_names= func_source "$file" # Skip this library if it cannot be dlopened. if test -z "$dlname"; then # Warn if it was a shared library. test -n "$library_names" && \ func_warning "\`$file' was not linked with \`-export-dynamic'" continue fi func_dirname "$file" "" "." dir="$func_dirname_result" if test -f "$dir/$objdir/$dlname"; then func_append dir "/$objdir" else if test ! -f "$dir/$dlname"; then func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" fi fi ;; *.lo) # Just add the directory containing the .lo file. func_dirname "$file" "" "." dir="$func_dirname_result" ;; *) func_warning "\`-dlopen' is ignored for non-libtool libraries and objects" continue ;; esac # Get the absolute pathname. absdir=`cd "$dir" && pwd` test -n "$absdir" && dir="$absdir" # Now add the directory to shlibpath_var. 
if eval "test -z \"\$$shlibpath_var\""; then eval "$shlibpath_var=\"\$dir\"" else eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" fi done # This variable tells wrapper scripts just to set shlibpath_var # rather than running their programs. libtool_execute_magic="$magic" # Check if any of the arguments is a wrapper script. args= for file do case $file in -* | *.la | *.lo ) ;; *) # Do a test to see if this is really a libtool program. if func_ltwrapper_script_p "$file"; then func_source "$file" # Transform arg to wrapped name. file="$progdir/$program" elif func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" func_source "$func_ltwrapper_scriptname_result" # Transform arg to wrapped name. file="$progdir/$program" fi ;; esac # Quote arguments (to preserve shell metacharacters). func_append_quoted args "$file" done if test "X$opt_dry_run" = Xfalse; then if test -n "$shlibpath_var"; then # Export the shlibpath_var. eval "export $shlibpath_var" fi # Restore saved environment variables for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES do eval "if test \"\${save_$lt_var+set}\" = set; then $lt_var=\$save_$lt_var; export $lt_var else $lt_unset $lt_var fi" done # Now prepare to actually exec the command. exec_cmd="\$cmd$args" else # Display what would be done. if test -n "$shlibpath_var"; then eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" echo "export $shlibpath_var" fi $ECHO "$cmd$args" exit $EXIT_SUCCESS fi } test "$opt_mode" = execute && func_mode_execute ${1+"$@"} # func_mode_finish arg... func_mode_finish () { $opt_debug libs= libdirs= admincmds= for opt in "$nonopt" ${1+"$@"} do if test -d "$opt"; then func_append libdirs " $opt" elif test -f "$opt"; then if func_lalib_unsafe_p "$opt"; then func_append libs " $opt" else func_warning "\`$opt' is not a valid libtool archive" fi else func_fatal_error "invalid argument \`$opt'" fi done if test -n "$libs"; then if test -n "$lt_sysroot"; then sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" else sysroot_cmd= fi # Remove sysroot references if $opt_dry_run; then for lib in $libs; do echo "removing references to $lt_sysroot and \`=' prefixes from $lib" done else tmpdir=`func_mktempdir` for lib in $libs; do sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ > $tmpdir/tmp-la mv -f $tmpdir/tmp-la $lib done ${RM}r "$tmpdir" fi fi if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then for libdir in $libdirs; do if test -n "$finish_cmds"; then # Do each command in the finish commands. func_execute_cmds "$finish_cmds" 'admincmds="$admincmds '"$cmd"'"' fi if test -n "$finish_eval"; then # Do the single finish_eval. eval cmds=\"$finish_eval\" $opt_dry_run || eval "$cmds" || func_append admincmds " $cmds" fi done fi # Exit here if they wanted silent mode. 
$opt_silent && exit $EXIT_SUCCESS if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then echo "----------------------------------------------------------------------" echo "Libraries have been installed in:" for libdir in $libdirs; do $ECHO " $libdir" done echo echo "If you ever happen to want to link against installed libraries" echo "in a given directory, LIBDIR, you must either use libtool, and" echo "specify the full pathname of the library, or use the \`-LLIBDIR'" echo "flag during linking and do at least one of the following:" if test -n "$shlibpath_var"; then echo " - add LIBDIR to the \`$shlibpath_var' environment variable" echo " during execution" fi if test -n "$runpath_var"; then echo " - add LIBDIR to the \`$runpath_var' environment variable" echo " during linking" fi if test -n "$hardcode_libdir_flag_spec"; then libdir=LIBDIR eval flag=\"$hardcode_libdir_flag_spec\" $ECHO " - use the \`$flag' linker flag" fi if test -n "$admincmds"; then $ECHO " - have your system administrator run these commands:$admincmds" fi if test -f /etc/ld.so.conf; then echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" fi echo echo "See any operating system documentation about shared libraries for" case $host in solaris2.[6789]|solaris2.1[0-9]) echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" echo "pages." ;; *) echo "more information, such as the ld(1) and ld.so(8) manual pages." ;; esac echo "----------------------------------------------------------------------" fi exit $EXIT_SUCCESS } test "$opt_mode" = finish && func_mode_finish ${1+"$@"} # func_mode_install arg... func_mode_install () { $opt_debug # There may be an optional sh(1) argument at the beginning of # install_prog (especially on Windows NT). if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || # Allow the use of GNU shtool's install command. case $nonopt in *shtool*) :;; *) false;; esac; then # Aesthetically quote it. func_quote_for_eval "$nonopt" install_prog="$func_quote_for_eval_result " arg=$1 shift else install_prog= arg=$nonopt fi # The real first argument should be the name of the installation program. # Aesthetically quote it. func_quote_for_eval "$arg" func_append install_prog "$func_quote_for_eval_result" install_shared_prog=$install_prog case " $install_prog " in *[\\\ /]cp\ *) install_cp=: ;; *) install_cp=false ;; esac # We need to accept at least all the BSD install flags. dest= files= opts= prev= install_type= isdir=no stripme= no_mode=: for arg do arg2= if test -n "$dest"; then func_append files " $dest" dest=$arg continue fi case $arg in -d) isdir=yes ;; -f) if $install_cp; then :; else prev=$arg fi ;; -g | -m | -o) prev=$arg ;; -s) stripme=" -s" continue ;; -*) ;; *) # If the previous option needed an argument, then skip it. if test -n "$prev"; then if test "x$prev" = x-m && test -n "$install_override_mode"; then arg2=$install_override_mode no_mode=false fi prev= else dest=$arg continue fi ;; esac # Aesthetically quote the argument. 
func_quote_for_eval "$arg" func_append install_prog " $func_quote_for_eval_result" if test -n "$arg2"; then func_quote_for_eval "$arg2" fi func_append install_shared_prog " $func_quote_for_eval_result" done test -z "$install_prog" && \ func_fatal_help "you must specify an install program" test -n "$prev" && \ func_fatal_help "the \`$prev' option requires an argument" if test -n "$install_override_mode" && $no_mode; then if $install_cp; then :; else func_quote_for_eval "$install_override_mode" func_append install_shared_prog " -m $func_quote_for_eval_result" fi fi if test -z "$files"; then if test -z "$dest"; then func_fatal_help "no file or destination specified" else func_fatal_help "you must specify a destination" fi fi # Strip any trailing slash from the destination. func_stripname '' '/' "$dest" dest=$func_stripname_result # Check to see that the destination is a directory. test -d "$dest" && isdir=yes if test "$isdir" = yes; then destdir="$dest" destname= else func_dirname_and_basename "$dest" "" "." destdir="$func_dirname_result" destname="$func_basename_result" # Not a directory, so check to see that there is only one file specified. set dummy $files; shift test "$#" -gt 1 && \ func_fatal_help "\`$dest' is not a directory" fi case $destdir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) for file in $files; do case $file in *.lo) ;; *) func_fatal_help "\`$destdir' must be an absolute directory name" ;; esac done ;; esac # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic="$magic" staticlibs= future_libdirs= current_libdirs= for file in $files; do # Do each installation. case $file in *.$libext) # Do the static libraries later. func_append staticlibs " $file" ;; *.la) func_resolve_sysroot "$file" file=$func_resolve_sysroot_result # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$file" \ || func_fatal_help "\`$file' is not a valid libtool archive" library_names= old_library= relink_command= func_source "$file" # Add the libdir to current_libdirs if it is the destination. if test "X$destdir" = "X$libdir"; then case "$current_libdirs " in *" $libdir "*) ;; *) func_append current_libdirs " $libdir" ;; esac else # Note the libdir as a future libdir. case "$future_libdirs " in *" $libdir "*) ;; *) func_append future_libdirs " $libdir" ;; esac fi func_dirname "$file" "/" "" dir="$func_dirname_result" func_append dir "$objdir" if test -n "$relink_command"; then # Determine the prefix the user has applied to our future dir. inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` # Don't allow the user to place us outside of our expected # location b/c this prevents finding dependent libraries that # are installed to the same prefix. # At present, this check doesn't affect windows .dll's that # are installed into $libdir/../bin (currently, that works fine) # but it's something to keep an eye on. test "$inst_prefix_dir" = "$destdir" && \ func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir" if test -n "$inst_prefix_dir"; then # Stick the inst_prefix_dir data into the link command. relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` else relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` fi func_warning "relinking \`$file'" func_show_eval "$relink_command" \ 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"' fi # See the names of the shared library. 
set dummy $library_names; shift if test -n "$1"; then realname="$1" shift srcname="$realname" test -n "$relink_command" && srcname="$realname"T # Install the shared library and build the symlinks. func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ 'exit $?' tstripme="$stripme" case $host_os in cygwin* | mingw* | pw32* | cegcc*) case $realname in *.dll.a) tstripme="" ;; esac ;; esac if test -n "$tstripme" && test -n "$striplib"; then func_show_eval "$striplib $destdir/$realname" 'exit $?' fi if test "$#" -gt 0; then # Delete the old symlinks, and create new ones. # Try `ln -sf' first, because the `ln' binary might depend on # the symlink we replace! Solaris /bin/ln does not understand -f, # so we also need to try rm && ln -s. for linkname do test "$linkname" != "$realname" \ && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" done fi # Do each command in the postinstall commands. lib="$destdir/$realname" func_execute_cmds "$postinstall_cmds" 'exit $?' fi # Install the pseudo-library for information purposes. func_basename "$file" name="$func_basename_result" instname="$dir/$name"i func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' # Maybe install the static library, too. test -n "$old_library" && func_append staticlibs " $dir/$old_library" ;; *.lo) # Install (i.e. copy) a libtool object. # Figure out destination file name, if it wasn't already specified. if test -n "$destname"; then destfile="$destdir/$destname" else func_basename "$file" destfile="$func_basename_result" destfile="$destdir/$destfile" fi # Deduce the name of the destination old-style object file. case $destfile in *.lo) func_lo2o "$destfile" staticdest=$func_lo2o_result ;; *.$objext) staticdest="$destfile" destfile= ;; *) func_fatal_help "cannot copy a libtool object to \`$destfile'" ;; esac # Install the libtool object if requested. test -n "$destfile" && \ func_show_eval "$install_prog $file $destfile" 'exit $?' # Install the old object if enabled. if test "$build_old_libs" = yes; then # Deduce the name of the old-style object file. func_lo2o "$file" staticobj=$func_lo2o_result func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' fi exit $EXIT_SUCCESS ;; *) # Figure out destination file name, if it wasn't already specified. if test -n "$destname"; then destfile="$destdir/$destname" else func_basename "$file" destfile="$func_basename_result" destfile="$destdir/$destfile" fi # If the file is missing, and there is a .exe on the end, strip it # because it is most likely a libtool script we actually want to # install stripped_ext="" case $file in *.exe) if test ! -f "$file"; then func_stripname '' '.exe' "$file" file=$func_stripname_result stripped_ext=".exe" fi ;; esac # Do a test to see if this is really a libtool program. case $host in *cygwin* | *mingw*) if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" wrapper=$func_ltwrapper_scriptname_result else func_stripname '' '.exe' "$file" wrapper=$func_stripname_result fi ;; *) wrapper=$file ;; esac if func_ltwrapper_script_p "$wrapper"; then notinst_deplibs= relink_command= func_source "$wrapper" # Check the variables that should have been set. test -z "$generated_by_libtool_version" && \ func_fatal_error "invalid libtool wrapper script \`$wrapper'" finalize=yes for lib in $notinst_deplibs; do # Check to see that each library is installed. 
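# Sketch of this check (example values only): func_source reads the .la file
# named by $lib, which typically contains an assignment such as
#   libdir='/usr/local/lib'
# so $libfile becomes e.g. /usr/local/lib/libfoo.la; when that file is absent
# the dependency is treated as not yet installed and finalize is set to no.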
libdir= if test -f "$lib"; then func_source "$lib" fi libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test if test -n "$libdir" && test ! -f "$libfile"; then func_warning "\`$lib' has not been installed in \`$libdir'" finalize=no fi done relink_command= func_source "$wrapper" outputname= if test "$fast_install" = no && test -n "$relink_command"; then $opt_dry_run || { if test "$finalize" = yes; then tmpdir=`func_mktempdir` func_basename "$file$stripped_ext" file="$func_basename_result" outputname="$tmpdir/$file" # Replace the output file specification. relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` $opt_silent || { func_quote_for_expand "$relink_command" eval "func_echo $func_quote_for_expand_result" } if eval "$relink_command"; then : else func_error "error: relink \`$file' with the above command before installing it" $opt_dry_run || ${RM}r "$tmpdir" continue fi file="$outputname" else func_warning "cannot relink \`$file'" fi } else # Install the binary that we compiled earlier. file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` fi fi # remove .exe since cygwin /usr/bin/install will append another # one anyway case $install_prog,$host in */usr/bin/install*,*cygwin*) case $file:$destfile in *.exe:*.exe) # this is ok ;; *.exe:*) destfile=$destfile.exe ;; *:*.exe) func_stripname '' '.exe' "$destfile" destfile=$func_stripname_result ;; esac ;; esac func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' $opt_dry_run || if test -n "$outputname"; then ${RM}r "$tmpdir" fi ;; esac done for file in $staticlibs; do func_basename "$file" name="$func_basename_result" # Set up the ranlib parameters. oldlib="$destdir/$name" func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 tool_oldlib=$func_to_tool_file_result func_show_eval "$install_prog \$file \$oldlib" 'exit $?' if test -n "$stripme" && test -n "$old_striplib"; then func_show_eval "$old_striplib $tool_oldlib" 'exit $?' fi # Do each command in the postinstall commands. func_execute_cmds "$old_postinstall_cmds" 'exit $?' done test -n "$future_libdirs" && \ func_warning "remember to run \`$progname --finish$future_libdirs'" if test -n "$current_libdirs"; then # Maybe just do a dry run. $opt_dry_run && current_libdirs=" -n$current_libdirs" exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' else exit $EXIT_SUCCESS fi } test "$opt_mode" = install && func_mode_install ${1+"$@"} # func_generate_dlsyms outputname originator pic_p # Extract symbols from dlprefiles and create ${outputname}S.o with # a dlpreopen symbol table. func_generate_dlsyms () { $opt_debug my_outputname="$1" my_originator="$2" my_pic_p="${3-no}" my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'` my_dlsyms= if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then if test -n "$NM" && test -n "$global_symbol_pipe"; then my_dlsyms="${my_outputname}S.c" else func_error "not configured to extract global symbols from dlpreopened files" fi fi if test -n "$my_dlsyms"; then case $my_dlsyms in "") ;; *.c) # Discover the nlist of each of the dlfiles. nlist="$output_objdir/${my_outputname}.nm" func_show_eval "$RM $nlist ${nlist}S ${nlist}T" # Parse the name list into a source file. func_verbose "creating $output_objdir/$my_dlsyms" $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ /* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. 
*/ /* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */ #ifdef __cplusplus extern \"C\" { #endif #if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) #pragma GCC diagnostic ignored \"-Wstrict-prototypes\" #endif /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 can't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif /* External symbol declarations for the compiler. */\ " if test "$dlself" = yes; then func_verbose "generating symbol list for \`$output'" $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" # Add our own program objects to the symbol list. progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` for progfile in $progfiles; do func_to_tool_file "$progfile" func_convert_file_msys_to_w32 func_verbose "extracting global C symbols from \`$func_to_tool_file_result'" $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" done if test -n "$exclude_expsyms"; then $opt_dry_run || { eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi if test -n "$export_symbols_regex"; then $opt_dry_run || { eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi # Prepare the list of exported symbols if test -z "$export_symbols"; then export_symbols="$output_objdir/$outputname.exp" $opt_dry_run || { $RM $export_symbols eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' ;; esac } else $opt_dry_run || { eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' ;; esac } fi fi for dlprefile in $dlprefiles; do func_verbose "extracting global C symbols from \`$dlprefile'" func_basename "$dlprefile" name="$func_basename_result" case $host in *cygwin* | *mingw* | *cegcc* ) # if an import library, we need to obtain dlname if func_win32_import_lib_p "$dlprefile"; then func_tr_sh "$dlprefile" eval "curr_lafile=\$libfile_$func_tr_sh_result" dlprefile_dlbasename="" if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then # Use subshell, to avoid clobbering current variable values dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` if test -n "$dlprefile_dlname" ; then func_basename "$dlprefile_dlname" dlprefile_dlbasename="$func_basename_result" else # no lafile. user explicitly requested -dlpreopen .
$sharedlib_from_linklib_cmd "$dlprefile" dlprefile_dlbasename=$sharedlib_from_linklib_result fi fi $opt_dry_run || { if test -n "$dlprefile_dlbasename" ; then eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' else func_warning "Could not compute DLL name from $name" eval '$ECHO ": $name " >> "$nlist"' fi func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" } else # not an import lib $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } fi ;; *) $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } ;; esac done $opt_dry_run || { # Make sure we have at least an empty file. test -f "$nlist" || : > "$nlist" if test -n "$exclude_expsyms"; then $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T $MV "$nlist"T "$nlist" fi # Try sorting and uniquifying the output. if $GREP -v "^: " < "$nlist" | if sort -k 3 /dev/null 2>&1; then sort -k 3 else sort +2 fi | uniq > "$nlist"S; then : else $GREP -v "^: " < "$nlist" > "$nlist"S fi if test -f "$nlist"S; then eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' else echo '/* NONE */' >> "$output_objdir/$my_dlsyms" fi echo >> "$output_objdir/$my_dlsyms" "\ /* The mapping between symbol names and symbols. */ typedef struct { const char *name; void *address; } lt_dlsymlist; extern LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[]; LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[] = {\ { \"$my_originator\", (void *) 0 }," case $need_lib_prefix in no) eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; *) eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; esac echo >> "$output_objdir/$my_dlsyms" "\ {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt_${my_prefix}_LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif\ " } # !$opt_dry_run pic_flag_for_symtable= case "$compile_command " in *" -static "*) ;; *) case $host in # compiling the symbol table file with pic_flag works around # a FreeBSD bug that causes programs to crash when -lm is # linked before any other PIC object. But we must not use # pic_flag when linking with -static. The problem exists in # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; *-*-hpux*) pic_flag_for_symtable=" $pic_flag" ;; *) if test "X$my_pic_p" != Xno; then pic_flag_for_symtable=" $pic_flag" fi ;; esac ;; esac symtab_cflags= for arg in $LTCFLAGS; do case $arg in -pie | -fpie | -fPIE) ;; *) func_append symtab_cflags " $arg" ;; esac done # Now compile the dynamic symbol file. func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' # Clean up the generated files. func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"' # Transform the symbol file into the correct name. 
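# Worked example (names illustrative): for an output called "foo" with objext
# "o" and objdir ".libs", $symfileobj below becomes .libs/fooS.o, and every
# @SYMFILE@ placeholder in $compile_command and $finalize_command is replaced
# by that object (plus .libs/foo.def on cygwin/mingw/cegcc hosts when a .def
# file was generated above).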
symfileobj="$output_objdir/${my_outputname}S.$objext" case $host in *cygwin* | *mingw* | *cegcc* ) if test -f "$output_objdir/$my_outputname.def"; then compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` else compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` fi ;; *) compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` ;; esac ;; *) func_fatal_error "unknown suffix for \`$my_dlsyms'" ;; esac else # We keep going just in case the user didn't refer to # lt_preloaded_symbols. The linker will fail if global_symbol_pipe # really was required. # Nullify the symbol file. compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` fi } # func_win32_libid arg # return the library type of file 'arg' # # Need a lot of goo to handle *both* DLLs and import libs # Has to be a shell function in order to 'eat' the argument # that is supplied when $file_magic_command is called. # Despite the name, also deal with 64 bit binaries. func_win32_libid () { $opt_debug win32_libid_type="unknown" win32_fileres=`file -L $1 2>/dev/null` case $win32_fileres in *ar\ archive\ import\ library*) # definitely import win32_libid_type="x86 archive import" ;; *ar\ archive*) # could be an import, or static # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then func_to_tool_file "$1" func_convert_file_msys_to_w32 win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | $SED -n -e ' 1,100{ / I /{ s,.*,import, p q } }'` case $win32_nmres in import*) win32_libid_type="x86 archive import";; *) win32_libid_type="x86 archive static";; esac fi ;; *DLL*) win32_libid_type="x86 DLL" ;; *executable*) # but shell scripts are "executable" too... case $win32_fileres in *MS\ Windows\ PE\ Intel*) win32_libid_type="x86 DLL" ;; esac ;; esac $ECHO "$win32_libid_type" } # func_cygming_dll_for_implib ARG # # Platform-specific function to extract the # name of the DLL associated with the specified # import library ARG. # Invoked by eval'ing the libtool variable # $sharedlib_from_linklib_cmd # Result is available in the variable # $sharedlib_from_linklib_result func_cygming_dll_for_implib () { $opt_debug sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` } # func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs # # The is the core of a fallback implementation of a # platform-specific function to extract the name of the # DLL associated with the specified import library LIBNAME. # # SECTION_NAME is either .idata$6 or .idata$7, depending # on the platform and compiler that created the implib. # # Echos the name of the DLL associated with the # specified import library. 
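# A typical call (file and DLL names purely hypothetical) would be
#   func_cygming_dll_for_implib_fallback_core '.idata$7' /usr/lib/libfoo.dll.a
# which is expected to echo something like "cygfoo-1.dll"; the section name to
# probe is selected by the fallback dispatcher defined further below.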
func_cygming_dll_for_implib_fallback_core () { $opt_debug match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` $OBJDUMP -s --section "$1" "$2" 2>/dev/null | $SED '/^Contents of section '"$match_literal"':/{ # Place marker at beginning of archive member dllname section s/.*/====MARK====/ p d } # These lines can sometimes be longer than 43 characters, but # are always uninteresting /:[ ]*file format pe[i]\{,1\}-/d /^In archive [^:]*:/d # Ensure marker is printed /^====MARK====/p # Remove all lines with less than 43 characters /^.\{43\}/!d # From remaining lines, remove first 43 characters s/^.\{43\}//' | $SED -n ' # Join marker and all lines until next marker into a single line /^====MARK====/ b para H $ b para b :para x s/\n//g # Remove the marker s/^====MARK====// # Remove trailing dots and whitespace s/[\. \t]*$// # Print /./p' | # we now have a list, one entry per line, of the stringified # contents of the appropriate section of all members of the # archive which possess that section. Heuristic: eliminate # all those which have a first or second character that is # a '.' (that is, objdump's representation of an unprintable # character.) This should work for all archives with less than # 0x302f exports -- but will fail for DLLs whose name actually # begins with a literal '.' or a single character followed by # a '.'. # # Of those that remain, print the first one. $SED -e '/^\./d;/^.\./d;q' } # func_cygming_gnu_implib_p ARG # This predicate returns with zero status (TRUE) if # ARG is a GNU/binutils-style import library. Returns # with nonzero status (FALSE) otherwise. func_cygming_gnu_implib_p () { $opt_debug func_to_tool_file "$1" func_convert_file_msys_to_w32 func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` test -n "$func_cygming_gnu_implib_tmp" } # func_cygming_ms_implib_p ARG # This predicate returns with zero status (TRUE) if # ARG is an MS-style import library. Returns # with nonzero status (FALSE) otherwise. func_cygming_ms_implib_p () { $opt_debug func_to_tool_file "$1" func_convert_file_msys_to_w32 func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` test -n "$func_cygming_ms_implib_tmp" } # func_cygming_dll_for_implib_fallback ARG # Platform-specific function to extract the # name of the DLL associated with the specified # import library ARG. # # This fallback implementation is for use when $DLLTOOL # does not support the --identify-strict option. 
# Invoked by eval'ing the libtool variable # $sharedlib_from_linklib_cmd # Result is available in the variable # $sharedlib_from_linklib_result func_cygming_dll_for_implib_fallback () { $opt_debug if func_cygming_gnu_implib_p "$1" ; then # binutils import library sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` elif func_cygming_ms_implib_p "$1" ; then # ms-generated import library sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` else # unknown sharedlib_from_linklib_result="" fi } # func_extract_an_archive dir oldlib func_extract_an_archive () { $opt_debug f_ex_an_ar_dir="$1"; shift f_ex_an_ar_oldlib="$1" if test "$lock_old_archive_extraction" = yes; then lockfile=$f_ex_an_ar_oldlib.lock until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do func_echo "Waiting for $lockfile to be removed" sleep 2 done fi func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ 'stat=$?; rm -f "$lockfile"; exit $stat' if test "$lock_old_archive_extraction" = yes; then $opt_dry_run || rm -f "$lockfile" fi if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then : else func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" fi } # func_extract_archives gentop oldlib ... func_extract_archives () { $opt_debug my_gentop="$1"; shift my_oldlibs=${1+"$@"} my_oldobjs="" my_xlib="" my_xabs="" my_xdir="" for my_xlib in $my_oldlibs; do # Extract the objects. case $my_xlib in [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; *) my_xabs=`pwd`"/$my_xlib" ;; esac func_basename "$my_xlib" my_xlib="$func_basename_result" my_xlib_u=$my_xlib while :; do case " $extracted_archives " in *" $my_xlib_u "*) func_arith $extracted_serial + 1 extracted_serial=$func_arith_result my_xlib_u=lt$extracted_serial-$my_xlib ;; *) break ;; esac done extracted_archives="$extracted_archives $my_xlib_u" my_xdir="$my_gentop/$my_xlib_u" func_mkdir_p "$my_xdir" case $host in *-darwin*) func_verbose "Extracting $my_xabs" # Do not bother doing anything if just a dry run $opt_dry_run || { darwin_orig_dir=`pwd` cd $my_xdir || exit $? 
darwin_archive=$my_xabs darwin_curdir=`pwd` darwin_base_archive=`basename "$darwin_archive"` darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` if test -n "$darwin_arches"; then darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` darwin_arch= func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" for darwin_arch in $darwin_arches ; do func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}" $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" func_extract_an_archive "`pwd`" "${darwin_base_archive}" cd "$darwin_curdir" $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" done # $darwin_arches ## Okay now we've a bunch of thin objects, gotta fatten them up :) darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u` darwin_file= darwin_files= for darwin_file in $darwin_filelist; do darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` $LIPO -create -output "$darwin_file" $darwin_files done # $darwin_filelist $RM -rf unfat-$$ cd "$darwin_orig_dir" else cd $darwin_orig_dir func_extract_an_archive "$my_xdir" "$my_xabs" fi # $darwin_arches } # !$opt_dry_run ;; *) func_extract_an_archive "$my_xdir" "$my_xabs" ;; esac my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` done func_extract_archives_result="$my_oldobjs" } # func_emit_wrapper [arg=no] # # Emit a libtool wrapper script on stdout. # Don't directly open a file because we may want to # incorporate the script contents within a cygwin/mingw # wrapper executable. Must ONLY be called from within # func_mode_link because it depends on a number of variables # set therein. # # ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR # variable will take. If 'yes', then the emitted script # will assume that the directory in which it is stored is # the $objdir directory. This is a cygwin/mingw-specific # behavior. func_emit_wrapper () { func_emit_wrapper_arg1=${1-no} $ECHO "\ #! $SHELL # $output - temporary wrapper script for $objdir/$outputname # Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION # # The $output program cannot be directly executed until all the libtool # libraries that it depends on are installed. # # This wrapper script should never be moved out of the build directory. # If it is, it will not operate correctly. # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. sed_quote_subst='$sed_quote_subst' # Be Bourne compatible if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH relink_command=\"$relink_command\" # This environment variable determines our operation mode. 
if test \"\$libtool_install_magic\" = \"$magic\"; then # install mode needs the following variables: generated_by_libtool_version='$macro_version' notinst_deplibs='$notinst_deplibs' else # When we are sourced in execute mode, \$file and \$ECHO are already set. if test \"\$libtool_execute_magic\" != \"$magic\"; then file=\"\$0\"" qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` $ECHO "\ # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } ECHO=\"$qECHO\" fi # Very basic option parsing. These options are (a) specific to # the libtool wrapper, (b) are identical between the wrapper # /script/ and the wrapper /executable/ which is used only on # windows platforms, and (c) all begin with the string "--lt-" # (application programs are unlikely to have options which match # this pattern). # # There are only two supported options: --lt-debug and # --lt-dump-script. There is, deliberately, no --lt-help. # # The first argument to this parsing function should be the # script's $0 value, followed by "$@". lt_option_debug= func_parse_lt_options () { lt_script_arg0=\$0 shift for lt_opt do case \"\$lt_opt\" in --lt-debug) lt_option_debug=1 ;; --lt-dump-script) lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` cat \"\$lt_dump_D/\$lt_dump_F\" exit 0 ;; --lt-*) \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 exit 1 ;; esac done # Print the debug banner immediately: if test -n \"\$lt_option_debug\"; then echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2 fi } # Used when --lt-debug. Prints its arguments to stdout # (redirection is the responsibility of the caller) func_lt_dump_args () { lt_dump_args_N=1; for lt_arg do \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\" lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` done } # Core function for launching the target application func_exec_program_core () { " case $host in # Backslashes separate directories on plain windows *-*-mingw | *-*-os2* | *-cegcc*) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} " ;; *) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir/\$program\" \${1+\"\$@\"} " ;; esac $ECHO "\ \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 exit 1 } # A function to encapsulate launching the target application # Strips options in the --lt-* namespace from \$@ and # launches target application with the remaining arguments. func_exec_program () { case \" \$* \" in *\\ --lt-*) for lt_wr_arg do case \$lt_wr_arg in --lt-*) ;; *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; esac shift done ;; esac func_exec_program_core \${1+\"\$@\"} } # Parse options func_parse_lt_options \"\$0\" \${1+\"\$@\"} # Find the directory that this script lives in. thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` test \"x\$thisdir\" = \"x\$file\" && thisdir=. # Follow symbolic links until we get to the real thisdir. 
file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` while test -n \"\$file\"; do destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` # If there was a directory component, then change thisdir. if test \"x\$destdir\" != \"x\$file\"; then case \"\$destdir\" in [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; *) thisdir=\"\$thisdir/\$destdir\" ;; esac fi file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` done # Usually 'no', except on cygwin/mingw when embedded into # the cwrapper. WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then # special case for '.' if test \"\$thisdir\" = \".\"; then thisdir=\`pwd\` fi # remove .libs from thisdir case \"\$thisdir\" in *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; $objdir ) thisdir=. ;; esac fi # Try to get the absolute directory name. absdir=\`cd \"\$thisdir\" && pwd\` test -n \"\$absdir\" && thisdir=\"\$absdir\" " if test "$fast_install" = yes; then $ECHO "\ program=lt-'$outputname'$exeext progdir=\"\$thisdir/$objdir\" if test ! -f \"\$progdir/\$program\" || { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ test \"X\$file\" != \"X\$progdir/\$program\"; }; then file=\"\$\$-\$program\" if test ! -d \"\$progdir\"; then $MKDIR \"\$progdir\" else $RM \"\$progdir/\$file\" fi" $ECHO "\ # relink executable if necessary if test -n \"\$relink_command\"; then if relink_command_output=\`eval \$relink_command 2>&1\`; then : else $ECHO \"\$relink_command_output\" >&2 $RM \"\$progdir/\$file\" exit 1 fi fi $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || { $RM \"\$progdir/\$program\"; $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } $RM \"\$progdir/\$file\" fi" else $ECHO "\ program='$outputname' progdir=\"\$thisdir/$objdir\" " fi $ECHO "\ if test -f \"\$progdir/\$program\"; then" # fixup the dll searchpath if we need to. # # Fix the DLL searchpath if we need to. Do this before prepending # to shlibpath, because on Windows, both are PATH and uninstalled # libraries must come first. if test -n "$dllsearchpath"; then $ECHO "\ # Add the dll search path components to the executable PATH PATH=$dllsearchpath:\$PATH " fi # Export our shlibpath_var if we have one. if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then $ECHO "\ # Add our own library path to $shlibpath_var $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" # Some systems cannot cope with colon-terminated $shlibpath_var # The second colon is a workaround for a bug in BeOS R4 sed $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` export $shlibpath_var " fi $ECHO "\ if test \"\$libtool_execute_magic\" != \"$magic\"; then # Run the actual program with our arguments. func_exec_program \${1+\"\$@\"} fi else # The program doesn't exist. \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 exit 1 fi fi\ " } # func_emit_cwrapperexe_src # emit the source code for a wrapper executable on stdout # Must ONLY be called from within func_mode_link because # it depends on a number of variable set therein. 
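# Rough usage sketch (the variable names shown here are illustrative): the
# emitted C source is normally captured and compiled by the caller, along the
# lines of
#   func_emit_cwrapperexe_src > "$cwrappersource"
#   $LTCC $LTCFLAGS -o "$cwrapper" "$cwrappersource"
# so the function must write nothing to stdout except the generated C code.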
func_emit_cwrapperexe_src () { cat < #include #ifdef _MSC_VER # include # include # include #else # include # include # ifdef __CYGWIN__ # include # endif #endif #include #include #include #include #include #include #include #include /* declarations of non-ANSI functions */ #if defined(__MINGW32__) # ifdef __STRICT_ANSI__ int _putenv (const char *); # endif #elif defined(__CYGWIN__) # ifdef __STRICT_ANSI__ char *realpath (const char *, char *); int putenv (char *); int setenv (const char *, const char *, int); # endif /* #elif defined (other platforms) ... */ #endif /* portability defines, excluding path handling macros */ #if defined(_MSC_VER) # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv # define S_IXUSR _S_IEXEC # ifndef _INTPTR_T_DEFINED # define _INTPTR_T_DEFINED # define intptr_t int # endif #elif defined(__MINGW32__) # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv #elif defined(__CYGWIN__) # define HAVE_SETENV # define FOPEN_WB "wb" /* #elif defined (other platforms) ... */ #endif #if defined(PATH_MAX) # define LT_PATHMAX PATH_MAX #elif defined(MAXPATHLEN) # define LT_PATHMAX MAXPATHLEN #else # define LT_PATHMAX 1024 #endif #ifndef S_IXOTH # define S_IXOTH 0 #endif #ifndef S_IXGRP # define S_IXGRP 0 #endif /* path handling portability macros */ #ifndef DIR_SEPARATOR # define DIR_SEPARATOR '/' # define PATH_SEPARATOR ':' #endif #if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ defined (__OS2__) # define HAVE_DOS_BASED_FILE_SYSTEM # define FOPEN_WB "wb" # ifndef DIR_SEPARATOR_2 # define DIR_SEPARATOR_2 '\\' # endif # ifndef PATH_SEPARATOR_2 # define PATH_SEPARATOR_2 ';' # endif #endif #ifndef DIR_SEPARATOR_2 # define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) #else /* DIR_SEPARATOR_2 */ # define IS_DIR_SEPARATOR(ch) \ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) #endif /* DIR_SEPARATOR_2 */ #ifndef PATH_SEPARATOR_2 # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) #else /* PATH_SEPARATOR_2 */ # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) #endif /* PATH_SEPARATOR_2 */ #ifndef FOPEN_WB # define FOPEN_WB "w" #endif #ifndef _O_BINARY # define _O_BINARY 0 #endif #define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) #define XFREE(stale) do { \ if (stale) { free ((void *) stale); stale = 0; } \ } while (0) #if defined(LT_DEBUGWRAPPER) static int lt_debug = 1; #else static int lt_debug = 0; #endif const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ void *xmalloc (size_t num); char *xstrdup (const char *string); const char *base_name (const char *name); char *find_executable (const char *wrapper); char *chase_symlinks (const char *pathspec); int make_executable (const char *path); int check_executable (const char *path); char *strendzap (char *str, const char *pat); void lt_debugprintf (const char *file, int line, const char *fmt, ...); void lt_fatal (const char *file, int line, const char *message, ...); static const char *nonnull (const char *s); static const char *nonempty (const char *s); void lt_setenv (const char *name, const char *value); char *lt_extend_str (const char *orig_value, const char *add, int to_end); void lt_update_exe_path (const char *name, const char *value); void lt_update_lib_path (const char *name, const char *value); char **prepare_spawn (char **argv); void lt_dump_script (FILE *f); EOF cat <= 0) && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) 
return 1; else return 0; } int make_executable (const char *path) { int rval = 0; struct stat st; lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", nonempty (path)); if ((!path) || (!*path)) return 0; if (stat (path, &st) >= 0) { rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); } return rval; } /* Searches for the full path of the wrapper. Returns newly allocated full path name if found, NULL otherwise Does not chase symlinks, even on platforms that support them. */ char * find_executable (const char *wrapper) { int has_slash = 0; const char *p; const char *p_next; /* static buffer for getcwd */ char tmp[LT_PATHMAX + 1]; int tmp_len; char *concat_name; lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", nonempty (wrapper)); if ((wrapper == NULL) || (*wrapper == '\0')) return NULL; /* Absolute path? */ #if defined (HAVE_DOS_BASED_FILE_SYSTEM) if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } else { #endif if (IS_DIR_SEPARATOR (wrapper[0])) { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } #if defined (HAVE_DOS_BASED_FILE_SYSTEM) } #endif for (p = wrapper; *p; p++) if (*p == '/') { has_slash = 1; break; } if (!has_slash) { /* no slashes; search PATH */ const char *path = getenv ("PATH"); if (path != NULL) { for (p = path; *p; p = p_next) { const char *q; size_t p_len; for (q = p; *q; q++) if (IS_PATH_SEPARATOR (*q)) break; p_len = q - p; p_next = (*q == '\0' ? q : q + 1); if (p_len == 0) { /* empty path: current directory */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); } else { concat_name = XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, p, p_len); concat_name[p_len] = '/'; strcpy (concat_name + p_len + 1, wrapper); } if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } } /* not found in PATH; assume curdir */ } /* Relative path | not found in path: prepend cwd */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); return NULL; } char * chase_symlinks (const char *pathspec) { #ifndef S_ISLNK return xstrdup (pathspec); #else char buf[LT_PATHMAX]; struct stat s; char *tmp_pathspec = xstrdup (pathspec); char *p; int has_symlinks = 0; while (strlen (tmp_pathspec) && !has_symlinks) { lt_debugprintf (__FILE__, __LINE__, "checking path component for symlinks: %s\n", tmp_pathspec); if (lstat (tmp_pathspec, &s) == 0) { if (S_ISLNK (s.st_mode) != 0) { has_symlinks = 1; break; } /* search backwards for last DIR_SEPARATOR */ p = tmp_pathspec + strlen (tmp_pathspec) - 1; while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) p--; if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) { /* no more DIR_SEPARATORS left */ break; } *p = '\0'; } else { lt_fatal (__FILE__, __LINE__, "error accessing file \"%s\": %s", tmp_pathspec, 
nonnull (strerror (errno))); } } XFREE (tmp_pathspec); if (!has_symlinks) { return xstrdup (pathspec); } tmp_pathspec = realpath (pathspec, buf); if (tmp_pathspec == 0) { lt_fatal (__FILE__, __LINE__, "could not follow symlinks for %s", pathspec); } return xstrdup (tmp_pathspec); #endif } char * strendzap (char *str, const char *pat) { size_t len, patlen; assert (str != NULL); assert (pat != NULL); len = strlen (str); patlen = strlen (pat); if (patlen <= len) { str += len - patlen; if (strcmp (str, pat) == 0) *str = '\0'; } return str; } void lt_debugprintf (const char *file, int line, const char *fmt, ...) { va_list args; if (lt_debug) { (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); va_start (args, fmt); (void) vfprintf (stderr, fmt, args); va_end (args); } } static void lt_error_core (int exit_status, const char *file, int line, const char *mode, const char *message, va_list ap) { fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); vfprintf (stderr, message, ap); fprintf (stderr, ".\n"); if (exit_status >= 0) exit (exit_status); } void lt_fatal (const char *file, int line, const char *message, ...) { va_list ap; va_start (ap, message); lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); va_end (ap); } static const char * nonnull (const char *s) { return s ? s : "(null)"; } static const char * nonempty (const char *s) { return (s && !*s) ? "(empty)" : nonnull (s); } void lt_setenv (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_setenv) setting '%s' to '%s'\n", nonnull (name), nonnull (value)); { #ifdef HAVE_SETENV /* always make a copy, for consistency with !HAVE_SETENV */ char *str = xstrdup (value); setenv (name, str, 1); #else int len = strlen (name) + 1 + strlen (value) + 1; char *str = XMALLOC (char, len); sprintf (str, "%s=%s", name, value); if (putenv (str) != EXIT_SUCCESS) { XFREE (str); } #endif } } char * lt_extend_str (const char *orig_value, const char *add, int to_end) { char *new_value; if (orig_value && *orig_value) { int orig_value_len = strlen (orig_value); int add_len = strlen (add); new_value = XMALLOC (char, add_len + orig_value_len + 1); if (to_end) { strcpy (new_value, orig_value); strcpy (new_value + orig_value_len, add); } else { strcpy (new_value, add); strcpy (new_value + add_len, orig_value); } } else { new_value = xstrdup (add); } return new_value; } void lt_update_exe_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && *name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); /* some systems can't cope with a ':'-terminated path #' */ int len = strlen (new_value); while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1])) { new_value[len-1] = '\0'; } lt_setenv (name, new_value); XFREE (new_value); } } void lt_update_lib_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && *name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); lt_setenv (name, new_value); XFREE (new_value); } } EOF case $host_os in mingw*) cat <<"EOF" /* Prepares an argument vector before calling spawn(). Note that spawn() does not by itself call the command interpreter (getenv ("COMSPEC") != NULL ? 
getenv ("COMSPEC") : ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); GetVersionEx(&v); v.dwPlatformId == VER_PLATFORM_WIN32_NT; }) ? "cmd.exe" : "command.com"). Instead it simply concatenates the arguments, separated by ' ', and calls CreateProcess(). We must quote the arguments since Win32 CreateProcess() interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a special way: - Space and tab are interpreted as delimiters. They are not treated as delimiters if they are surrounded by double quotes: "...". - Unescaped double quotes are removed from the input. Their only effect is that within double quotes, space and tab are treated like normal characters. - Backslashes not followed by double quotes are not special. - But 2*n+1 backslashes followed by a double quote become n backslashes followed by a double quote (n >= 0): \" -> " \\\" -> \" \\\\\" -> \\" */ #define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" #define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" char ** prepare_spawn (char **argv) { size_t argc; char **new_argv; size_t i; /* Count number of arguments. */ for (argc = 0; argv[argc] != NULL; argc++) ; /* Allocate new argument vector. */ new_argv = XMALLOC (char *, argc + 1); /* Put quoted arguments into the new argument vector. */ for (i = 0; i < argc; i++) { const char *string = argv[i]; if (string[0] == '\0') new_argv[i] = xstrdup ("\"\""); else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) { int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); size_t length; unsigned int backslashes; const char *s; char *quoted_string; char *p; length = 0; backslashes = 0; if (quote_around) length++; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') length += backslashes + 1; length++; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) length += backslashes + 1; quoted_string = XMALLOC (char, length + 1); p = quoted_string; backslashes = 0; if (quote_around) *p++ = '"'; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') { unsigned int j; for (j = backslashes + 1; j > 0; j--) *p++ = '\\'; } *p++ = c; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) { unsigned int j; for (j = backslashes; j > 0; j--) *p++ = '\\'; *p++ = '"'; } *p = '\0'; new_argv[i] = quoted_string; } else new_argv[i] = (char *) string; } new_argv[argc] = NULL; return new_argv; } EOF ;; esac cat <<"EOF" void lt_dump_script (FILE* f) { EOF func_emit_wrapper yes | $SED -n -e ' s/^\(.\{79\}\)\(..*\)/\1\ \2/ h s/\([\\"]\)/\\\1/g s/$/\\n/ s/\([^\n]*\).*/ fputs ("\1", f);/p g D' cat <<"EOF" } EOF } # end: func_emit_cwrapperexe_src # func_win32_import_lib_p ARG # True if ARG is an import lib, as indicated by $file_magic_cmd func_win32_import_lib_p () { $opt_debug case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in *import*) : ;; *) false ;; esac } # func_mode_link arg... func_mode_link () { $opt_debug case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) # It is impossible to link a dll without this setting, and # we shouldn't force the makefile maintainer to figure out # which system we are compiling for in order to pass an extra # flag for every libtool invocation. 
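# (Illustrative only: when every symbol can be resolved, a shared library on
# such hosts is typically linked with an explicit flag, for example
#   libtool --mode=link $CC -no-undefined -o libfoo.la foo.lo -lm
# the names and extra arguments above are an example, not something this
# script requires.)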
# allow_undefined=no # FIXME: Unfortunately, there are problems with the above when trying # to make a dll which has undefined symbols, in which case not # even a static library is built. For now, we need to specify # -no-undefined on the libtool link line when we can be certain # that all symbols are satisfied, otherwise we get a static library. allow_undefined=yes ;; *) allow_undefined=yes ;; esac libtool_args=$nonopt base_compile="$nonopt $@" compile_command=$nonopt finalize_command=$nonopt compile_rpath= finalize_rpath= compile_shlibpath= finalize_shlibpath= convenience= old_convenience= deplibs= old_deplibs= compiler_flags= linker_flags= dllsearchpath= lib_search_path=`pwd` inst_prefix_dir= new_inherited_linker_flags= avoid_version=no bindir= dlfiles= dlprefiles= dlself=no export_dynamic=no export_symbols= export_symbols_regex= generated= libobjs= ltlibs= module=no no_install=no objs= non_pic_objects= precious_files_regex= prefer_static_libs=no preload=no prev= prevarg= release= rpath= xrpath= perm_rpath= temp_rpath= thread_safe=no vinfo= vinfo_number=no weak_libs= single_module="${wl}-single_module" func_infer_tag $base_compile # We need to know -static, to get the right output filenames. for arg do case $arg in -shared) test "$build_libtool_libs" != yes && \ func_fatal_configuration "can not build a shared library" build_old_libs=no break ;; -all-static | -static | -static-libtool-libs) case $arg in -all-static) if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then func_warning "complete static linking is impossible in this configuration" fi if test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; -static) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=built ;; -static-libtool-libs) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; esac build_libtool_libs=no build_old_libs=yes break ;; esac done # See if our shared archives depend on static archives. test -n "$old_archive_from_new_cmds" && build_old_libs=yes # Go through the arguments, transforming them on the way. while test "$#" -gt 0; do arg="$1" shift func_quote_for_eval "$arg" qarg=$func_quote_for_eval_unquoted_result func_append libtool_args " $func_quote_for_eval_result" # If the previous option needs an argument, assign it. if test -n "$prev"; then case $prev in output) func_append compile_command " @OUTPUT@" func_append finalize_command " @OUTPUT@" ;; esac case $prev in bindir) bindir="$arg" prev= continue ;; dlfiles|dlprefiles) if test "$preload" = no; then # Add the symbol object into the linking commands. func_append compile_command " @SYMFILE@" func_append finalize_command " @SYMFILE@" preload=yes fi case $arg in *.la | *.lo) ;; # We handle these cases below. 
force) if test "$dlself" = no; then dlself=needless export_dynamic=yes fi prev= continue ;; self) if test "$prev" = dlprefiles; then dlself=yes elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then dlself=yes else dlself=needless export_dynamic=yes fi prev= continue ;; *) if test "$prev" = dlfiles; then func_append dlfiles " $arg" else func_append dlprefiles " $arg" fi prev= continue ;; esac ;; expsyms) export_symbols="$arg" test -f "$arg" \ || func_fatal_error "symbol file \`$arg' does not exist" prev= continue ;; expsyms_regex) export_symbols_regex="$arg" prev= continue ;; framework) case $host in *-*-darwin*) case "$deplibs " in *" $qarg.ltframework "*) ;; *) func_append deplibs " $qarg.ltframework" # this is fixed later ;; esac ;; esac prev= continue ;; inst_prefix) inst_prefix_dir="$arg" prev= continue ;; objectlist) if test -f "$arg"; then save_arg=$arg moreargs= for fil in `cat "$save_arg"` do # func_append moreargs " $fil" arg=$fil # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test "$pic_object" = none && test "$non_pic_object" = none; then func_fatal_error "cannot find name of object for \`$arg'" fi # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. func_append libobjs " $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. non_pic_object="$xdir$non_pic_object" # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "\`$arg' is not a valid libtool object" fi fi done else func_fatal_error "link input file \`$arg' does not exist" fi arg=$save_arg prev= continue ;; precious_regex) precious_files_regex="$arg" prev= continue ;; release) release="-$arg" prev= continue ;; rpath | xrpath) # We need an absolute path. 
case $arg in [\\/]* | [A-Za-z]:[\\/]*) ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac if test "$prev" = rpath; then case "$rpath " in *" $arg "*) ;; *) func_append rpath " $arg" ;; esac else case "$xrpath " in *" $arg "*) ;; *) func_append xrpath " $arg" ;; esac fi prev= continue ;; shrext) shrext_cmds="$arg" prev= continue ;; weak) func_append weak_libs " $arg" prev= continue ;; xcclinker) func_append linker_flags " $qarg" func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xcompiler) func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xlinker) func_append linker_flags " $qarg" func_append compiler_flags " $wl$qarg" prev= func_append compile_command " $wl$qarg" func_append finalize_command " $wl$qarg" continue ;; *) eval "$prev=\"\$arg\"" prev= continue ;; esac fi # test -n "$prev" prevarg="$arg" case $arg in -all-static) if test -n "$link_static_flag"; then # See comment for -static flag below, for more details. func_append compile_command " $link_static_flag" func_append finalize_command " $link_static_flag" fi continue ;; -allow-undefined) # FIXME: remove this flag sometime in the future. func_fatal_error "\`-allow-undefined' must not be used because it is the default" ;; -avoid-version) avoid_version=yes continue ;; -bindir) prev=bindir continue ;; -dlopen) prev=dlfiles continue ;; -dlpreopen) prev=dlprefiles continue ;; -export-dynamic) export_dynamic=yes continue ;; -export-symbols | -export-symbols-regex) if test -n "$export_symbols" || test -n "$export_symbols_regex"; then func_fatal_error "more than one -exported-symbols argument is not allowed" fi if test "X$arg" = "X-export-symbols"; then prev=expsyms else prev=expsyms_regex fi continue ;; -framework) prev=framework continue ;; -inst-prefix-dir) prev=inst_prefix continue ;; # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* # so, if we see these flags be careful not to treat them like -L -L[A-Z][A-Z]*:*) case $with_gcc/$host in no/*-*-irix* | /*-*-irix*) func_append compile_command " $arg" func_append finalize_command " $arg" ;; esac continue ;; -L*) func_stripname "-L" '' "$arg" if test -z "$func_stripname_result"; then if test "$#" -gt 0; then func_fatal_error "require no space between \`-L' and \`$1'" else func_fatal_error "need path for \`-L' option" fi fi func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # We need an absolute path. 
case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) absdir=`cd "$dir" && pwd` test -z "$absdir" && \ func_fatal_error "cannot determine absolute directory name of \`$dir'" dir="$absdir" ;; esac case "$deplibs " in *" -L$dir "* | *" $arg "*) # Will only happen for absolute or sysroot arguments ;; *) # Preserve sysroot, but never include relative directories case $dir in [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; *) func_append deplibs " -L$dir" ;; esac func_append lib_search_path " $dir" ;; esac case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` case :$dllsearchpath: in *":$dir:"*) ;; ::) dllsearchpath=$dir;; *) func_append dllsearchpath ":$dir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac continue ;; -l*) if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) # These systems don't actually have a C or math library (as such) continue ;; *-*-os2*) # These systems don't actually have a C library (as such) test "X$arg" = "X-lc" && continue ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. test "X$arg" = "X-lc" && continue ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C and math libraries are in the System framework func_append deplibs " System.ltframework" continue ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype test "X$arg" = "X-lc" && continue ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work test "X$arg" = "X-lc" && continue ;; esac elif test "X$arg" = "X-lc_r"; then case $host in *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc_r directly, use -pthread flag. continue ;; esac fi func_append deplibs " $arg" continue ;; -module) module=yes continue ;; # Tru64 UNIX uses -model [arg] to determine the layout of C++ # classes, name mangling, and exception handling. # Darwin uses the -arch flag to determine output architecture. -model|-arch|-isysroot|--sysroot) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" prev=xcompiler continue ;; -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" case "$new_inherited_linker_flags " in *" $arg "*) ;; * ) func_append new_inherited_linker_flags " $arg" ;; esac continue ;; -multi_module) single_module="${wl}-multi_module" continue ;; -no-fast-install) fast_install=no continue ;; -no-install) case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) # The PATH hackery in wrapper scripts is required on Windows # and Darwin in order for the loader to find any dlls it needs. func_warning "\`-no-install' is ignored for $host" func_warning "assuming \`-no-fast-install' instead" fast_install=no ;; *) no_install=yes ;; esac continue ;; -no-undefined) allow_undefined=no continue ;; -objectlist) prev=objectlist continue ;; -o) prev=output ;; -precious-files-regex) prev=precious_regex continue ;; -release) prev=release continue ;; -rpath) prev=rpath continue ;; -R) prev=xrpath continue ;; -R*) func_stripname '-R' '' "$arg" dir=$func_stripname_result # We need an absolute path. 
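# Example (paths illustrative): -R/opt/lib is accepted as-is and recorded in
# $xrpath, while a sysroot-relative form such as -R=/usr/lib is resolved below
# against $lt_sysroot; a relative path like -R../lib is rejected as fatal.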
case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; =*) func_stripname '=' '' "$dir" dir=$lt_sysroot$func_stripname_result ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac continue ;; -shared) # The effects of -shared are defined in a previous loop. continue ;; -shrext) prev=shrext continue ;; -static | -static-libtool-libs) # The effects of -static are defined in a previous loop. # We used to do the same as -all-static on platforms that # didn't have a PIC flag, but the assumption that the effects # would be equivalent was wrong. It would break on at least # Digital Unix and AIX. continue ;; -thread-safe) thread_safe=yes continue ;; -version-info) prev=vinfo continue ;; -version-number) prev=vinfo vinfo_number=yes continue ;; -weak) prev=weak continue ;; -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result arg= save_ifs="$IFS"; IFS=',' for flag in $args; do IFS="$save_ifs" func_quote_for_eval "$flag" func_append arg " $func_quote_for_eval_result" func_append compiler_flags " $func_quote_for_eval_result" done IFS="$save_ifs" func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Wl,*) func_stripname '-Wl,' '' "$arg" args=$func_stripname_result arg= save_ifs="$IFS"; IFS=',' for flag in $args; do IFS="$save_ifs" func_quote_for_eval "$flag" func_append arg " $wl$func_quote_for_eval_result" func_append compiler_flags " $wl$func_quote_for_eval_result" func_append linker_flags " $func_quote_for_eval_result" done IFS="$save_ifs" func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Xcompiler) prev=xcompiler continue ;; -Xlinker) prev=xlinker continue ;; -XCClinker) prev=xcclinker continue ;; # -msg_* for osf cc -msg_*) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; # Flags to be passed through unchanged, with rationale: # -64, -mips[0-9] enable 64-bit mode for the SGI compiler # -r[0-9][0-9]* specify processor for the SGI compiler # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler # +DA*, +DD* enable 64-bit mode for the HP compiler # -q* compiler args for the IBM compiler # -m*, -t[45]*, -txscale* architecture-specific flags for GCC # -F/path path to uninstalled frameworks, gcc on darwin # -p, -pg, --coverage, -fprofile-* profiling flags for GCC # @file GCC response files # -tp=* Portland pgcc target processor selection # --sysroot=* for sysroot support # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ -O*|-flto*|-fwhopr*|-fuse-linker-plugin) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" func_append compile_command " $arg" func_append finalize_command " $arg" func_append compiler_flags " $arg" continue ;; # Some other compiler flag. -* | +*) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; *.$objext) # A standard object. func_append objs " $arg" ;; *.lo) # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test "$pic_object" = none && test "$non_pic_object" = none; then func_fatal_error "cannot find name of object for \`$arg'" fi # Extract subdirectory from the argument. 
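      # (Example of the helper call that follows, with hypothetical file
      #  names; func_dirname FILE APPEND NONDIR_REPLACEMENT behaves like:
      #    func_dirname "sub/dir/foo.lo" "/" ""  ->  result "sub/dir/"
      #    func_dirname "foo.lo"         "/" ""  ->  result ""
      #  so $xdir is either empty or ends in a slash and can be prepended
      #  directly to the object names read from the .lo file.)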
func_dirname "$arg" "/" "" xdir="$func_dirname_result" if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. func_append libobjs " $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. non_pic_object="$xdir$non_pic_object" # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "\`$arg' is not a valid libtool object" fi fi ;; *.$libext) # An archive. func_append deplibs " $arg" func_append old_deplibs " $arg" continue ;; *.la) # A libtool-controlled library. func_resolve_sysroot "$arg" if test "$prev" = dlfiles; then # This library was specified with -dlopen. func_append dlfiles " $func_resolve_sysroot_result" prev= elif test "$prev" = dlprefiles; then # The library was specified with -dlpreopen. func_append dlprefiles " $func_resolve_sysroot_result" prev= else func_append deplibs " $func_resolve_sysroot_result" fi continue ;; # Some other compiler argument. *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; esac # arg # Now actually substitute the argument into the commands. if test -n "$arg"; then func_append compile_command " $arg" func_append finalize_command " $arg" fi done # argument parsing loop test -n "$prev" && \ func_fatal_help "the \`$prevarg' option requires an argument" if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then eval arg=\"$export_dynamic_flag_spec\" func_append compile_command " $arg" func_append finalize_command " $arg" fi oldlibs= # calculate the name of the file, without its directory func_basename "$output" outputname="$func_basename_result" libobjs_save="$libobjs" if test -n "$shlibpath_var"; then # get the directories listed in $shlibpath_var eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\` else shlib_search_path= fi eval sys_lib_search_path=\"$sys_lib_search_path_spec\" eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" func_dirname "$output" "/" "" output_objdir="$func_dirname_result$objdir" func_to_tool_file "$output_objdir/" tool_output_objdir=$func_to_tool_file_result # Create the object directory. 
func_mkdir_p "$output_objdir" # Determine the type of output case $output in "") func_fatal_help "you must specify an output file" ;; *.$libext) linkmode=oldlib ;; *.lo | *.$objext) linkmode=obj ;; *.la) linkmode=lib ;; *) linkmode=prog ;; # Anything else should be a program. esac specialdeplibs= libs= # Find all interdependent deplibs by searching for libraries # that are linked more than once (e.g. -la -lb -la) for deplib in $deplibs; do if $opt_preserve_dup_deps ; then case "$libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append libs " $deplib" done if test "$linkmode" = lib; then libs="$predeps $libs $compiler_lib_search_path $postdeps" # Compute libraries that are listed more than once in $predeps # $postdeps and mark them as special (i.e., whose duplicates are # not to be eliminated). pre_post_deps= if $opt_duplicate_compiler_generated_deps; then for pre_post_dep in $predeps $postdeps; do case "$pre_post_deps " in *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; esac func_append pre_post_deps " $pre_post_dep" done fi pre_post_deps= fi deplibs= newdependency_libs= newlib_search_path= need_relink=no # whether we're linking any uninstalled libtool libraries notinst_deplibs= # not-installed libtool libraries notinst_path= # paths that contain not-installed libtool libraries case $linkmode in lib) passes="conv dlpreopen link" for file in $dlfiles $dlprefiles; do case $file in *.la) ;; *) func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file" ;; esac done ;; prog) compile_deplibs= finalize_deplibs= alldeplibs=no newdlfiles= newdlprefiles= passes="conv scan dlopen dlpreopen link" ;; *) passes="conv" ;; esac for pass in $passes; do # The preopen pass in lib mode reverses $deplibs; put it back here # so that -L comes before libs that need it for instance... 
if test "$linkmode,$pass" = "lib,link"; then ## FIXME: Find the place where the list is rebuilt in the wrong ## order, and fix it there properly tmp_deplibs= for deplib in $deplibs; do tmp_deplibs="$deplib $tmp_deplibs" done deplibs="$tmp_deplibs" fi if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan"; then libs="$deplibs" deplibs= fi if test "$linkmode" = prog; then case $pass in dlopen) libs="$dlfiles" ;; dlpreopen) libs="$dlprefiles" ;; link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; esac fi if test "$linkmode,$pass" = "lib,dlpreopen"; then # Collect and forward deplibs of preopened libtool libs for lib in $dlprefiles; do # Ignore non-libtool-libs dependency_libs= func_resolve_sysroot "$lib" case $lib in *.la) func_source "$func_resolve_sysroot_result" ;; esac # Collect preopened libtool deplibs, except any this library # has declared as weak libs for deplib in $dependency_libs; do func_basename "$deplib" deplib_base=$func_basename_result case " $weak_libs " in *" $deplib_base "*) ;; *) func_append deplibs " $deplib" ;; esac done done libs="$dlprefiles" fi if test "$pass" = dlopen; then # Collect dlpreopened libraries save_deplibs="$deplibs" deplibs= fi for deplib in $libs; do lib= found=no case $deplib in -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append compiler_flags " $deplib" if test "$linkmode" = lib ; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -l*) if test "$linkmode" != lib && test "$linkmode" != prog; then func_warning "\`-l' is ignored for archives/objects" continue fi func_stripname '-l' '' "$deplib" name=$func_stripname_result if test "$linkmode" = lib; then searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" else searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" fi for searchdir in $searchdirs; do for search_ext in .la $std_shrext .so .a; do # Search the libtool library lib="$searchdir/lib${name}${search_ext}" if test -f "$lib"; then if test "$search_ext" = ".la"; then found=yes else found=no fi break 2 fi done done if test "$found" != yes; then # deplib doesn't seem to be a libtool library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue else # deplib is a libtool library # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, # We need to do some special things here, and not later. if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $deplib "*) if func_lalib_p "$lib"; then library_names= old_library= func_source "$lib" for l in $old_library $library_names; do ll="$l" done if test "X$ll" = "X$old_library" ; then # only static version available found=no func_dirname "$lib" "" "." 
ladir="$func_dirname_result" lib=$ladir/$old_library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue fi fi ;; *) ;; esac fi fi ;; # -l *.ltframework) if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" if test "$linkmode" = lib ; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -L*) case $linkmode in lib) deplibs="$deplib $deplibs" test "$pass" = conv && continue newdependency_libs="$deplib $newdependency_libs" func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; prog) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi if test "$pass" = scan; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; *) func_warning "\`-L' is ignored for archives/objects" ;; esac # linkmode continue ;; # -L -R*) if test "$pass" = link; then func_stripname '-R' '' "$deplib" func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # Make sure the xrpath contains only unique directories. case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac fi deplibs="$deplib $deplibs" continue ;; *.la) func_resolve_sysroot "$deplib" lib=$func_resolve_sysroot_result ;; *.$libext) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi case $linkmode in lib) # Linking convenience modules into shared libraries is allowed, # but linking other static libraries is non-portable. case " $dlpreconveniencelibs " in *" $deplib "*) ;; *) valid_a_lib=no case $deplibs_check_method in match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ | $EGREP "$match_pattern_regex" > /dev/null; then valid_a_lib=yes fi ;; pass_all) valid_a_lib=yes ;; esac if test "$valid_a_lib" != yes; then echo $ECHO "*** Warning: Trying to link with static lib archive $deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because the file extensions .$libext of this argument makes me believe" echo "*** that it is just a static archive that I should not use here." else echo $ECHO "*** Warning: Linking the shared library $output against the" $ECHO "*** static library $deplib is not portable!" 
deplibs="$deplib $deplibs" fi ;; esac continue ;; prog) if test "$pass" != link; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi continue ;; esac # linkmode ;; # *.$libext *.lo | *.$objext) if test "$pass" = conv; then deplibs="$deplib $deplibs" elif test "$linkmode" = prog; then if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlopen support or we're linking statically, # we need to preload. func_append newdlprefiles " $deplib" compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append newdlfiles " $deplib" fi fi continue ;; %DEPLIBS%) alldeplibs=yes continue ;; esac # case $deplib if test "$found" = yes || test -f "$lib"; then : else func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'" fi # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$lib" \ || func_fatal_error "\`$lib' is not a valid libtool archive" func_dirname "$lib" "" "." ladir="$func_dirname_result" dlname= dlopen= dlpreopen= libdir= library_names= old_library= inherited_linker_flags= # If the library was installed with an old release of libtool, # it will not redefine variables installed, or shouldnotlink installed=yes shouldnotlink=no avoidtemprpath= # Read the .la file func_source "$lib" # Convert "-framework foo" to "foo.ltframework" if test -n "$inherited_linker_flags"; then tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do case " $new_inherited_linker_flags " in *" $tmp_inherited_linker_flag "*) ;; *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; esac done fi dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan" || { test "$linkmode" != prog && test "$linkmode" != lib; }; then test -n "$dlopen" && func_append dlfiles " $dlopen" test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" fi if test "$pass" = conv; then # Only check for convenience libraries deplibs="$lib $deplibs" if test -z "$libdir"; then if test -z "$old_library"; then func_fatal_error "cannot find name of link library for \`$lib'" fi # It is a libtool convenience library, so add in its objects. func_append convenience " $ladir/$objdir/$old_library" func_append old_convenience " $ladir/$objdir/$old_library" elif test "$linkmode" != prog && test "$linkmode" != lib; then func_fatal_error "\`$lib' is not a convenience library" fi tmp_libs= for deplib in $dependency_libs; do deplibs="$deplib $deplibs" if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done continue fi # $pass = conv # Get the name of the library we link against. linklib= if test -n "$old_library" && { test "$prefer_static_libs" = yes || test "$prefer_static_libs,$installed" = "built,no"; }; then linklib=$old_library else for l in $old_library $library_names; do linklib="$l" done fi if test -z "$linklib"; then func_fatal_error "cannot find name of link library for \`$lib'" fi # This library was specified with -dlopen. 
if test "$pass" = dlopen; then if test -z "$libdir"; then func_fatal_error "cannot -dlopen a convenience library: \`$lib'" fi if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlname, no dlopen support or we're linking # statically, we need to preload. We also need to preload any # dependent libraries so libltdl's deplib preloader doesn't # bomb out in the load deplibs phase. func_append dlprefiles " $lib $dependency_libs" else func_append newdlfiles " $lib" fi continue fi # $pass = dlopen # We need an absolute path. case $ladir in [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; *) abs_ladir=`cd "$ladir" && pwd` if test -z "$abs_ladir"; then func_warning "cannot determine absolute directory name of \`$ladir'" func_warning "passing it literally to the linker, although it might fail" abs_ladir="$ladir" fi ;; esac func_basename "$lib" laname="$func_basename_result" # Find the relevant object directory and library name. if test "X$installed" = Xyes; then if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then func_warning "library \`$lib' was moved." dir="$ladir" absdir="$abs_ladir" libdir="$abs_ladir" else dir="$lt_sysroot$libdir" absdir="$lt_sysroot$libdir" fi test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes else if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then dir="$ladir" absdir="$abs_ladir" # Remove this search path later func_append notinst_path " $abs_ladir" else dir="$ladir/$objdir" absdir="$abs_ladir/$objdir" # Remove this search path later func_append notinst_path " $abs_ladir" fi fi # $installed = yes func_stripname 'lib' '.la' "$laname" name=$func_stripname_result # This library was specified with -dlpreopen. if test "$pass" = dlpreopen; then if test -z "$libdir" && test "$linkmode" = prog; then func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'" fi case "$host" in # special handling for platforms with PE-DLLs. *cygwin* | *mingw* | *cegcc* ) # Linker will automatically link against shared library if both # static and shared are present. Therefore, ensure we extract # symbols from the import library if a shared library is present # (otherwise, the dlopen module name will be incorrect). We do # this by putting the import library name into $newdlprefiles. # We recover the dlopen module name by 'saving' the la file # name in a special purpose variable, and (later) extracting the # dlname from the la file. if test -n "$dlname"; then func_tr_sh "$dir/$linklib" eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" func_append newdlprefiles " $dir/$linklib" else func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" fi ;; * ) # Prefer using a static library (so that no silly _DYNAMIC symbols # are required to link). if test -n "$old_library"; then func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" # Otherwise, use the dlname, so that lt_dlopen finds it. 
elif test -n "$dlname"; then func_append newdlprefiles " $dir/$dlname" else func_append newdlprefiles " $dir/$linklib" fi ;; esac fi # $pass = dlpreopen if test -z "$libdir"; then # Link the convenience library if test "$linkmode" = lib; then deplibs="$dir/$old_library $deplibs" elif test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$dir/$old_library $compile_deplibs" finalize_deplibs="$dir/$old_library $finalize_deplibs" else deplibs="$lib $deplibs" # used for prog,scan pass fi continue fi if test "$linkmode" = prog && test "$pass" != link; then func_append newlib_search_path " $ladir" deplibs="$lib $deplibs" linkalldeplibs=no if test "$link_all_deplibs" != no || test -z "$library_names" || test "$build_libtool_libs" = no; then linkalldeplibs=yes fi tmp_libs= for deplib in $dependency_libs; do case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; esac # Need to link against all dependency_libs? if test "$linkalldeplibs" = yes; then deplibs="$deplib $deplibs" else # Need to hardcode shared library paths # or/and link against static libraries newdependency_libs="$deplib $newdependency_libs" fi if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done # for deplib continue fi # $linkmode = prog... if test "$linkmode,$pass" = "prog,link"; then if test -n "$library_names" && { { test "$prefer_static_libs" = no || test "$prefer_static_libs,$installed" = "built,yes"; } || test -z "$old_library"; }; then # We need to hardcode the library path if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then # Make sure the rpath contains only unique directories. case "$temp_rpath:" in *"$absdir:"*) ;; *) func_append temp_rpath "$absdir:" ;; esac fi # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi # $linkmode,$pass = prog,link... if test "$alldeplibs" = yes && { test "$deplibs_check_method" = pass_all || { test "$build_libtool_libs" = yes && test -n "$library_names"; }; }; then # We only need to search for static libraries continue fi fi link_static=no # Whether the deplib will be linked statically use_static_libs=$prefer_static_libs if test "$use_static_libs" = built && test "$installed" = yes; then use_static_libs=no fi if test -n "$library_names" && { test "$use_static_libs" = no || test -z "$old_library"; }; then case $host in *cygwin* | *mingw* | *cegcc*) # No point in relinking DLLs because paths are not encoded func_append notinst_deplibs " $lib" need_relink=no ;; *) if test "$installed" = no; then func_append notinst_deplibs " $lib" need_relink=yes fi ;; esac # This is a shared library # Warn about portability, can't link against -module's on some # systems (darwin). Don't bleat about dlopened modules though! 
dlopenmodule="" for dlpremoduletest in $dlprefiles; do if test "X$dlpremoduletest" = "X$lib"; then dlopenmodule="$dlpremoduletest" break fi done if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then echo if test "$linkmode" = prog; then $ECHO "*** Warning: Linking the executable $output against the loadable module" else $ECHO "*** Warning: Linking the shared library $output against the loadable module" fi $ECHO "*** $linklib is not portable!" fi if test "$linkmode" = lib && test "$hardcode_into_libs" = yes; then # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi if test -n "$old_archive_from_expsyms_cmds"; then # figure out the soname set dummy $library_names shift realname="$1" shift libname=`eval "\\$ECHO \"$libname_spec\""` # use dlname if we got it. it's perfectly good, no? if test -n "$dlname"; then soname="$dlname" elif test -n "$soname_spec"; then # bleh windows case $host in *cygwin* | mingw* | *cegcc*) func_arith $current - $age major=$func_arith_result versuffix="-$major" ;; esac eval soname=\"$soname_spec\" else soname="$realname" fi # Make a new name for the extract_expsyms_cmds to use soroot="$soname" func_basename "$soroot" soname="$func_basename_result" func_stripname 'lib' '.dll' "$soname" newlib=libimp-$func_stripname_result.a # If the library has no export list, then create one now if test -f "$output_objdir/$soname-def"; then : else func_verbose "extracting exported symbol list from \`$soname'" func_execute_cmds "$extract_expsyms_cmds" 'exit $?' fi # Create $newlib if test -f "$output_objdir/$newlib"; then :; else func_verbose "generating import library for \`$soname'" func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' 
fi # make sure the library variables are pointing to the new library dir=$output_objdir linklib=$newlib fi # test -n "$old_archive_from_expsyms_cmds" if test "$linkmode" = prog || test "$opt_mode" != relink; then add_shlibpath= add_dir= add= lib_linked=yes case $hardcode_action in immediate | unsupported) if test "$hardcode_direct" = no; then add="$dir/$linklib" case $host in *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; *-*-sysv4*uw2*) add_dir="-L$dir" ;; *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ *-*-unixware7*) add_dir="-L$dir" ;; *-*-darwin* ) # if the lib is a (non-dlopened) module then we can not # link against it, someone is ignoring the earlier warnings if /usr/bin/file -L $add 2> /dev/null | $GREP ": [^:]* bundle" >/dev/null ; then if test "X$dlopenmodule" != "X$lib"; then $ECHO "*** Warning: lib $linklib is a module, not a shared library" if test -z "$old_library" ; then echo echo "*** And there doesn't seem to be a static archive available" echo "*** The link will probably fail, sorry" else add="$dir/$old_library" fi elif test -n "$old_library"; then add="$dir/$old_library" fi fi esac elif test "$hardcode_minus_L" = no; then case $host in *-*-sunos*) add_shlibpath="$dir" ;; esac add_dir="-L$dir" add="-l$name" elif test "$hardcode_shlibpath_var" = no; then add_shlibpath="$dir" add="-l$name" else lib_linked=no fi ;; relink) if test "$hardcode_direct" = yes && test "$hardcode_direct_absolute" = no; then add="$dir/$linklib" elif test "$hardcode_minus_L" = yes; then add_dir="-L$absdir" # Try looking first in the location we're being installed to. if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then add_shlibpath="$dir" add="-l$name" else lib_linked=no fi ;; *) lib_linked=no ;; esac if test "$lib_linked" != yes; then func_fatal_configuration "unsupported hardcode properties" fi if test -n "$add_shlibpath"; then case :$compile_shlibpath: in *":$add_shlibpath:"*) ;; *) func_append compile_shlibpath "$add_shlibpath:" ;; esac fi if test "$linkmode" = prog; then test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" test -n "$add" && compile_deplibs="$add $compile_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" if test "$hardcode_direct" != yes && test "$hardcode_minus_L" != yes && test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac fi fi fi if test "$linkmode" = prog || test "$opt_mode" = relink; then add_shlibpath= add_dir= add= # Finalize command for both is simple: just hardcode it. if test "$hardcode_direct" = yes && test "$hardcode_direct_absolute" = no; then add="$libdir/$linklib" elif test "$hardcode_minus_L" = yes; then add_dir="-L$libdir" add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac add="-l$name" elif test "$hardcode_automatic" = yes; then if test -n "$inst_prefix_dir" && test -f "$inst_prefix_dir$libdir/$linklib" ; then add="$inst_prefix_dir$libdir/$linklib" else add="$libdir/$linklib" fi else # We cannot seem to hardcode it, guess we'll fake it. add_dir="-L$libdir" # Try looking first in the location we're being installed to. 
if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" fi if test "$linkmode" = prog; then test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" test -n "$add" && finalize_deplibs="$add $finalize_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" fi fi elif test "$linkmode" = prog; then # Here we assume that one of hardcode_direct or hardcode_minus_L # is not unsupported. This is valid on all known static and # shared platforms. if test "$hardcode_direct" != unsupported; then test -n "$old_library" && linklib="$old_library" compile_deplibs="$dir/$linklib $compile_deplibs" finalize_deplibs="$dir/$linklib $finalize_deplibs" else compile_deplibs="-l$name -L$dir $compile_deplibs" finalize_deplibs="-l$name -L$dir $finalize_deplibs" fi elif test "$build_libtool_libs" = yes; then # Not a shared library if test "$deplibs_check_method" != pass_all; then # We're trying link a shared library against a static one # but the system doesn't support it. # Just print a warning and add the library to dependency_libs so # that the program can be linked against the static library. echo $ECHO "*** Warning: This system can not link to static lib archive $lib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have." if test "$module" = yes; then echo "*** But as you try to build a module library, libtool will still create " echo "*** a static module, that should work as long as the dlopening application" echo "*** is linked with the -dlopen flag to resolve symbols at runtime." if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using \`nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** \`nm' from GNU binutils and a full rebuild may help." fi if test "$build_old_libs" = no; then build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi else deplibs="$dir/$old_library $deplibs" link_static=yes fi fi # link shared/static library? if test "$linkmode" = lib; then if test -n "$dependency_libs" && { test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes || test "$link_static" = yes; }; then # Extract -R from dependency_libs temp_deplibs= for libdir in $dependency_libs; do case $libdir in -R*) func_stripname '-R' '' "$libdir" temp_xrpath=$func_stripname_result case " $xrpath " in *" $temp_xrpath "*) ;; *) func_append xrpath " $temp_xrpath";; esac;; *) func_append temp_deplibs " $libdir";; esac done dependency_libs="$temp_deplibs" fi func_append newlib_search_path " $absdir" # Link against this library test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" # ... 
and its dependency_libs tmp_libs= for deplib in $dependency_libs; do newdependency_libs="$deplib $newdependency_libs" case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result";; *) func_resolve_sysroot "$deplib" ;; esac if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $func_resolve_sysroot_result "*) func_append specialdeplibs " $func_resolve_sysroot_result" ;; esac fi func_append tmp_libs " $func_resolve_sysroot_result" done if test "$link_all_deplibs" != no; then # Add the search paths of all dependency libraries for deplib in $dependency_libs; do path= case $deplib in -L*) path="$deplib" ;; *.la) func_resolve_sysroot "$deplib" deplib=$func_resolve_sysroot_result func_dirname "$deplib" "" "." dir=$func_dirname_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; *) absdir=`cd "$dir" && pwd` if test -z "$absdir"; then func_warning "cannot determine absolute directory name of \`$dir'" absdir="$dir" fi ;; esac if $GREP "^installed=no" $deplib > /dev/null; then case $host in *-*-darwin*) depdepl= eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` if test -n "$deplibrary_names" ; then for tmp in $deplibrary_names ; do depdepl=$tmp done if test -f "$absdir/$objdir/$depdepl" ; then depdepl="$absdir/$objdir/$depdepl" darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` if test -z "$darwin_install_name"; then darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` fi func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}" func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}" path= fi fi ;; *) path="-L$absdir/$objdir" ;; esac else eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` test -z "$libdir" && \ func_fatal_error "\`$deplib' is not a valid libtool archive" test "$absdir" != "$libdir" && \ func_warning "\`$deplib' seems to be moved" path="-L$absdir" fi ;; esac case " $deplibs " in *" $path "*) ;; *) deplibs="$path $deplibs" ;; esac done fi # link_all_deplibs != no fi # linkmode = lib done # for deplib in $libs if test "$pass" = link; then if test "$linkmode" = "prog"; then compile_deplibs="$new_inherited_linker_flags $compile_deplibs" finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" else compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` fi fi dependency_libs="$newdependency_libs" if test "$pass" = dlpreopen; then # Link the dlpreopened libraries before other libraries for deplib in $save_deplibs; do deplibs="$deplib $deplibs" done fi if test "$pass" != dlopen; then if test "$pass" != conv; then # Make sure lib_search_path contains only unique directories. 
lib_search_path= for dir in $newlib_search_path; do case "$lib_search_path " in *" $dir "*) ;; *) func_append lib_search_path " $dir" ;; esac done newlib_search_path= fi if test "$linkmode,$pass" != "prog,link"; then vars="deplibs" else vars="compile_deplibs finalize_deplibs" fi for var in $vars dependency_libs; do # Add libraries to $var in reverse order eval tmp_libs=\"\$$var\" new_libs= for deplib in $tmp_libs; do # FIXME: Pedantically, this is the right thing to do, so # that some nasty dependency loop isn't accidentally # broken: #new_libs="$deplib $new_libs" # Pragmatically, this seems to cause very few problems in # practice: case $deplib in -L*) new_libs="$deplib $new_libs" ;; -R*) ;; *) # And here is the reason: when a library appears more # than once as an explicit dependence of a library, or # is implicitly linked in more than once by the # compiler, it is considered special, and multiple # occurrences thereof are not removed. Compare this # with having the same library being listed as a # dependency of multiple other libraries: in this case, # we know (pedantically, we assume) the library does not # need to be listed more than once, so we keep only the # last copy. This is not always right, but it is rare # enough that we require users that really mean to play # such unportable linking tricks to link the library # using -Wl,-lname, so that libtool does not consider it # for duplicate removal. case " $specialdeplibs " in *" $deplib "*) new_libs="$deplib $new_libs" ;; *) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$deplib $new_libs" ;; esac ;; esac ;; esac done tmp_libs= for deplib in $new_libs; do case $deplib in -L*) case " $tmp_libs " in *" $deplib "*) ;; *) func_append tmp_libs " $deplib" ;; esac ;; *) func_append tmp_libs " $deplib" ;; esac done eval $var=\"$tmp_libs\" done # for var fi # Last step: remove runtime libs from dependency_libs # (they stay in deplibs) tmp_libs= for i in $dependency_libs ; do case " $predeps $postdeps $compiler_lib_search_path " in *" $i "*) i="" ;; esac if test -n "$i" ; then func_append tmp_libs " $i" fi done dependency_libs=$tmp_libs done # for pass if test "$linkmode" = prog; then dlfiles="$newdlfiles" fi if test "$linkmode" = prog || test "$linkmode" = lib; then dlprefiles="$newdlprefiles" fi case $linkmode in oldlib) if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then func_warning "\`-dlopen' is ignored for archives" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "\`-l' and \`-L' are ignored for archives" ;; esac test -n "$rpath" && \ func_warning "\`-rpath' is ignored for archives" test -n "$xrpath" && \ func_warning "\`-R' is ignored for archives" test -n "$vinfo" && \ func_warning "\`-version-info/-version-number' is ignored for archives" test -n "$release" && \ func_warning "\`-release' is ignored for archives" test -n "$export_symbols$export_symbols_regex" && \ func_warning "\`-export-symbols' is ignored for archives" # Now set the variables for building old libraries. build_libtool_libs=no oldlibs="$output" func_append objs "$old_deplibs" ;; lib) # Make sure we only generate libraries of the form `libNAME.la'. 
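  # (Illustration with example names: an $outputname of `libfoo.la' passes
  #  the check below unchanged, while `foo.la' is fatal unless -module was
  #  given; for modules on hosts where need_lib_prefix is set, the `lib'
  #  prefix is re-added via $libname_spec.)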
case $outputname in lib*) func_stripname 'lib' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" ;; *) test "$module" = no && \ func_fatal_help "libtool library \`$output' must begin with \`lib'" if test "$need_lib_prefix" != no; then # Add the "lib" prefix for modules if required func_stripname '' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" else func_stripname '' '.la' "$outputname" libname=$func_stripname_result fi ;; esac if test -n "$objs"; then if test "$deplibs_check_method" != pass_all; then func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs" else echo $ECHO "*** Warning: Linking the shared library $output against the non-libtool" $ECHO "*** objects $objs is not portable!" func_append libobjs " $objs" fi fi test "$dlself" != no && \ func_warning "\`-dlopen self' is ignored for libtool libraries" set dummy $rpath shift test "$#" -gt 1 && \ func_warning "ignoring multiple \`-rpath's for a libtool library" install_libdir="$1" oldlibs= if test -z "$rpath"; then if test "$build_libtool_libs" = yes; then # Building a libtool convenience library. # Some compilers have problems with a `.al' extension so # convenience libraries should have the same extension an # archive normally would. oldlibs="$output_objdir/$libname.$libext $oldlibs" build_libtool_libs=convenience build_old_libs=yes fi test -n "$vinfo" && \ func_warning "\`-version-info/-version-number' is ignored for convenience libraries" test -n "$release" && \ func_warning "\`-release' is ignored for convenience libraries" else # Parse the version information argument. save_ifs="$IFS"; IFS=':' set dummy $vinfo 0 0 0 shift IFS="$save_ifs" test -n "$7" && \ func_fatal_help "too many parameters to \`-version-info'" # convert absolute version numbers to libtool ages # this retains compatibility with .la files and attempts # to make the code below a bit more comprehensible case $vinfo_number in yes) number_major="$1" number_minor="$2" number_revision="$3" # # There are really only two kinds -- those that # use the current revision as the major version # and those that subtract age and use age as # a minor version. But, then there is irix # which has an extra 1 added just for fun # case $version_type in # correct linux to gnu/linux during the next big refactor darwin|linux|osf|windows|none) func_arith $number_major + $number_minor current=$func_arith_result age="$number_minor" revision="$number_revision" ;; freebsd-aout|freebsd-elf|qnx|sunos) current="$number_major" revision="$number_minor" age="0" ;; irix|nonstopux) func_arith $number_major + $number_minor current=$func_arith_result age="$number_minor" revision="$number_minor" lt_irix_increment=no ;; esac ;; no) current="$1" revision="$2" age="$3" ;; esac # Check that each of the things are valid numbers. 
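    # (Worked example, numbers are hypothetical: `-version-number 3:2:1'
    #  with a linux/darwin/osf/windows-style $version_type yields
    #  current=5 (3+2), age=2, revision=1, while a freebsd/qnx/sunos-style
    #  type yields current=3, revision=2, age=0.  Plain `-version-info
    #  3:2:1' is taken literally as current=3, revision=2, age=1.  The case
    #  patterns below then require each value to be a plain decimal of at
    #  most five digits with no leading zeros.)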
case $current in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "CURRENT \`$current' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac case $revision in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "REVISION \`$revision' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac case $age in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "AGE \`$age' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac if test "$age" -gt "$current"; then func_error "AGE \`$age' is greater than the current interface number \`$current'" func_fatal_error "\`$vinfo' is not valid version information" fi # Calculate the version variables. major= versuffix= verstring= case $version_type in none) ;; darwin) # Like Linux, but with the current version available in # verstring for coding it into the library header func_arith $current - $age major=.$func_arith_result versuffix="$major.$age.$revision" # Darwin ld doesn't like 0 for these options... func_arith $current + 1 minor_current=$func_arith_result xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" ;; freebsd-aout) major=".$current" versuffix=".$current.$revision"; ;; freebsd-elf) major=".$current" versuffix=".$current" ;; irix | nonstopux) if test "X$lt_irix_increment" = "Xno"; then func_arith $current - $age else func_arith $current - $age + 1 fi major=$func_arith_result case $version_type in nonstopux) verstring_prefix=nonstopux ;; *) verstring_prefix=sgi ;; esac verstring="$verstring_prefix$major.$revision" # Add in all the interfaces that we are compatible with. loop=$revision while test "$loop" -ne 0; do func_arith $revision - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring="$verstring_prefix$major.$iface:$verstring" done # Before this point, $major must not contain `.'. major=.$major versuffix="$major.$revision" ;; linux) # correct to gnu/linux during the next big refactor func_arith $current - $age major=.$func_arith_result versuffix="$major.$age.$revision" ;; osf) func_arith $current - $age major=.$func_arith_result versuffix=".$current.$age.$revision" verstring="$current.$age.$revision" # Add in all the interfaces that we are compatible with. loop=$age while test "$loop" -ne 0; do func_arith $current - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring="$verstring:${iface}.0" done # Make executables depend on our current version. func_append verstring ":${current}.0" ;; qnx) major=".$current" versuffix=".$current" ;; sunos) major=".$current" versuffix=".$current.$revision" ;; windows) # Use '-' rather than '.', since we only want one # extension on DOS 8.3 filesystems. func_arith $current - $age major=$func_arith_result versuffix="-$major" ;; *) func_fatal_configuration "unknown library version type \`$version_type'" ;; esac # Clear the version info if we defaulted, and they specified a release. 
if test -z "$vinfo" && test -n "$release"; then major= case $version_type in darwin) # we can't check for "0.0" in archive_cmds due to quoting # problems, so we reset it completely verstring= ;; *) verstring="0.0" ;; esac if test "$need_version" = no; then versuffix= else versuffix=".0.0" fi fi # Remove version info from name if versioning should be avoided if test "$avoid_version" = yes && test "$need_version" = no; then major= versuffix= verstring="" fi # Check to see if the archive will have undefined symbols. if test "$allow_undefined" = yes; then if test "$allow_undefined_flag" = unsupported; then func_warning "undefined symbols not allowed in $host shared libraries" build_libtool_libs=no build_old_libs=yes fi else # Don't allow undefined symbols. allow_undefined_flag="$no_undefined_flag" fi fi func_generate_dlsyms "$libname" "$libname" "yes" func_append libobjs " $symfileobj" test "X$libobjs" = "X " && libobjs= if test "$opt_mode" != relink; then # Remove our outputs, but don't remove object files since they # may have been created when compiling PIC objects. removelist= tempremovelist=`$ECHO "$output_objdir/*"` for p in $tempremovelist; do case $p in *.$objext | *.gcno) ;; $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) if test "X$precious_files_regex" != "X"; then if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 then continue fi fi func_append removelist " $p" ;; *) ;; esac done test -n "$removelist" && \ func_show_eval "${RM}r \$removelist" fi # Now set the variables for building old libraries. if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then func_append oldlibs " $output_objdir/$libname.$libext" # Transform .lo files to .o files. oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP` fi # Eliminate all temporary directories. #for path in $notinst_path; do # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` #done if test -n "$xrpath"; then # If the user specified any rpath flags, then add them. temp_xrpath= for libdir in $xrpath; do func_replace_sysroot "$libdir" func_append temp_xrpath " -R$func_replace_sysroot_result" case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then dependency_libs="$temp_xrpath $dependency_libs" fi fi # Make sure dlfiles contains only unique files that won't be dlpreopened old_dlfiles="$dlfiles" dlfiles= for lib in $old_dlfiles; do case " $dlprefiles $dlfiles " in *" $lib "*) ;; *) func_append dlfiles " $lib" ;; esac done # Make sure dlprefiles contains only unique files old_dlprefiles="$dlprefiles" dlprefiles= for lib in $old_dlprefiles; do case "$dlprefiles " in *" $lib "*) ;; *) func_append dlprefiles " $lib" ;; esac done if test "$build_libtool_libs" = yes; then if test -n "$rpath"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) # these systems don't actually have a c library (as such)! ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C library is in the System framework func_append deplibs " System.ltframework" ;; *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. 
;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work ;; *) # Add libc to deplibs on all other systems if necessary. if test "$build_libtool_need_lc" = "yes"; then func_append deplibs " -lc" fi ;; esac fi # Transform deplibs into only deplibs that can be linked in shared. name_save=$name libname_save=$libname release_save=$release versuffix_save=$versuffix major_save=$major # I'm not sure if I'm treating the release correctly. I think # release should show up in the -l (ie -lgmp5) so we don't want to # add it in twice. Is that correct? release="" versuffix="" major="" newdeplibs= droppeddeps=no case $deplibs_check_method in pass_all) # Don't check for shared/static. Everything works. # This might be a little naive. We might want to check # whether the library exists or not. But this is on # osf3 & osf4 and I'm not really sure... Just # implementing what was already the behavior. newdeplibs=$deplibs ;; test_compile) # This code stresses the "libraries are programs" paradigm to its # limits. Maybe even breaks it. We compile a program, linking it # against the deplibs as a proxy for the library. Then we can check # whether they linked in statically or dynamically with ldd. $opt_dry_run || $RM conftest.c cat > conftest.c </dev/null` $nocaseglob else potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` fi for potent_lib in $potential_libs; do # Follow soft links. if ls -lLd "$potent_lib" 2>/dev/null | $GREP " -> " >/dev/null; then continue fi # The statement above tries to avoid entering an # endless loop below, in case of cyclic links. # We might still enter an endless loop, since a link # loop can be closed while we follow links, # but so what? potlib="$potent_lib" while test -h "$potlib" 2>/dev/null; do potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` case $potliblink in [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";; esac done if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | $SED -e 10q | $EGREP "$file_magic_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $ECHO "*** with $libname but no candidates were found. (...for file magic test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a file magic. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. 
;; match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` for a_deplib in $deplibs; do case $a_deplib in -l*) func_stripname -l '' "$a_deplib" name=$func_stripname_result if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $a_deplib "*) func_append newdeplibs " $a_deplib" a_deplib="" ;; esac fi if test -n "$a_deplib" ; then libname=`eval "\\$ECHO \"$libname_spec\""` for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do potential_libs=`ls $i/$libname[.-]* 2>/dev/null` for potent_lib in $potential_libs; do potlib="$potent_lib" # see symlink-check above in file_magic test if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ $EGREP "$match_pattern_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a regex pattern. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. ;; none | unknown | *) newdeplibs="" tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then for i in $predeps $postdeps ; do # can't use Xsed below, because $i might contain '/' tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"` done fi case $tmp_deplibs in *[!\ \ ]*) echo if test "X$deplibs_check_method" = "Xnone"; then echo "*** Warning: inter-library dependencies are not supported in this platform." else echo "*** Warning: inter-library dependencies are not known to be supported." fi echo "*** All declared inter-library dependencies are being dropped." droppeddeps=yes ;; esac ;; esac versuffix=$versuffix_save major=$major_save release=$release_save libname=$libname_save name=$name_save case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library with the System framework newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac if test "$droppeddeps" = yes; then if test "$module" = yes; then echo echo "*** Warning: libtool could not satisfy all declared inter-library" $ECHO "*** dependencies of module $libname. Therefore, libtool will create" echo "*** a static module, that should work as long as the dlopening" echo "*** application is linked with the -dlopen flag." if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using \`nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** \`nm' from GNU binutils and a full rebuild may help." 
fi if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi else echo "*** The inter-library dependencies that have been dropped here will be" echo "*** automatically added whenever a program is linked with this library" echo "*** or is declared to -dlopen it." if test "$allow_undefined" = no; then echo echo "*** Since this library must not contain undefined symbols," echo "*** because either the platform does not support them or" echo "*** it was explicitly requested with -no-undefined," echo "*** libtool will only create a static version of it." if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi fi fi # Done checking deplibs! deplibs=$newdeplibs fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" case $host in *-*-darwin*) newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done deplibs="$new_libs" # All the library-specific variables (install_libdir is set above). library_names= old_library= dlname= # Test again, we may have decided not to build it any more if test "$build_libtool_libs" = yes; then # Remove ${wl} instances when linking with ld. # FIXME: should test the right _cmds variable. case $archive_cmds in *\$LD\ *) wl= ;; esac if test "$hardcode_into_libs" = yes; then # Hardcode the library paths hardcode_libdirs= dep_rpath= rpath="$finalize_rpath" test "$opt_mode" != relink && rpath="$compile_rpath$rpath" for libdir in $rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then func_replace_sysroot "$libdir" libdir=$func_replace_sysroot_result if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append dep_rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" fi if test -n "$runpath_var" && test -n "$perm_rpath"; then # We should set the runpath_var. 
rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" fi test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" fi shlibpath="$finalize_shlibpath" test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath" if test -n "$shlibpath"; then eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" fi # Get the real and link names of the library. eval shared_ext=\"$shrext_cmds\" eval library_names=\"$library_names_spec\" set dummy $library_names shift realname="$1" shift if test -n "$soname_spec"; then eval soname=\"$soname_spec\" else soname="$realname" fi if test -z "$dlname"; then dlname=$soname fi lib="$output_objdir/$realname" linknames= for link do func_append linknames " $link" done # Use standard objects if they are pic test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` test "X$libobjs" = "X " && libobjs= delfiles= if test -n "$export_symbols" && test -n "$include_expsyms"; then $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" export_symbols="$output_objdir/$libname.uexp" func_append delfiles " $export_symbols" fi orig_export_symbols= case $host_os in cygwin* | mingw* | cegcc*) if test -n "$export_symbols" && test -z "$export_symbols_regex"; then # exporting using user supplied symfile if test "x`$SED 1q $export_symbols`" != xEXPORTS; then # and it's NOT already a .def file. Must figure out # which of the given symbols are data symbols and tag # them as such. So, trigger use of export_symbols_cmds. # export_symbols gets reassigned inside the "prepare # the list of exported symbols" if statement, so the # include_expsyms logic still works. orig_export_symbols="$export_symbols" export_symbols= always_export_symbols=yes fi fi ;; esac # Prepare the list of exported symbols if test -z "$export_symbols"; then if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then func_verbose "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $opt_dry_run || $RM $export_symbols cmds=$export_symbols_cmds save_ifs="$IFS"; IFS='~' for cmd1 in $cmds; do IFS="$save_ifs" # Take the normal branch if the nm_file_list_spec branch # doesn't work or if tool conversion is not needed. case $nm_file_list_spec~$to_tool_file_cmd in *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) try_normal_branch=yes eval cmd=\"$cmd1\" func_len " $cmd" len=$func_len_result ;; *) try_normal_branch=no ;; esac if test "$try_normal_branch" = yes \ && { test "$len" -lt "$max_cmd_len" \ || test "$max_cmd_len" -le -1; } then func_show_eval "$cmd" 'exit $?' skipped_export=false elif test -n "$nm_file_list_spec"; then func_basename "$output" output_la=$func_basename_result save_libobjs=$libobjs save_output=$output output=${output_objdir}/${output_la}.nm func_to_tool_file "$output" libobjs=$nm_file_list_spec$func_to_tool_file_result func_append delfiles " $output" func_verbose "creating $NM input file list: $output" for obj in $save_libobjs; do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > "$output" eval cmd=\"$cmd1\" func_show_eval "$cmd" 'exit $?' output=$save_output libobjs=$save_libobjs skipped_export=false else # The command line is too long to execute in one step. func_verbose "using reloadable object file for export list..." skipped_export=: # Break out early, otherwise skipped_export may be # set to false by a later but shorter cmd. 
break fi done IFS="$save_ifs" if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi fi if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols="$export_symbols" test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi tmp_deplibs= for test_deplib in $deplibs; do case " $convenience " in *" $test_deplib "*) ;; *) func_append tmp_deplibs " $test_deplib" ;; esac done deplibs="$tmp_deplibs" if test -n "$convenience"; then if test -n "$whole_archive_flag_spec" && test "$compiler_needs_object" = yes && test -z "$libobjs"; then # extract the archives, so we have objects to list. # TODO: could optimize this to just extract one archive. whole_archive_flag_spec= fi if test -n "$whole_archive_flag_spec"; then save_libobjs=$libobjs eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= else gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $convenience func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi fi if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then eval flag=\"$thread_safe_flag_spec\" func_append linker_flags " $flag" fi # Make a backup of the uninstalled library when relinking if test "$opt_mode" = relink; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? fi # Do each of the archive commands. if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then eval test_cmds=\"$module_expsym_cmds\" cmds=$module_expsym_cmds else eval test_cmds=\"$module_cmds\" cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then eval test_cmds=\"$archive_expsym_cmds\" cmds=$archive_expsym_cmds else eval test_cmds=\"$archive_cmds\" cmds=$archive_cmds fi fi if test "X$skipped_export" != "X:" && func_len " $test_cmds" && len=$func_len_result && test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then : else # The command line is too long to link in one step, link piecewise # or, if using GNU ld and skipped_export is not :, use a linker # script. # Save the value of $output and $libobjs because we want to # use them later. 
If we have whole_archive_flag_spec, we # want to use save_libobjs as it was before # whole_archive_flag_spec was expanded, because we can't # assume the linker understands whole_archive_flag_spec. # This may have to be revisited, in case too many # convenience libraries get linked in and end up exceeding # the spec. if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then save_libobjs=$libobjs fi save_output=$output func_basename "$output" output_la=$func_basename_result # Clear the reloadable object creation command queue and # initialize k to one. test_cmds= concat_cmds= objlist= last_robj= k=1 if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then output=${output_objdir}/${output_la}.lnkscript func_verbose "creating GNU ld script: $output" echo 'INPUT (' > $output for obj in $save_libobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done echo ')' >> $output func_append delfiles " $output" func_to_tool_file "$output" output=$func_to_tool_file_result elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then output=${output_objdir}/${output_la}.lnk func_verbose "creating linker input file list: $output" : > $output set x $save_libobjs shift firstobj= if test "$compiler_needs_object" = yes; then firstobj="$1 " shift fi for obj do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done func_append delfiles " $output" func_to_tool_file "$output" output=$firstobj\"$file_list_spec$func_to_tool_file_result\" else if test -n "$save_libobjs"; then func_verbose "creating reloadable object files..." output=$output_objdir/$output_la-${k}.$objext eval test_cmds=\"$reload_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 # Loop over the list of objects to be linked. for obj in $save_libobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result if test "X$objlist" = X || test "$len" -lt "$max_cmd_len"; then func_append objlist " $obj" else # The command $test_cmds is almost too long, add a # command to the queue. if test "$k" -eq 1 ; then # The first file doesn't have a previous command to add. reload_objs=$objlist eval concat_cmds=\"$reload_cmds\" else # All subsequent reloadable object files will link in # the last one created. reload_objs="$objlist $last_robj" eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" fi last_robj=$output_objdir/$output_la-${k}.$objext func_arith $k + 1 k=$func_arith_result output=$output_objdir/$output_la-${k}.$objext objlist=" $obj" func_len " $last_robj" func_arith $len0 + $func_len_result len=$func_arith_result fi done # Handle the remaining objects by creating one last # reloadable object file. All subsequent reloadable object # files will link in the last one created. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ reload_objs="$objlist $last_robj" eval concat_cmds=\"\${concat_cmds}$reload_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\" fi func_append delfiles " $output" else output= fi if ${skipped_export-false}; then func_verbose "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $opt_dry_run || $RM $export_symbols libobjs=$output # Append the command to create the export file. 
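# Note: '~' is the separator between queued commands; IFS is set to '~'
# further down when the accumulated queue is executed.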
test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" fi fi test -n "$save_libobjs" && func_verbose "creating a temporary reloadable object file: $output" # Loop through the commands generated above and execute them. save_ifs="$IFS"; IFS='~' for cmd in $concat_cmds; do IFS="$save_ifs" $opt_silent || { func_quote_for_expand "$cmd" eval "func_echo $func_quote_for_expand_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test "$opt_mode" = relink; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS="$save_ifs" if test -n "$export_symbols_regex" && ${skipped_export-false}; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi if ${skipped_export-false}; then if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols="$export_symbols" test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi fi libobjs=$output # Restore the value of output. output=$save_output if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= fi # Expand the library linking commands again to reset the # value of $libobjs for piecewise linking. # Do each of the archive commands. if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then cmds=$module_expsym_cmds else cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then cmds=$archive_expsym_cmds else cmds=$archive_cmds fi fi fi if test -n "$delfiles"; then # Append the command to remove temporary files to $cmds. eval cmds=\"\$cmds~\$RM $delfiles\" fi # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $opt_silent || { func_quote_for_expand "$cmd" eval "func_echo $func_quote_for_expand_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? 
# Restore the uninstalled library and exit if test "$opt_mode" = relink; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS="$save_ifs" # Restore the uninstalled library and exit if test "$opt_mode" = relink; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? if test -n "$convenience"; then if test -z "$whole_archive_flag_spec"; then func_show_eval '${RM}r "$gentop"' fi fi exit $EXIT_SUCCESS fi # Create links to the real library. for linkname in $linknames; do if test "$realname" != "$linkname"; then func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' fi done # If -module or -export-dynamic was specified, set the dlname. if test "$module" = yes || test "$export_dynamic" = yes; then # On all known operating systems, these are identical. dlname="$soname" fi fi ;; obj) if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then func_warning "\`-dlopen' is ignored for objects" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "\`-l' and \`-L' are ignored for objects" ;; esac test -n "$rpath" && \ func_warning "\`-rpath' is ignored for objects" test -n "$xrpath" && \ func_warning "\`-R' is ignored for objects" test -n "$vinfo" && \ func_warning "\`-version-info' is ignored for objects" test -n "$release" && \ func_warning "\`-release' is ignored for objects" case $output in *.lo) test -n "$objs$old_deplibs" && \ func_fatal_error "cannot build library object \`$output' from non-libtool objects" libobj=$output func_lo2o "$libobj" obj=$func_lo2o_result ;; *) libobj= obj="$output" ;; esac # Delete the old objects. $opt_dry_run || $RM $obj $libobj # Objects from convenience libraries. This assumes # single-version convenience libraries. Whenever we create # different ones for PIC/non-PIC, this we'll have to duplicate # the extraction. reload_conv_objs= gentop= # reload_cmds runs $LD directly, so let us get rid of # -Wl from whole_archive_flag_spec and hope we can get by with # turning comma into space.. wl= if test -n "$convenience"; then if test -n "$whole_archive_flag_spec"; then eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` else gentop="$output_objdir/${obj}x" func_append generated " $gentop" func_extract_archives $gentop $convenience reload_conv_objs="$reload_objs $func_extract_archives_result" fi fi # If we're not building shared, we need to use non_pic_objs test "$build_libtool_libs" != yes && libobjs="$non_pic_objects" # Create the old-style object. reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test output="$obj" func_execute_cmds "$reload_cmds" 'exit $?' # Exit if we aren't doing a library object file. if test -z "$libobj"; then if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS fi if test "$build_libtool_libs" != yes; then if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi # Create an invalid libtool object if no PIC, so that we don't # accidentally link it into a program. # $show "echo timestamp > $libobj" # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? exit $EXIT_SUCCESS fi if test -n "$pic_flag" || test "$pic_mode" != default; then # Only do commands if we really have different PIC objects. 
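# (Assumed reading of the step below: the reload commands are re-run with the
# PIC object list so that a PIC flavour of the output object is produced too.)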
reload_objs="$libobjs $reload_conv_objs" output="$libobj" func_execute_cmds "$reload_cmds" 'exit $?' fi if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS ;; prog) case $host in *cygwin*) func_stripname '' '.exe' "$output" output=$func_stripname_result.exe;; esac test -n "$vinfo" && \ func_warning "\`-version-info' is ignored for programs" test -n "$release" && \ func_warning "\`-release' is ignored for programs" test "$preload" = yes \ && test "$dlopen_support" = unknown \ && test "$dlopen_self" = unknown \ && test "$dlopen_self_static" = unknown && \ func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support." case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library is the System framework compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac case $host in *-*-darwin*) # Don't allow lazy linking, it breaks C++ global constructors # But is supposedly fixed on 10.4 or later (yay!). if test "$tagname" = CXX ; then case ${MACOSX_DEPLOYMENT_TARGET-10.0} in 10.[0123]) func_append compile_command " ${wl}-bind_at_load" func_append finalize_command " ${wl}-bind_at_load" ;; esac fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $compile_deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $compile_deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done compile_deplibs="$new_libs" func_append compile_command " $compile_deplibs" func_append finalize_command " $finalize_deplibs" if test -n "$rpath$xrpath"; then # If the user specified any rpath flags, then add them. for libdir in $rpath $xrpath; do # This is the magic to use -rpath. case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done fi # Now hardcode the library paths rpath= hardcode_libdirs= for libdir in $compile_rpath $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. 
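# Wrapping both the accumulated list and the candidate in the separator turns
# the case pattern into a simple containment test, so each libdir is added once.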
case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'` case :$dllsearchpath: in *":$libdir:"*) ;; ::) dllsearchpath=$libdir;; *) func_append dllsearchpath ":$libdir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval rpath=\" $hardcode_libdir_flag_spec\" fi compile_rpath="$rpath" rpath= hardcode_libdirs= for libdir in $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$finalize_perm_rpath " in *" $libdir "*) ;; *) func_append finalize_perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval rpath=\" $hardcode_libdir_flag_spec\" fi finalize_rpath="$rpath" if test -n "$libobjs" && test "$build_old_libs" = yes; then # Transform all the library objects into standard objects. compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` fi func_generate_dlsyms "$outputname" "@PROGRAM@" "no" # template prelinking step if test -n "$prelink_cmds"; then func_execute_cmds "$prelink_cmds" 'exit $?' fi wrappers_required=yes case $host in *cegcc* | *mingw32ce*) # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. wrappers_required=no ;; *cygwin* | *mingw* ) if test "$build_libtool_libs" != yes; then wrappers_required=no fi ;; *) if test "$need_relink" = no || test "$build_libtool_libs" != yes; then wrappers_required=no fi ;; esac if test "$wrappers_required" = no; then # Replace the output file specification. compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` link_command="$compile_command$compile_rpath" # We have no uninstalled library dependencies, so finalize right now. exit_status=0 func_show_eval "$link_command" 'exit_status=$?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Delete the generated files. 
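# ${outputname}S.${objext} is the symbol-table object produced by the
# func_generate_dlsyms call above; it is not needed once the program links
# without a wrapper.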
if test -f "$output_objdir/${outputname}S.${objext}"; then func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"' fi exit $exit_status fi if test -n "$compile_shlibpath$finalize_shlibpath"; then compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" fi if test -n "$finalize_shlibpath"; then finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" fi compile_var= finalize_var= if test -n "$runpath_var"; then if test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done compile_var="$runpath_var=\"$rpath\$$runpath_var\" " fi if test -n "$finalize_perm_rpath"; then # We should set the runpath_var. rpath= for dir in $finalize_perm_rpath; do func_append rpath "$dir:" done finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " fi fi if test "$no_install" = yes; then # We don't need to create a wrapper script. link_command="$compile_var$compile_command$compile_rpath" # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` # Delete the old output file. $opt_dry_run || $RM $output # Link the executable and exit func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi exit $EXIT_SUCCESS fi if test "$hardcode_action" = relink; then # Fast installation is not supported link_command="$compile_var$compile_command$compile_rpath" relink_command="$finalize_var$finalize_command$finalize_rpath" func_warning "this platform does not like uninstalled shared libraries" func_warning "\`$output' will be relinked during installation" else if test "$fast_install" != no; then link_command="$finalize_var$compile_command$finalize_rpath" if test "$fast_install" = yes; then relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` else # fast_install is set to needless relink_command= fi else link_command="$compile_var$compile_command$compile_rpath" relink_command="$finalize_var$finalize_command$finalize_rpath" fi fi # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` # Delete the old output files. $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output_objdir/$outputname" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Now create the wrapper script. func_verbose "creating $output" # Quote the relink command for shipping. 
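# Each variable named in $variables_saved_for_relink is baked into the stored
# relink command (re-exported with its current value, or explicitly unset) so
# that relinking at install time sees the same environment.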
if test -n "$relink_command"; then # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_for_eval "$var_value" relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" fi done relink_command="(cd `pwd`; $relink_command)" relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` fi # Only actually do things if not in dry run mode. $opt_dry_run || { # win32 will think the script is a binary if it has # a .exe suffix, so we strip it off here. case $output in *.exe) func_stripname '' '.exe' "$output" output=$func_stripname_result ;; esac # test for cygwin because mv fails w/o .exe extensions case $host in *cygwin*) exeext=.exe func_stripname '' '.exe' "$outputname" outputname=$func_stripname_result ;; *) exeext= ;; esac case $host in *cygwin* | *mingw* ) func_dirname_and_basename "$output" "" "." output_name=$func_basename_result output_path=$func_dirname_result cwrappersource="$output_path/$objdir/lt-$output_name.c" cwrapper="$output_path/$output_name.exe" $RM $cwrappersource $cwrapper trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 func_emit_cwrapperexe_src > $cwrappersource # The wrapper executable is built using the $host compiler, # because it contains $host paths and files. If cross- # compiling, it, like the target executable, must be # executed on the $host or under an emulation environment. $opt_dry_run || { $LTCC $LTCFLAGS -o $cwrapper $cwrappersource $STRIP $cwrapper } # Now, create the wrapper script for func_source use: func_ltwrapper_scriptname $cwrapper $RM $func_ltwrapper_scriptname_result trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 $opt_dry_run || { # note: this script will not be executed, so do not chmod. if test "x$build" = "x$host" ; then $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result else func_emit_wrapper no > $func_ltwrapper_scriptname_result fi } ;; * ) $RM $output trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 func_emit_wrapper no > $output chmod +x $output ;; esac } exit $EXIT_SUCCESS ;; esac # See if we need to build an old-fashioned archive. for oldlib in $oldlibs; do if test "$build_libtool_libs" = convenience; then oldobjs="$libobjs_save $symfileobj" addlibs="$convenience" build_libtool_libs=no else if test "$build_libtool_libs" = module; then oldobjs="$libobjs_save" build_libtool_libs=no else oldobjs="$old_deplibs $non_pic_objects" if test "$preload" = yes && test -f "$symfileobj"; then func_append oldobjs " $symfileobj" fi fi addlibs="$old_convenience" fi if test -n "$addlibs"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $addlibs func_append oldobjs " $func_extract_archives_result" fi # Do each command in the archive commands. if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then cmds=$old_archive_from_new_cmds else # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append oldobjs " $func_extract_archives_result" fi # POSIX demands no paths to be encoded in archives. 
We have # to avoid creating archives with duplicate basenames if we # might have to extract them afterwards, e.g., when creating a # static archive out of a convenience library, or when linking # the entirety of a libtool archive into another (currently # not supported by libtool). if (for obj in $oldobjs do func_basename "$obj" $ECHO "$func_basename_result" done | sort | sort -uc >/dev/null 2>&1); then : else echo "copying selected object files to avoid basename conflicts..." gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_mkdir_p "$gentop" save_oldobjs=$oldobjs oldobjs= counter=1 for obj in $save_oldobjs do func_basename "$obj" objbase="$func_basename_result" case " $oldobjs " in " ") oldobjs=$obj ;; *[\ /]"$objbase "*) while :; do # Make sure we don't pick an alternate name that also # overlaps. newobj=lt$counter-$objbase func_arith $counter + 1 counter=$func_arith_result case " $oldobjs " in *[\ /]"$newobj "*) ;; *) if test ! -f "$gentop/$newobj"; then break; fi ;; esac done func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" func_append oldobjs " $gentop/$newobj" ;; *) func_append oldobjs " $obj" ;; esac done fi func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 tool_oldlib=$func_to_tool_file_result eval cmds=\"$old_archive_cmds\" func_len " $cmds" len=$func_len_result if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then cmds=$old_archive_cmds elif test -n "$archiver_list_spec"; then func_verbose "using command file archive linking..." for obj in $oldobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > $output_objdir/$libname.libcmd func_to_tool_file "$output_objdir/$libname.libcmd" oldobjs=" $archiver_list_spec$func_to_tool_file_result" cmds=$old_archive_cmds else # the command line is too long to link in one step, link in parts func_verbose "using piecewise archive linking..." save_RANLIB=$RANLIB RANLIB=: objlist= concat_cmds= save_oldobjs=$oldobjs oldobjs= # Is there a better way of finding the last object in the list? for obj in $save_oldobjs do last_oldobj=$obj done eval test_cmds=\"$old_archive_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 for obj in $save_oldobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result func_append objlist " $obj" if test "$len" -lt "$max_cmd_len"; then : else # the above command should be used before it gets too long oldobjs=$objlist if test "$obj" = "$last_oldobj" ; then RANLIB=$save_RANLIB fi test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" objlist= len=$len0 fi done RANLIB=$save_RANLIB oldobjs=$objlist if test "X$oldobjs" = "X" ; then eval cmds=\"\$concat_cmds\" else eval cmds=\"\$concat_cmds~\$old_archive_cmds\" fi fi fi func_execute_cmds "$cmds" 'exit $?' done test -n "$generated" && \ func_show_eval "${RM}r$generated" # Now create the libtool archive. 
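# The .la file written below is a plain-text descriptor; the loop emits it
# twice, once with installed=no for the build tree and once with installed=yes
# (the "${outputname}i" copy kept in $objdir) for installation.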
case $output in *.la) old_library= test "$build_old_libs" = yes && old_library="$libname.$libext" func_verbose "creating $output" # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_for_eval "$var_value" relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" fi done # Quote the link command for shipping. relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` if test "$hardcode_automatic" = yes ; then relink_command= fi # Only create the output if not a dry run. $opt_dry_run || { for installed in no yes; do if test "$installed" = yes; then if test -z "$install_libdir"; then break fi output="$output_objdir/$outputname"i # Replace all uninstalled libtool libraries with the installed ones newdependency_libs= for deplib in $dependency_libs; do case $deplib in *.la) func_basename "$deplib" name="$func_basename_result" func_resolve_sysroot "$deplib" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` test -z "$libdir" && \ func_fatal_error "\`$deplib' is not a valid libtool archive" func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" ;; -L*) func_stripname -L '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -L$func_replace_sysroot_result" ;; -R*) func_stripname -R '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -R$func_replace_sysroot_result" ;; *) func_append newdependency_libs " $deplib" ;; esac done dependency_libs="$newdependency_libs" newdlfiles= for lib in $dlfiles; do case $lib in *.la) func_basename "$lib" name="$func_basename_result" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "\`$lib' is not a valid libtool archive" func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" ;; *) func_append newdlfiles " $lib" ;; esac done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do case $lib in *.la) # Only pass preopened files to the pseudo-archive (for # eventual linking with the app. 
that links it) if we # didn't already link the preopened objects directly into # the library: func_basename "$lib" name="$func_basename_result" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "\`$lib' is not a valid libtool archive" func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" ;; esac done dlprefiles="$newdlprefiles" else newdlfiles= for lib in $dlfiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlfiles " $abs" done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlprefiles " $abs" done dlprefiles="$newdlprefiles" fi $RM $output # place dlname in correct position for cygwin # In fact, it would be nice if we could use this code for all target # systems that can't hard-code library paths into their executables # and that have no shared library path variable independent of PATH, # but it turns out we can't easily determine that from inspecting # libtool variables, so we have to hard-code the OSs to which it # applies here; at the moment, that means platforms that use the PE # object format with DLL files. See the long comment at the top of # tests/bindir.at for full details. tdlname=$dlname case $host,$output,$installed,$module,$dlname in *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) # If a -bindir argument was supplied, place the dll there. if test "x$bindir" != x ; then func_relative_path "$install_libdir" "$bindir" tdlname=$func_relative_path_result$dlname else # Otherwise fall back on heuristic. tdlname=../bin/$dlname fi ;; esac $ECHO > $output "\ # $outputname - a libtool library file # Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION # # Please DO NOT delete this file! # It is necessary for linking the library. # The name that we can dlopen(3). dlname='$tdlname' # Names of this library. library_names='$library_names' # The name of the static archive. old_library='$old_library' # Linker flags that can not go in dependency_libs. inherited_linker_flags='$new_inherited_linker_flags' # Libraries that this one depends upon. dependency_libs='$dependency_libs' # Names of additional weak libraries provided by this library weak_library_names='$weak_libs' # Version information for $libname. current=$current age=$age revision=$revision # Is this an already installed library? installed=$installed # Should we warn about portability when linking against -modules? shouldnotlink=$module # Files to dlopen/dlpreopen dlopen='$dlfiles' dlpreopen='$dlprefiles' # Directory that this library needs to be installed in: libdir='$install_libdir'" if test "$installed" = no && test "$need_relink" = yes; then $ECHO >> $output "\ relink_command=\"$relink_command\"" fi done } # Do a symbolic link so that the libtool archive can be found in # LD_LIBRARY_PATH before the program is installed. func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' ;; esac exit $EXIT_SUCCESS } { test "$opt_mode" = link || test "$opt_mode" = relink; } && func_mode_link ${1+"$@"} # func_mode_uninstall arg... func_mode_uninstall () { $opt_debug RM="$nonopt" files= rmforce= exit_status=0 # This variable tells wrapper scripts just to set variables rather # than running their programs. 
libtool_install_magic="$magic" for arg do case $arg in -f) func_append RM " $arg"; rmforce=yes ;; -*) func_append RM " $arg" ;; *) func_append files " $arg" ;; esac done test -z "$RM" && \ func_fatal_help "you must specify an RM program" rmdirs= for file in $files; do func_dirname "$file" "" "." dir="$func_dirname_result" if test "X$dir" = X.; then odir="$objdir" else odir="$dir/$objdir" fi func_basename "$file" name="$func_basename_result" test "$opt_mode" = uninstall && odir="$dir" # Remember odir for removal later, being careful to avoid duplicates if test "$opt_mode" = clean; then case " $rmdirs " in *" $odir "*) ;; *) func_append rmdirs " $odir" ;; esac fi # Don't error if the file doesn't exist and rm -f was used. if { test -L "$file"; } >/dev/null 2>&1 || { test -h "$file"; } >/dev/null 2>&1 || test -f "$file"; then : elif test -d "$file"; then exit_status=1 continue elif test "$rmforce" = yes; then continue fi rmfiles="$file" case $name in *.la) # Possibly a libtool archive, so verify it. if func_lalib_p "$file"; then func_source $dir/$name # Delete the libtool libraries and symlinks. for n in $library_names; do func_append rmfiles " $odir/$n" done test -n "$old_library" && func_append rmfiles " $odir/$old_library" case "$opt_mode" in clean) case " $library_names " in *" $dlname "*) ;; *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; esac test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" ;; uninstall) if test -n "$library_names"; then # Do each command in the postuninstall commands. func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' fi if test -n "$old_library"; then # Do each command in the old_postuninstall commands. func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' fi # FIXME: should reinstall the best remaining shared library. ;; esac fi ;; *.lo) # Possibly a libtool object, so verify it. if func_lalib_p "$file"; then # Read the .lo file func_source $dir/$name # Add PIC object to the list of files to remove. if test -n "$pic_object" && test "$pic_object" != none; then func_append rmfiles " $dir/$pic_object" fi # Add non-PIC object to the list of files to remove. if test -n "$non_pic_object" && test "$non_pic_object" != none; then func_append rmfiles " $dir/$non_pic_object" fi fi ;; *) if test "$opt_mode" = clean ; then noexename=$name case $file in *.exe) func_stripname '' '.exe' "$file" file=$func_stripname_result func_stripname '' '.exe' "$name" noexename=$func_stripname_result # $file with .exe has already been added to rmfiles, # add $file without .exe func_append rmfiles " $file" ;; esac # Do a test to see if this is a libtool program. 
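# Libtool program wrappers also have companion files under $objdir (the
# relinked lt-* binary and generated wrapper sources); those are queued for
# removal as well.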
if func_ltwrapper_p "$file"; then if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" relink_command= func_source $func_ltwrapper_scriptname_result func_append rmfiles " $func_ltwrapper_scriptname_result" else relink_command= func_source $dir/$noexename fi # note $name still contains .exe if it was in $file originally # as does the version of $file that was added into $rmfiles func_append rmfiles " $odir/$name $odir/${name}S.${objext}" if test "$fast_install" = yes && test -n "$relink_command"; then func_append rmfiles " $odir/lt-$name" fi if test "X$noexename" != "X$name" ; then func_append rmfiles " $odir/lt-${noexename}.c" fi fi fi ;; esac func_show_eval "$RM $rmfiles" 'exit_status=1' done # Try to remove the ${objdir}s in the directories where we deleted files for dir in $rmdirs; do if test -d "$dir"; then func_show_eval "rmdir $dir >/dev/null 2>&1" fi done exit $exit_status } { test "$opt_mode" = uninstall || test "$opt_mode" = clean; } && func_mode_uninstall ${1+"$@"} test -z "$opt_mode" && { help="$generic_help" func_fatal_help "you must specify a MODE" } test -z "$exec_cmd" && \ func_fatal_help "invalid operation mode \`$opt_mode'" if test -n "$exec_cmd"; then eval exec "$exec_cmd" exit $EXIT_FAILURE fi exit $exit_status # The TAGs below are defined such that we never get into a situation # in which we disable both kinds of libraries. Given conflicting # choices, we go for a static library, that is the most portable, # since we can't tell whether shared libraries were disabled because # the user asked for that or because the platform doesn't support # them. This is particularly important on AIX, because we don't # support having both static and shared libraries enabled at the same # time on that platform, so we default to a shared-only configuration. # If a disable-shared tag is given, we'll fallback to a static-only # configuration. But we'll never go from static-only to shared-only. # ### BEGIN LIBTOOL TAG CONFIG: disable-shared build_libtool_libs=no build_old_libs=yes # ### END LIBTOOL TAG CONFIG: disable-shared # ### BEGIN LIBTOOL TAG CONFIG: disable-static build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` # ### END LIBTOOL TAG CONFIG: disable-static # Local Variables: # mode:shell-script # sh-indentation:2 # End: # vi:sw=2 PHYLIPNEW-3.69.650/install-sh0000755000175000017500000003325512171071677012311 00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2011-11-20.07; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # 'make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit=${DOITPROG-} if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_glob='?' initialize_posix_glob=' test "$posix_glob" != "?" || { if (set -f) 2>/dev/null; then posix_glob= else posix_glob=: fi } ' posix_mkdir= # Desired mode of installed file. mode=0755 chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false no_target_directory= usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve the last data modification time) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -s) stripcmd=$stripprog;; -t) dst_arg=$2 # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) no_target_directory=true;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. 
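# The loop below rotates through the arguments: each previously seen candidate
# destination is pushed back onto $@, so that afterwards dst_arg holds the last
# argument and $@ holds only the sources.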
for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call 'install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for 'test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. *[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. 
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac eval "$initialize_posix_glob" oIFS=$IFS IFS=/ $posix_glob set -f set fnord $dstdir shift $posix_glob set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. (umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && eval "$initialize_posix_glob" && $posix_glob set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && $posix_glob set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. 
{ # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd -f "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: PHYLIPNEW-3.69.650/INSTALL0000644000175000017500000003660512171071677011340 00000000000000Installation Instructions ************************* Copyright (C) 1994-1996, 1999-2002, 2004-2012 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. 
Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On Mac OS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor.
Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. 
They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. HP-UX `make' updates targets which have the same time stamps as their prerequisites, which makes it generally unusable when shipped generated files such as `configure' are involved. Use GNU `make' instead. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. 
Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf limitation. Until the limitation is lifted, you can use this workaround: CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. `--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
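As a concrete wrap-up of the preceding sections, here is a minimal sketch of an out-of-tree ("VPATH") build with a staged install. The `build' directory name and the `/tmp/stage' destination are illustrative only, not part of the package, and the VPATH layout requires GNU `make':

     mkdir build && cd build              # separate object directory (VPATH build)
     ../configure --prefix=/usr/local     # configure from the source directory above
     make                                 # compile
     make check                           # optional: run any self-tests
     make install DESTDIR=/tmp/stage      # stage the installed files under /tmp/stage

After inspecting the staged tree under `/tmp/stage', a plain `make install' (run with sufficient privileges) installs under the chosen prefix.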
PHYLIPNEW-3.69.650/emboss_acd/0002775000175000017500000000000012171071712012446 500000000000000PHYLIPNEW-3.69.650/emboss_acd/ftreedistpair.acd0000664000175000017500000000435211727433154015716 00000000000000application: ftreedistpair [ documentation: "Calculate distance between two sets of trees" groups: "Phylogeny:Consensus" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0557 Phylogenetic tree distances calculation" ] section: input [ information: "Input section" type: "page" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] tree: bintreefile [ parameter: "Y" knowntype: "newick" information: "Second phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: dtype [ additional: "Y" information: "Distance type" values: "s:Symmetric difference; b:Branch score distance" default: "b" relations: "EDAM_data:2527 Parameter" ] list: pairing [ additional: "Y" information: "Tree pairing method" values: "c:Distances between corresponding pairs each tree file; l:Distances between all possible pairs in each tree file" default: "l" relations: "EDAM_data:2527 Parameter" ] list: style [ additional: "Y" information: "Distances output option" values: "f:Full_matrix; v:Verbose, one pair per line; s:Sparse, one pair per line" default: "v" relations: "EDAM_data:2527 Parameter" ] boolean: noroot [ additional: "Y" default: "N" information: "Trees to be treated as rooted" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "treedist output" information: "Phylip treedist program output file" relations: "EDAM_data:1442 Phylogenetic tree report (tree distances)" ] boolean: progress [ additional: "Y" default: "N" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fpars.acd0000664000175000017500000001055011727433154014161 00000000000000application: fpars [ documentation: "Discrete character parsimony" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" help: "File containing one or more data sets" relations: "EDAM_data:1427 Phylogenetic discrete data" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Weights file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Method" values: "w:Wagner; c:Camin-Sokal" information: "Choose the parsimony method to use" default: "Wagner" relations: "EDAM_data:2527 Parameter" ] integer: maxtrees [ additional: "Y" information: "Number of trees to 
save" default: "100" minimum: "1" maximum: "1000000" relations: "EDAM_data:2527 Parameter" ] toggle: thorough [ additional: "@(!$(intreefile.isdefined))" information: "More thorough search" default: "Y" relations: "EDAM_data:2527 Parameter" ] boolean: rearrange [ additional: "$(thorough)" default: "Y" information: "Rearrange on just one best tree" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.discretesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "1" information: "Threshold value" default: "1" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "pars output" information: "Phylip pars program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print steps at each site" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print states at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "@($(ancseq) | $(printdata))" default: "Y" information: "Use dot differencing to display results" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/ffactor.acd0000664000175000017500000000355311727433154014477 00000000000000application: ffactor [ documentation: "Multistate to binary recoding program" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0550 Sequence alignment analysis (phylogenetic modelling)" ] section: input [ information: "Input section" type: "page" ] infile: infile [ parameter: "Y" information: "Phylip factor program input file" knowntype: "factor input" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: anc [ additional: "Y" default: "N" information: "Put ancestral states in output file" relations: "EDAM_data:2527 Parameter" ] boolean: factors [ additional: "Y" default: "N" information: "Put 
factors information in output file" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "factor output" information: "Phylip factor program output file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] outfile: outfactorfile [ extension: "factor" information: "Phylip factor data output file (optional)" nullok: "Y" knowntype: "phylip factor" relations: "EDAM_data:1427 Phylogenetic discrete data" ] outfile: outancfile [ extension: "ancestor" information: "Phylip ancestor data output file (optional)" nullok: "Y" knowntype: "phylip ancestor" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fpromlk.acd0000664000175000017500000001567711727433154014537 00000000000000application: fpromlk [ documentation: "Protein phylogeny by maximum likelihood" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapproteinphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1384 Sequence alignment (protein)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] integer: ncategories [ additional: "Y" default: "1" minimum: "1" maximum: "9" information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [ additional: "@($(ncategories) > 1)" information: "Rate for each category" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" information: "File of substitution rate categories" nullok: "@($(ncategories) == 1)" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 
1:0)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] list: model [ additional: "Y" minimum: "1" maximum: "1" header: "Probability model" values: "j:Jones-Taylor-Thornton; h:Henikoff/Tillier PMBs; d:Dayhoff PAM" information: "Probability model for amino acid change" default: "Jones-Taylor-Thornton" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "Y" minimum: "1" maximum: "1" header: "Rate variation among sites" values: "g:Gamma distributed rates; i:Gamma+invariant sites; h:User defined HMM of rates; n:Constant rate" information: "Rate variation among sites" default: "n" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "@($(gammatype)==g)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ngammacat [ additional: "@($(gammatype)==g)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9)" relations: "EDAM_data:2527 Parameter" ] float: invarcoefficient [ additional: "@($(gammatype)==i)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ninvarcat [ additional: "@($(gammatype)==i)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9) including one for invariant sites" relations: "EDAM_data:2527 Parameter" ] float: invarfrac [ additional: "@($(gammatype)==i)" information: "Fraction of invariant sites" default: "0.0" minimum: "0.0" maximum: "0.9999" relations: "EDAM_data:2527 Parameter" ] integer: nhmmcategories [ additional: "@($(gammatype)==h)" default: "1" minimum: "1" maximum: "9" information: "Number of HMM rate categories" relations: "EDAM_data:2527 Parameter" ] array: hmmrates [ additional: "@($(nhmmcategories) > 1)" information: "HMM category rates" default: "1.0" minimum: "0.0" size: "$(nhmmcategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] array: hmmprobabilities [ additional: "@($(nhmmcategories) > 1)" information: "Probability for each HMM category" default: "1.0" minimum: "0.0" maximum: "1.0" size: "$(nhmmcategories)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] boolean: adjsite [ additional: "@($(gammatype)!=n)" default: "N" information: "Rates at adjacent sites correlated" relations: "EDAM_data:2527 Parameter" ] float: lambda [ additional: "$(adjsite)" default: "1.0" information: "Mean block length of sites having the same rate" minimum: "1.0" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" 
] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "promlk output" information: "Phylip promlk program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: hypstate [ additional: "Y" default: "N" information: "Reconstruct hypothetical sequence" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnapenny.acd0000664000175000017500000000636711727433154015043 00000000000000application: fdnapenny [ documentation: "Penny algorithm for DNA" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] properties: weights [ additional: "Y" characters: "01" length: "$(sequence.length)" size: "1" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: howoften [ additional: "Y" information: "How often to report, in trees" default: "100" relations: "EDAM_data:2527 Parameter" ] integer: howmany [ additional: "Y" information: "How many groups of trees" default: "1000" relations: "EDAM_data:2527 Parameter" ] boolean: simple [ additional: "Y" information: "Branch and bound is simple" default: "Y" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "1.0" default: "1.0" information: "Threshold value" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnapenny output" information: "Phylip dnapenny program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" 
relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print out steps in each site" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print sequences at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnaml.acd0000664000175000017500000001674211727433154014320 00000000000000application: fdnaml [ documentation: "Estimate nucleotide phylogeny by maximum likelihood" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] integer: ncategories [ additional: "Y" default: "1" minimum: "1" maximum: "9" information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [ additional: "@($(ncategories) > 1)" information: "Rate for each category" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" information: "File of substitution rate categories" nullok: "@($(ncategories) == 1)" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 
1:0)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] float: ttratio [ additional: "Y" default: "2.0" minimum: "0.001" information: "Transition/transversion ratio" relations: "EDAM_data:2527 Parameter" ] toggle: freqsfrom [ additional: "Y" default: "Y" information: "Use empirical base frequencies from sequence input" relations: "EDAM_data:2527 Parameter" ] array: basefreq [ additional: "@(!$(freqsfrom))" size: "4" minimum: "0.0" maximum: "1.0" default: "0.25 0.25 0.25 0.25" information: "Base frequencies for A C G T/U (use blanks to separate)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "Y" minimum: "1" maximum: "1" header: "Rate variation among sites" values: "g:Gamma distributed rates; i:Gamma+invariant sites; h:User defined HMM of rates; n:Constant rate" information: "Rate variation among sites" default: "Constant rate" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "@($(gammatype)==g)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ngammacat [ additional: "@($(gammatype)==g)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9)" relations: "EDAM_data:2527 Parameter" ] float: invarcoefficient [ additional: "@($(gammatype)==i)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ninvarcat [ additional: "@($(gammatype)==i)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9) including one for invariant sites" relations: "EDAM_data:2527 Parameter" ] float: invarfrac [ additional: "@($(gammatype)==i)" information: "Fraction of invariant sites" default: "0.0" minimum: "0.0" maximum: "0.9999" relations: "EDAM_data:2527 Parameter" ] integer: nhmmcategories [ additional: "@($(gammatype)==h)" default: "1" minimum: "1" maximum: "9" information: "Number of HMM rate categories" relations: "EDAM_data:2527 Parameter" ] array: hmmrates [ additional: "@($(nhmmcategories) > 1)" information: "HMM category rates" default: "1.0" minimum: "0.0" size: "$(nhmmcategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] array: hmmprobabilities [ additional: "@($(nhmmcategories) > 1)" information: "Probability for each HMM category" default: "1.0" minimum: "0.0" maximum: "1.0" size: "$(nhmmcategories)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] boolean: adjsite [ additional: "@($(gammatype)!=n)" default: "N" information: "Rates at adjacent sites correlated" relations: "EDAM_data:2527 Parameter" ] float: lambda [ additional: "$(adjsite)" default: "1.0" information: "Mean block length of sites having the same rate" minimum: "1.0" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" maximum: "32767" minimum: "1" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information:
"Global rearrangements" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] boolean: rough [ additional: "Y" default: "Y" information: "Speedier but rougher analysis" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnaml output" information: "Phylip dnaml program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: hypstate [ additional: "Y" default: "N" information: "Reconstruct hypothetical sequence" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/ffreqboot.acd0000664000175000017500000000625411727433154015043 00000000000000application: ffreqboot [ documentation: "Bootstrapped genetic frequencies algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0552 Phylogenetic tree bootstrapping" ] section: input [ information: "Input section" type: "page" ] frequencies: infile [ parameter: "Y" relations: "EDAM_data:1426 Phylogenetic continuous quantitative data" ] properties: weights [ additional: "Y" characters: "01" information: "Phylip weights file (optional)" help: "Weights file" length: "$(infile.length)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: test [ additional: "y" minimum: "1" maximum: "1" header: "Test" values: "b:Bootstrap; j:Jackknife; c:Permute species for each character; o:Permute character order; s:Permute within species; r:Rewrite data" information: "Choose test" default: "b" relations: "EDAM_data:2527 Parameter" ] toggle: regular [ additional: "@( $(test) == { b | j } )" information: "Altered sampling fraction" default: "N" relations: "EDAM_data:2527 Parameter" ] float: fracsample [ additional: "@(!$(regular))" information: "Samples as percentage of sites" default: "100.0" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2527 Parameter" ] integer: blocksize [ information: "Block size for bootstraping" additional: "@($(test) == b)" default: "1" minimum: "1" relations: "EDAM_data:2527 Parameter" ] integer: reps [ additional: "@($(test) != r)" information: "How many replicates" minimum: "1" default: "100" relations: "EDAM_data:2527 Parameter" ] list: justweights [ additional: "@( $(test) == { b | j } )" minimum: "1" maximum: "1" header: "Write out datasets or just weights" values: "d:Datasets; w:Weights" information: "Write out datasets or just weights" default: "d" relations: 
"EDAM_data:2527 Parameter" ] integer: seed [ additional: "@($(test) != r)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "seqbootfreq output" information: "Phylip seqboot_freq program output file" relations: "EDAM_data:2245 Sequence set (bootstrapped)" ] boolean: printdata [ additional: "Y" default: "N" information: "Print out the data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fkitsch.acd0000664000175000017500000000641511727433154014506 00000000000000application: fkitsch [ documentation: "Fitch-Margoliash method with contemporary tips" groups: "Phylogeny:Distance matrix" gui: "yes" batch: "yes" cpu: "high" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0546 Phylogenetic tree construction (minimum distance methods)" ] section: input [ information: "Input section" type: "page" ] distances: datafile [ parameter: "Y" help: "File containing one or more distance matrices" knowntype: "distance matrix" information: "Phylip distance matrix file" relations: "EDAM_data:0870 Sequence distance matrix" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: matrixtype [ additional: "Y" minimum: "1" maximum: "1" header: "Matrix type" values: "s:Square; u:Upper triangular; l:Lower triangular" information: "Type of data matrix" default: "s" relations: "EDAM_data:2527 Parameter" ] boolean: minev [ additional: "Y" information: "Minimum evolution" default: "N" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] float: power [ additional: "Y" default: "2.0" information: "Power" relations: "EDAM_data:2527 Parameter" ] boolean: negallowed [ additional: "Y" default: "N" information: "Negative branch lengths allowed" relations: "EDAM_data:2527 Parameter" ] boolean: replicates [ additional: "Y" default: "N" information: "Subreplicates" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "kitsch output" information: "Phylip kitsch program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ 
additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnamove.acd0000664000175000017500000000505711727433154014653 00000000000000application: fdnamove [ documentation: "Interactive DNA parsimony" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "01" information: "Phylip weights file (optional)" length: "$(sequence.length)" size: "" help: "Weights file - ignore sites with weight zero" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "1" information: "Threshold value" default: "1" relations: "EDAM_data:2146 Threshold" ] list: initialtree [ additional: "Y" minimum: "1" maximum: "1" header: "Initial tree" values: "a:Arbitrary; u:User; s:Specify" information: "Initial tree" default: "Arbitrary" relations: "EDAM_data:2527 Parameter" ] integer: screenwidth [ additional: "Y" default: "80" information: "Width of terminal screen in characters" relations: "EDAM_data:2152 Rendering parameter" ] integer: screenlines [ additional: "Y" default: "24" information: "Number of lines on screen" relations: "EDAM_data:2152 Rendering parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outtreefile [ additional: "Y" extension: "treefile" knowntype: "newick tree" information: "Phylip tree output file (optional)" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fprotdist.acd0000664000175000017500000001175011727433154015067 00000000000000application: fprotdist [ documentation: "Protein distance algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0289 Sequence distance matrix generation" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapproteinphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1384 Sequence alignment (protein)" ] integer: ncategories [ additional: "Y" default: "1" minimum: "1" maximum: "9" information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [
additional: "@($(ncategories) > 1)" information: "Rate for each category" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" information: "File of substitution rate categories" nullok: "@($(ncategories) == 1)" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 1:0)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Method" values: "j:Jones-Taylor-Thornton matrix; h:Henikoff/Tillier PMB matrix; d:Dayhoff PAM matrix; k:Kimura formula; s:Similarity table; c:Categories model" information: "Choose the method to use" default: "j" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "@($(method) == { j | h | d | c })" minimum: "1" maximum: "1" header: "Rate variation among sites" values: "g:Gamma distributed rates; i:Gamma+invariant sites; c:Constant rate" information: "Rate variation among sites" default: "c" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "@($(gammatype)==g)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] float: invarcoefficient [ additional: "@($(gammatype)==i)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] list: aacateg [ additional: "@($(method) == c)" minimum: "1" maximum: "1" header: "Which categorizations of amino acids to use - all have groups: (Glu Gln Asp Asn), (Lys Arg His), (Phe Tyr Trp) plus:" values: "G:George/Hunt/Barker (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr Pro); C:Chemical (Cys Met), (Val Leu Ileu Gly Ala Ser Thr), (Pro); H:Hall (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr), (Pro)" information: "Choose the category to use" default: "G" relations: "EDAM_data:2527 Parameter" ] list: whichcode [ additional: "@($(method) == c)" minimum: "1" maximum: "1" header: "Which genetic code" values: "u:Universal; c:Ciliate; m:Universal mitochondrial; v:Vertebrate mitochondrial; f:Fly mitochondrial; y:Yeast mitochondrial" information: "Which genetic code" default: "u" relations: "EDAM_data:2527 Parameter" ] float: ease [ additional: "@($(method) == c)" minimum: "0.0" maximum: "1.0" default: "0.457" information: "Prob change category (1.0=easy)" relations: "EDAM_data:2527 Parameter" ] float: ttratio [ additional: "@($(method) == c)" minimum: "0.0" default: "2.0" information: "Transition/transversion ratio" relations: "EDAM_data:2527 Parameter" ] array: basefreq [ additional: "@($(method) == c)" size: "4" minimum: "0.0" maximum: "1.0" default: "0.25 0.25 0.25 0.25" information: "Base frequencies for A C G T/U (use blanks to separate)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "distance matrix" information: "Phylip distance matrix output file" relations: "EDAM_data:0870 Sequence distance matrix" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527
Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fpenny.acd0000664000175000017500000001000311727433154014336 00000000000000application: fpenny [ documentation: "Penny algorithm, branch-and-bound" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" help: "File containing one or more data sets" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Phylip weights file (optional)" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" information: "Phylip ancestral states file (optional)" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: mixfile [ additional: "Y" characters: "CSW" information: "Phylip mix output file (optional)" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Method" values: "Wag:Wagner; Cam:Camin-Sokal; Mix:Mixed;" information: "Choose the method to use" default: "Wagner" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.discretesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] integer: howmany [ additional: "Y" information: "How many groups of trees" default: "1000" relations: "EDAM_data:2527 Parameter" ] integer: howoften [ additional: "Y" information: "How often to report, in trees" default: "100" relations: "EDAM_data:2527 Parameter" ] boolean: simple [ additional: "Y" information: "Branch and bound is simple" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "Y" minimum: "1.0" default: "$(infile.discretesize)" information: "Threshold value" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "penny output" information: "Phylip penny program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: 
"EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print out steps in each site" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print states at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fcontrast.acd0000664000175000017500000000420111727433154015045 00000000000000application: fcontrast [ documentation: "Continuous character contrasts" groups: "Phylogeny:Continuous characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0550 Sequence alignment analysis (phylogenetic modelling)" ] section: input [ information: "Input section" type: "page" ] frequencies: infile [ parameter: "Y" help: "File containing one or more sets of data" relations: "EDAM_data:1426 Phylogenetic continuous quantitative data" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file (optional)" nullok: "N" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: varywithin [ additional: "Y" default: "N" information: "Within-population variation in data" relations: "EDAM_data:2527 Parameter" ] boolean: reg [ additional: "@(!$(varywithin))" default: "Y" information: "Print out correlations and regressions" relations: "EDAM_data:2527 Parameter" ] boolean: writecont [ additional: "@(!$(varywithin))" default: "N" information: "Print out contrasts" relations: "EDAM_data:2527 Parameter" ] boolean: nophylo [ additional: "$(varywithin)" default: "Y" information: "LRT test of no phylogenetic component, with and without VarA" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "contrast output" information: "Phylip contrast program output file" relations: "EDAM_data:1444 Phylogenetic character contrasts" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fneighbor.acd0000664000175000017500000000614011727433154015011 00000000000000application: fneighbor [ documentation: "Phylogenies from distance matrix by N-J or UPGMA method" groups: "Phylogeny:Distance matrix" gui: "yes" batch: "yes" cpu: "high" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0546 Phylogenetic tree construction (minimum distance methods)" ] section: input [ information: "Input section" type: "page" ] distances: datafile [ parameter: "Y" knowntype: "distance matrix" information: "Phylip distance matrix file" relations: "EDAM_data:0870 Sequence distance matrix" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: matrixtype [ additional: "Y" minimum: "1" maximum: "1" header: "Matrix type" values: "s:Square; u:Upper triangular; l:Lower triangular" information: "Type of data matrix" default: "s" relations: "EDAM_data:2527 Parameter" ] list: treetype [ additional: "Y" minimum: "1" maximum: "1" header: "Tree type" values: "n:Neighbor-joining; u:UPGMA" information: "Neighbor-joining or UPGMA tree" default: "n" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: 
"@($(treetype)==n)" minimum: "0" maximum: "$(datafile.distancesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: jumble [ additional: "Y" default: "N" information: "Randomise input order of species" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(jumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: replicates [ additional: "Y" default: "N" information: "Subreplicates" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "neighbor output" information: "Phylip neighbor program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdrawtree.acd0000664000175000017500000001530011727433154015027 00000000000000application: fdrawtree [ documentation: "Plots an unrooted tree diagram" groups: "Phylogeny:Tree drawing" batch: "no" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_topic:0092 Data visualisation" relations: "EDAM_operation:0567 Phylogenetic tree rendering" ] section: input [ information: "Input section" type: "page" ] string: fontfile [ default: "font1" information: "Fontfile name" knowntype: "phylip font" relations: "EDAM_identifier:1050 File name" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: plotfile [ parameter: "Y" knowntype: "drawtree output" information: "Phylip drawtree output file" relations: "EDAM_data:0872 Phylogenetic tree" ] list: plotter [ additional: "Y" minimum: "1" maximum: "1" header: "Plotter or printer" values: "l:Postscript printer file format; m:PICT format (for drawing programs); j:HP Laserjet 75 dpi PCL file format; s:HP Laserjet 150 dpi PCL file format; y:HP Laserjet 300 dpi PCL file format; w:MS-Windows Bitmap; f:FIG 2.0 drawing program format; a:Idraw drawing program format; z:VRML Virtual Reality Markup Language file; n:PCX 640x350 file format (for drawing programs); p:PCX 800x600 file format (for drawing programs); q:PCX 1024x768 file format (for drawing programs); k:TeKtronix 4010 graphics terminal; x:X Bitmap format; v:POVRAY 3D rendering program file; r:Rayshade 3D rendering program file; h:Hewlett-Packard pen plotter (HPGL file format); d:DEC ReGIS graphics (VT240 terminal); e:Epson MX-80 dot-matrix printer; c:Prowriter/Imagewriter dot-matrix 
printer; t:Toshiba 24-pin dot-matrix printer; o:Okidata dot-matrix printer; b:Houston Instruments plotter; u:other (one you have inserted code for)" information: "Plotter or printer the tree will be drawn on" default: "l" relations: "EDAM_data:2152 Rendering parameter" ] list: previewer [ additional: "Y" minimum: "1" maximum: "1" header: "Previewing device" values: "n:Will not be previewed; i:MSDOS graphics screen; m:Macintosh screens; x:X Windows display; w:MS Windows display; k:TeKtronix 4010 graphics terminal; d:DEC ReGIS graphics (VT240 terminal); o:Other (one you have inserted code for)" information: "Previewing device" default: "x" relations: "EDAM_data:2152 Rendering parameter" ] list: iterate [ additional: "Y" minimum: "1" maximum: "1" header: "Iterate to improve tree" values: "n:No; e:Equal-Daylight algorithm; b:n-Body algorithm" information: "Iterate to improve tree" default: "e" relations: "EDAM_data:2527 Parameter" ] boolean: lengths [ additional: "Y" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] list: labeldirection [ additional: "Y" minimum: "1" maximum: "1" header: "Label direction" values: "a:along; f:fixed; r:radial; m:middle" information: "Label direction" default: "m" relations: "EDAM_data:2152 Rendering parameter" ] float: treeangle [ information: "Angle the tree is to be plotted" default: "90.0" minimum: "-360.0" maximum: "360.0" additional: "Y" relations: "EDAM_data:2152 Rendering parameter" ] float: arc [ information: "Degrees the arc should occupy" default: "360" minimum: "0.0" maximum: "360.0" additional: "Y" relations: "EDAM_data:2152 Rendering parameter" ] float: labelrotation [ additional: "@($(style)!=c)" information: "Angle of labels (0 degrees is horizontal for a tree growing vertically)" default: "90.0" minimum: "0.0" maximum: "360.0" relations: "EDAM_data:2152 Rendering parameter" ] toggle: rescaled [ additional: "Y" default: "Y" information: "Automatically rescale branch lengths" relations: "EDAM_data:2527 Parameter" ] float: bscale [ additional: "@(!$(rescaled))" default: "1.0" information: "Centimeters per unit branch length" relations: "EDAM_data:2152 Rendering parameter" ] float: treedepth [ additional: "Y" default: "0.53" information: "Depth of tree as fraction of its breadth" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2152 Rendering parameter" ] float: xmargin [ additional: "@($(plotter)!=r)" default: "1.65" minimum: "0.1" information: "Horizontal margin (cm)" relations: "EDAM_data:2152 Rendering parameter" ] float: ymargin [ additional: "@($(plotter)!=r)" default: "2.16" minimum: "0.1" information: "Vertical margin (cm)" relations: "EDAM_data:2152 Rendering parameter" ] float: xrayshade [ additional: "@($(plotter)==r)" default: "1.65" minimum: "0.1" information: "Horizontal margin (pixels)" relations: "EDAM_data:2152 Rendering parameter" ] float: yrayshade [ additional: "@($(plotter)==r)" default: "2.16" minimum: "0.1" information: "Vertical margin (pixels)" relations: "EDAM_data:2152 Rendering parameter" ] float: paperx [ additional: "Y" default: "20.63750" information: "Paper width" relations: "EDAM_data:2152 Rendering parameter" ] float: papery [ additional: "Y" default: "26.98750" information: "Paper height" minimum: "0.1" relations: "EDAM_data:2152 Rendering parameter" ] float: pagesheight [ additional: "Y" default: "1" information: "Number of trees across height of page" minimum: "1" relations: "EDAM_data:2152 Rendering parameter" ] float: pageswidth [ additional: "Y"
default: "1" information: "Number of trees across width of page" minimum: "1" relations: "EDAM_data:2152 Rendering parameter" ] float: hpmargin [ additional: "Y" default: "0.41275" information: "Horizontal overlap (cm)" minimum: "0.001" relations: "EDAM_data:2152 Rendering parameter" ] float: vpmargin [ additional: "Y" default: "0.53975" information: "Vertical overlap (cm)" minimum: "0.001" relations: "EDAM_data:2152 Rendering parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/frestboot.acd0000664000175000017500000000726211727433154015063 00000000000000application: frestboot [ documentation: "Bootstrapped restriction sites algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0552 Phylogenetic tree bootstrapping" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ characters: "01+-?" parameter: "Y" help: "File containing one or more sets of restriction data" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(infile.discretelength)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: test [ additional: "y" minimum: "1" maximum: "1" header: "Test" values: "b:Bootstrap; j:Jackknife; c:Permute species for each character; o:Permute character order; s:Permute within species; r:Rewrite data" information: "Choose test" default: "b" relations: "EDAM_data:2527 Parameter" ] toggle: regular [ additional: "@( $(test) == { b | j } )" information: "Altered sampling fraction" default: "N" relations: "EDAM_data:2527 Parameter" ] float: fracsample [ additional: "@(!$(regular))" information: "Samples as percentage of sites" default: "100.0" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2527 Parameter" ] list: rewriteformat [ additional: "@($(test)==r)" minimum: "1" maximum: "1" header: "test" values: "p:PHYLIP; n:NEXUS; x:XML" information: "Output format" default: "p" relations: "EDAM_identifier:2129 File format name" ] integer: blocksize [ information: "Block size for bootstraping" additional: "@($(test) == b)" default: "1" minimum: "1" relations: "EDAM_data:1249 Sequence length" ] integer: reps [ additional: "@($(test) != r)" information: "How many replicates" minimum: "1" default: "100" relations: "EDAM_data:2527 Parameter" ] list: justweights [ additional: "@( $(test) == { b | j } )" minimum: "1" maximum: "1" header: "Write out datasets or just weights" values: "d:Datasets; w:Weights" information: "Write out datasets or just weights" default: "d" relations: "EDAM_data:2527 Parameter" ] boolean: enzymes [ additional: "Y" information: "Is the number of enzymes present in input file" default: "N" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "@($(test) != r)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "seqbootrest output" information: "Phylip seqboot_rest program output file" relations: "EDAM_data:2245 Sequence set (bootstrapped)" ] boolean: printdata [ additional: "Y" default: "N" information: "Print out the data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: 
dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdollop.acd0000664000175000017500000000721311727433154014507 00000000000000application: fdollop [ documentation: "Dollo and polymorphism parsimony algorithm" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" help: "File containing one or more data sets" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Phylip weights file (optional)" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" information: "Ancestral states file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "Y" minimum: "1" maximum: "1" header: "Method" values: "d:Dollo; p:Polymorphism" information: "Parsimony method" default: "d" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" maximum: "32767" minimum: "1" default: "1" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "Y" minimum: "0" information: "Threshold value" default: "$(infile.discretesize)" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dollop output" information: "Phylip dollop program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print states at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: 
"Print out steps in each character" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdiscboot.acd0000664000175000017500000001152611727433154015026 00000000000000application: fdiscboot [ documentation: "Bootstrapped discrete sites algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0552 Phylogenetic tree bootstrapping" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: mixfile [ additional: "Y" characters: "" information: "File of mixtures" nullok: "Y" size: "1" length: "$(infile.discretelength)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: ancfile [ additional: "Y" characters: "" information: "File of ancestors" nullok: "Y" size: "1" length: "$(infile.discretelength)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(infile.discretelength)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: factorfile [ additional: "Y" information: "Factors file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: test [ additional: "y" minimum: "1" maximum: "1" header: "Test" values: "b:Bootstrap; j:Jackknife; c:Permute species for each character; o:Permute character order; s:Permute within species; r:Rewrite data" information: "Choose test" default: "b" relations: "EDAM_data:2527 Parameter" ] toggle: regular [ additional: "@( $(test) == { b | j } )" information: "Altered sampling fraction" default: "N" relations: "EDAM_data:2527 Parameter" ] float: fracsample [ additional: "@(!$(regular))" information: "Samples as percentage of sites" default: "100.0" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2527 Parameter" ] list: morphseqtype [ additional: "@($(test) == r )" minimum: "1" maximum: "1" header: "Output format" values: "p:PHYLIP; n:NEXUS" information: "Output format" default: "p" relations: "EDAM_identifier:2129 File format name" ] integer: blocksize [ information: "Block size for bootstraping" additional: "@($(test) == b)" default: "1" minimum: "1" relations: "EDAM_data:2527 Parameter" ] integer: reps [ additional: "@($(test) != r)" information: "How many replicates" minimum: "1" default: "100" relations: "EDAM_data:2527 Parameter" ] list: justweights [ additional: "@( $(test) == { b | j } )" minimum: "1" maximum: "1" header: "Write out datasets or just weights" values: "d:Datasets; w:Weights" information: "Write out datasets or just weights" default: "d" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "@($(test) != r)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "seqbootdisc output" information: "Phylip seqboot_disc program output file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] outfile: outancfile [ parameter: "Y" knowntype: "phylip ancestor" extension: "ancfile" information: "Phylip ancestor data output file (optional)" nullok: 
"@(!$(ancfile.isdefined))" relations: "EDAM_data:1427 Phylogenetic discrete data" ] outfile: outmixfile [ parameter: "Y" knowntype: "phylip mix" extension: "mixfile" information: "Phylip mix data output file (optional)" nullok: "@(!$(mixfile.isdefined))" relations: "EDAM_data:1427 Phylogenetic discrete data" ] outfile: outfactfile [ parameter: "Y" extension: "factfile" knowntype: "phylip factor" information: "Phylip factor data output file (optional)" nullok: "@(!$(factorfile.isdefined))" relations: "EDAM_data:1427 Phylogenetic discrete data" ] boolean: printdata [ additional: "Y" default: "N" information: "Print out the data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fmix.acd0000664000175000017500000001016211727433154014010 00000000000000application: fmix [ documentation: "Mixed parsimony algorithm" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" help: "File containing one or more data sets" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Weights file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" 
information: "Ancestral states file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: mixfile [ additional: "Y" characters: "CSW" information: "Mixture file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Method" values: "w:Wagner; c:Camin-Sokal; m:Mixed;" information: "Choose the method to use" default: "Wagner" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" maximum: "32767" minimum: "1" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.discretesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "Y" minimum: "1" information: "Threshold value" default: "$(infile.discretesize)" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "mix output" information: "Phylip mix program output file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print states at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print out steps in each character" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnamlk.acd0000664000175000017500000001607511727433154014472 00000000000000application: fdnamlk [ documentation: "Estimates nucleotide phylogeny by maximum likelihood" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" help: "File containing one or more sequence alignments" aligned: "Y" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] integer: ncategories [ additional: "Y" default: "1" minimum: "1" maximum: "9" 
information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [ additional: "@($(ncategories) > 1)" information: "Rate for each category" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" information: "File of substitution rate categories" nullok: "@($(ncategories) == 1)" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" nullok: "Y" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 1:0)" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] float: ttratio [ additional: "Y" default: "2.0" minimum: "0.001" information: "Transition/transversion ratio" relations: "EDAM_data:2527 Parameter" ] toggle: freqsfrom [ additional: "Y" default: "Y" information: "Use empirical base frequencies from seqeunce input" relations: "EDAM_data:2527 Parameter" ] array: basefreq [ additional: "@(!$(freqsfrom))" size: "4" minimum: "0.0" maximum: "1.0" default: "0.25 0.25 0.25 0.25" information: "Base frequencies for A C G T/U (use blanks to separate)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "Y" minimum: "1" maximum: "1" header: "Rate variation among sites" values: "g:Gamma distributed rates; i:Gamma+invariant sites; h:User defined HMM of rates; n:Constant rate" information: "Rate variation among sites" default: "Constant rate" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "@($(gammatype)==g)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ngammacat [ additional: "@($(gammatype)==g)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9)" relations: "EDAM_data:2527 Parameter" ] float: invarcoefficient [ additional: "@($(gammatype)==i)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ninvarcat [ additional: "@($(gammatype)==i)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9) including one for invariant sites" relations: "EDAM_data:2527 Parameter" ] float: invarfrac [ additional: "@($(gammatype)==i)" information: "Fraction of invariant sites" default: "0.0" minimum: "0.0" maximum: "0.9999" relations: "EDAM_data:2527 Parameter" ] integer: nhmmcategories [ additional: "@($(gammatype)==h)" default: "1" minimum: "1" maximum: "9" information: "Number of HMM rate categories" relations: "EDAM_data:2527 Parameter" ] array: hmmrates [ additional: "@($(nhmmcategories) > 1)" information: "HMM category rates" default: "1.0" minimum: "0.0" size: "$(nhmmcategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] array: hmmprobabilities [ additional: "@($(nhmmcategories) > 1)" information: "Probability for each HMM category" default: "1.0" minimum: "0.0" maximum: "1.0" size: "$(nhmmcategories)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] boolean: adjsite [ additional: "@($(gammatype)!=n)" default: "N" information: "Rates at adjacent sites correlated" relations: "EDAM_data:2527 Parameter" ] float: lambda [ additional: "$(adjsite)" default: "1.0" information: "Mean block length of sites 
having the same rate" minimum: "1.0" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnamlk output" information: "Phylip dnamlk program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: hypstate [ additional: "Y" default: "N" information: "Reconstruct hypothetical sequence" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fretree.acd0000664000175000017500000000372511727433154014510 00000000000000application: fretree [ documentation: "Interactive tree rearrangement" groups: "Phylogeny:Tree drawing" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0326 Phylogenetic tree editing" ] section: input [ information: "Input section" type: "page" ] integer: spp [ information: "Number of species" parameter: "Y" relations: "EDAM_data:2527 Parameter" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: initialtree [ additional: "Y" minimum: "1" maximum: "1" header: "Initial tree" values: "a:Arbitary; u:User; s:Specify" information: "Initial tree" default: "Arbitary" relations: "EDAM_data:2527 Parameter" ] list: format [ additional: "Y" minimum: "1" maximum: "1" header: "test" values: "p:PHYLIP; n:NEXUS; x:XML" information: "Format to write trees" default: "p" relations: "EDAM_identifier:2129 File format name" ] integer: screenwidth [ additional: "Y" default: "80" information: "Width of terminal screen in characters" relations: "EDAM_data:2152 Rendering parameter" ] integer: vscreenwidth [ additional: "Y" default: "80" information: "Width of plotting area in characters" relations: "EDAM_data:2152 Rendering parameter" ] integer: screenlines [ additional: "Y" default: "24" information: "Number of lines on screen" relations: "EDAM_data:2152 Rendering parameter" ] endsection: additional section: output [ information: "Output section" type: 
"page" ] outfile: outtreefile [ parameter: "Y" extension: "treefile" knowntype: "newick tree" information: "Phylip tree output file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/ftreedist.acd0000664000175000017500000000405411727433154015041 00000000000000application: ftreedist [ documentation: "Calculate distances between trees" groups: "Phylogeny:Consensus" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0557 Phylogenetic tree distances calculation" ] section: input [ information: "Input section" type: "page" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: dtype [ additional: "Y" information: "Distance type" values: "s:Symmetric difference; b:Branch score distance" default: "b" relations: "EDAM_data:2527 Parameter" ] list: pairing [ additional: "Y" information: "Tree pairing method" values: "a:Distances between adjacent pairs in tree file; p:Distances between all possible pairs in tree file" default: "a" relations: "EDAM_data:2527 Parameter" ] list: style [ additional: "Y" information: "Distances output option" values: "f:Full matrix; v:Verbose, one pair per line; s:Sparse, one pair per line" default: "v" relations: "EDAM_data:2527 Parameter" ] boolean: noroot [ additional: "Y" default: "N" information: "Trees to be treated as rooted" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "treedist output" information: "Phylip treedist program output file" relations: "EDAM_data:1442 Phylogenetic tree report (tree distances)" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdrawgram.acd0000664000175000017500000001632311727433154015024 00000000000000application: fdrawgram [ documentation: "Plots a cladogram- or phenogram-like rooted tree diagram" groups: "Phylogeny:Tree drawing" batch: "no" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_topic:0092 Data visualisation" relations: "EDAM_operation:0567 Phylogenetic tree rendering" ] section: input [ information: "Input section" type: "page" ] string: fontfile [ default: "font1" information: "Fontfile name" knowntype: "phylip font" relations: "EDAM_identifier:1050 File name" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: plotfile [ parameter: "Y" knowntype: "drawgram output" information: "Phylip drawgram output file" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: grows [ additional: "Y" default: "Y" information: "Tree grows horizontally" relations: "EDAM_data:2527 Parameter" ] list: style [ additional: "Y" minimum: "1" maximum: "1" header: "Tree style" values: "c:cladogram (v-shaped); p:phenogram (branches are square); v:curvogram (branches are 1/4 out of an 
ellipse); e:eurogram (branches angle outward, then up); s:swoopogram (branches curve outward then reverse); o:circular tree" information: "Tree style output" default: "c" relations: "EDAM_data:2527 Parameter" ] list: plotter [ additional: "Y" minimum: "1" maximum: "1" header: "Plotter or printer" values: "l:Postscript printer file format; m:PICT format (for drawing programs); j:HP 75 DPI Laserjet PCL file format; s:HP 150 DPI Laserjet PCL file format; y:HP 300 DPI Laserjet PCL file format; w:MS-Windows Bitmap; f:FIG 2.0 drawing program format; a:Idraw drawing program format; z:VRML Virtual Reality Markup Language file; n:PCX 640x350 file format (for drawing programs); p:PCX 800x600 file format (for drawing programs); q:PCX 1024x768 file format (for drawing programs); k:TeKtronix 4010 graphics terminal; x:X Bitmap format; v:POVRAY 3D rendering program file; r:Rayshade 3D rendering program file; h:Hewlett-Packard pen plotter (HPGL file format); d:DEC ReGIS graphics (VT240 terminal); e:Epson MX-80 dot-matrix printer; c:Prowriter/Imagewriter dot-matrix printer; t:Toshiba 24-pin dot-matrix printer; o:Okidata dot-matrix printer; b:Houston Instruments plotter; u:other (one you have inserted code for)" information: "Plotter or printer the tree will be drawn on" default: "l" relations: "EDAM_data:2152 Rendering parameter" ] list: previewer [ additional: "Y" minimum: "1" maximum: "1" header: "Previewing device" values: "n:Will not be previewed; i:MSDOS graphics screen; m:Macintosh screens; x:X Windows display; w:MS Windows display; k:TeKtronix 4010 graphics terminal; d:DEC ReGIS graphics (VT240 terminal); o:Other (one you have inserted code for)" information: "Previewing device" default: "x" relations: "EDAM_data:2152 Rendering parameter" ] boolean: lengths [ additional: "Y" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] float: labelrotation [ additional: "@($(style)!=c)" information: "Angle of labels (0 degrees is horizontal for a tree growing vertically)" default: "90.0" minimum: "0.0" maximum: "360.0" relations: "EDAM_data:2152 Rendering parameter" ] toggle: rescaled [ additional: "Y" default: "Y" information: "Automatically rescale branch lengths" relations: "EDAM_data:2527 Parameter" ] float: bscale [ additional: "@(!$(rescaled))" default: "1.0" information: "Centimeters per unit branch length" relations: "EDAM_data:2152 Rendering parameter" ] float: treedepth [ additional: "Y" default: "0.53" information: "Depth of tree as fraction of its breadth" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2152 Rendering parameter" ] float: stemlength [ additional: "Y" default: "0.05" information: "Stem length as fraction of tree depth" minimum: "0.01" maximum: "100.0" relations: "EDAM_data:2152 Rendering parameter" ] float: nodespace [ additional: "Y" default: "0.3333" information: "Character height as fraction of tip spacing" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2152 Rendering parameter" ] list: nodeposition [ additional: "Y" minimum: "1" maximum: "1" header: "Node position" values: "i:Intermediate between their immediate descendants; w:Weighted average of tip positions; c:Centered among their ultimate descendants; n:Innermost of immediate descendants; v:So tree is v shaped" information: "Position of interior nodes" default: "c" relations: "EDAM_data:2152 Rendering parameter" ] float: xmargin [ additional: "@($(plotter)!=r)" default: "1.65" minimum: "0.1" information: "Horizontal margin (cm)" relations: "EDAM_data:2152
Rendering parameter" ] float: ymargin [ additional: "@($(plotter)!=r)" default: "2.16" minimum: "0.1" information: "Vertical margin (cm)" relations: "EDAM_data:2152 Rendering parameter" ] float: xrayshade [ additional: "@($(plotter)==r)" default: "1.65" minimum: "0.1" information: "Horizontal margin (pixels) for Rayshade output" relations: "EDAM_data:2152 Rendering parameter" ] float: yrayshade [ additional: "@($(plotter)==r)" default: "2.16" minimum: "0.1" information: "Vertical margin (pixels) for Rayshade output" relations: "EDAM_data:2152 Rendering parameter" ] float: paperx [ additional: "Y" default: "20.63750" information: "Paper width" relations: "EDAM_data:2152 Rendering parameter" ] float: papery [ additional: "Y" default: "26.98750" information: "Paper height" minimum: "0.1" relations: "EDAM_data:2152 Rendering parameter" ] float: pagesheight [ additional: "Y" default: "1" information: "Number of trees across height of page" minimum: "1" relations: "EDAM_data:2152 Rendering parameter" ] float: pageswidth [ additional: "Y" default: "1" information: "Number of trees across width of page" minimum: "1" relations: "EDAM_data:2152 Rendering parameter" ] float: hpmargin [ additional: "Y" default: "0.41275" information: "Horizontal overlap (cm)" minimum: "0.001" relations: "EDAM_data:2152 Rendering parameter" ] float: vpmargin [ additional: "Y" default: "0.53975" information: "Vertical overlap (cm)" minimum: "0.001" relations: "EDAM_data:2152 Rendering parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnapars.acd0000664000175000017500000001054611727433154014651 00000000000000application: fdnapars [ documentation: "DNA parsimony algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Weights file" nullok: "Y" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 
1:0)" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: maxtrees [ additional: "Y" information: "Number of trees to save" default: "10000" minimum: "1" maximum: "1000000" relations: "EDAM_data:2527 Parameter" ] toggle: thorough [ additional: "@(!$(intreefile.isdefined))" information: "More thorough search" default: "Y" relations: "EDAM_data:2527 Parameter" ] boolean: rearrange [ additional: "$(thorough)" default: "Y" information: "Rearrange on just one best tree" relations: "EDAM_data:2527 Parameter" ] boolean: transversion [ additional: "Y" information: "Use transversion parsimony" default: "N" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" default: "1" minimum: "1" maximum: "32767" information: "Random number seed between 1 and 32767 (must be odd)" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "1.0" default: "1.0" information: "Threshold value" relations: "EDAM_data:2146 Threshold" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnapars output" information: "Phylip dnapars program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print out steps in each site" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print sequences at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "@($(ancseq) | $(printdata))" default: "Y" information: "Use dot differencing to display results" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnacomp.acd0000664000175000017500000000622411727433154014640 00000000000000application: fdnacomp [ documentation: "DNA compatibility algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" 
help: "File containing one or more sequence alignments" relations: "EDAM_data:2887 Sequence record (nucleic acid)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ standard: "Y" information: "Phylip weights file (optional)" length: "$(sequence.length)" nullok: "Y" knowntype: "weights" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: outgrno [ additional: "Y" minimum: "0" default: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" information: "Number of times to randomise" minimum: "0" default: "0" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnacomp output" information: "Phylip dnacomp program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print steps & compatibility at sites" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print sequences at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fseqbootall.acd0000664000175000017500000001305311727433154015362 00000000000000application: fseqbootall [ documentation: "Bootstrapped sequences algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0552 Phylogenetic tree bootstrapping" ] section: input [ information: "Input section" type: "page" ] seqset: infilesequences [ parameter: "Y" type: "gapany" aligned: "Y" relations: "EDAM_data:0863 Sequence alignment" ] properties: categories [ additional: "Y" characters: "" information: "File of input categories" nullok: "Y" size: "1" length: "$(infile.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: mixfile [ additional: "Y" characters: "" information: "File of mixtures" nullok: "Y" size: "1" length: "$(infile.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: ancfile [ additional: "Y" characters: "" information: "File of ancestors" nullok: "Y" size: "1" length: "$(infile.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ 
additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(infile.length)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: factorfile [ additional: "Y" information: "Factors file" nullok: "Y" length: "$(infile.length)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: datatype [ additional: "y" minimum: "1" maximum: "1" header: "Datatype" values: "s:Molecular sequences; m:Discrete Morphology; r:Restriction Sites; g:Gene Frequencies" information: "Choose the datatype" default: "s" relations: "EDAM_data:2527 Parameter" ] list: test [ additional: "y" minimum: "1" maximum: "1" header: "Test" values: "b:Bootstrap; j:Jackknife; c:Permute species for each character; o:Permute character order; s:Permute within species; r:Rewrite data" information: "Choose test" default: "b" relations: "EDAM_data:2527 Parameter" ] toggle: regular [ additional: "@( $(test) == { b | j } )" information: "Altered sampling fraction" default: "N" relations: "EDAM_data:2527 Parameter" ] float: fracsample [ additional: "@(!$(regular))" information: "Samples as percentage of sites" default: "100.0" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2527 Parameter" ] list: rewriteformat [ additional: "@(@($(test)==r)&&@($(datatype)==s))" minimum: "1" maximum: "1" header: "test" values: "p:PHYLIP; n:NEXUS; x:XML" information: "Output format" default: "p" relations: "EDAM_data:2527 Parameter" ] list: seqtype [ additional: "@( @( $(datatype) == s ) & @( $(rewriteformat) == {n | x} ))" minimum: "1" maximum: "1" header: "test" values: "d:dna; p:protein; r:rna" information: "Output format" default: "d" relations: "EDAM_data:1094 Sequence type" ] list: morphseqtype [ additional: "@( @( $(datatype) == m ) & @( $(test) == r ))" minimum: "1" maximum: "1" header: "Output format" values: "p:PHYLIP; n:NEXUS" information: "Output format" default: "p" relations: "EDAM_identifier:2129 File format name" ] integer: blocksize [ information: "Block size for bootstraping" additional: "@($(test) == b)" default: "1" minimum: "1" relations: "EDAM_data:1249 Sequence length" ] integer: reps [ additional: "@($(test) != r)" information: "How many replicates" minimum: "1" default: "100" relations: "EDAM_data:2527 Parameter" ] list: justweights [ additional: "@( $(test) == { b | j } )" minimum: "1" maximum: "1" header: "Write out datasets or just weights" values: "d:Datasets; w:Weights" information: "Write out datasets or just weights" default: "d" relations: "EDAM_data:2527 Parameter" ] boolean: enzymes [ additional: "@($(datatype) == r)" information: "Is the number of enzymes present in input file" default: "N" relations: "EDAM_data:2527 Parameter" ] boolean: all [ additional: "@($(datatype) == g)" information: "All alleles present at each locus" default: "N" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "@($(test) != r)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "seqboot output" information: "Phylip seqboot program output file" relations: "EDAM_data:2245 Sequence set (bootstrapped)" ] boolean: printdata [ additional: "Y" default: "N" information: "Print out the data at start of run" relations: "EDAM_data:2527 
Parameter" ] boolean: dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fcontml.acd0000664000175000017500000000637611727433154014523 00000000000000application: fcontml [ documentation: "Gene frequency and continuous character maximum likelihood" groups: "Phylogeny:Gene frequencies" gui: "yes" batch: "yes" cpu: "high" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] frequencies: infile [ parameter: "Y" help: "File containing one or more sets of data" relations: "EDAM_data:1426 Phylogenetic continuous quantitative data" ] tree: intreefile [ parameter: "Y" knowntype: "newick" nullok: "Y" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: datatype [ additional: "Y" minimum: "1" maximum: "1" header: "Input type" values: "g:Gene frequencies; i:Continuous characters" information: "Input type in infile" default: "g" relations: "EDAM_data:2527 Parameter" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" maximum: "32767" minimum: "1" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.freqsize)" default: "0" failrange: "N" trueminimum: "Y" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "contml output" information: "Phylip contml program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fclique.acd0000664000175000017500000000622511727433154014502 00000000000000application: fclique [ documentation: "Largest clique program" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" 
relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0546 Phylogenetic tree construction (minimum distance methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" knowntype: "discrete states" information: "Phylip discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: ancfile [ additional: "Y" characters: "01" length: "$(infile.discretelength)" knowntype: "ancestral states" nullok: "Y" information: "Phylip ancestral states file (optional)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: factorfile [ additional: "Y" characters: "" length: "$(infile.discretelength)" knowntype: "multistate factors" nullok: "Y" information: "Phylip multistate factors file (optional)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" length: "$(infile.discretelength)" knowntype: "Weights" nullok: "Y" information: "Phylip weights file (optional)" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: cliqmin [ additional: "Y" default: "0" minimum: "0" information: "Minimum clique size" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.discretesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "clique output" information: "Phylip clique program output file" relations: "EDAM_data:1428 Phylogenetic character cliques" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: printcomp [ additional: "Y" default: "N" information: "Print out compatibility matrix" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdolmove.acd0000664000175000017500000000606011727433154014662 00000000000000application: fdolmove [ documentation: "Interactive Dollo or polymorphism parsimony" groups: "Phylogeny:Molecular sequence" batch: "no" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" 
help: "File containing data set" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(infile.discretelength)" size: "1" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" information: "Ancestral states file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: factorfile [ additional: "Y" information: "Factors file" nullok: "Y" length: "$(infile.discretelengt)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "Y" minimum: "1" maximum: "1" header: "Method" values: "d:Dollo; p:Polymorphism" information: "Parsimony method" default: "d" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "0" information: "Threshold value" default: "1" relations: "EDAM_data:2146 Threshold" ] list: initialtree [ additional: "Y" minimum: "1" maximum: "1" header: "Initial tree" values: "a:Arbitary; u:User; s:Specify" information: "Initial tree" default: "Arbitary" relations: "EDAM_data:2527 Parameter" ] integer: screenwidth [ additional: "Y" default: "80" information: "Width of terminal screen in characters" relations: "EDAM_data:2152 Rendering parameter" ] integer: screenlines [ additional: "Y" default: "24" information: "Number of lines on screen" relations: "EDAM_data:2152 Rendering parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outtreefile [ additional: "Y" extension: "treefile" knowntype: "newick tree" information: "Phylip tree output file (optional)" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/frestdist.acd0000664000175000017500000000520411727433154015055 00000000000000application: frestdist [ documentation: "Calculate distance matrix from restriction sites or fragments" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_topic:0092 Data visualisation" relations: "EDAM_operation:0289 Sequence distance matrix generation" ] section: input [ information: "Input section" type: "page" ] discretestates: data [ characters: "01+-?" 
parameter: "Y" help: "File containing one or more sets of restriction data" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: restsites [ additional: "Y" information: "Restriction sites (put N if you want restriction fragments)" default: "Y" relations: "EDAM_data:2527 Parameter" ] boolean: neili [ additional: "Y" information: "Use original Nei/Li model (default uses modified Nei/Li model)" default: "N" relations: "EDAM_data:2527 Parameter" ] boolean: gammatype [ additional: "@(!$(neili))" information: "Gamma distributed rates among sites" default: "N" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "$(gammatype)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] float: ttratio [ additional: "Y" default: "2.0" minimum: "0.001" information: "Transition/transversion ratio" relations: "EDAM_data:2527 Parameter" ] integer: sitelength [ additional: "Y" default: "6" information: "Site length" minimum: "1" relations: "EDAM_data:1249 Sequence length" ] boolean: lower [ additional: "Y" default: "N" information: "Lower triangular distance matrix" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "restdist output" information: "Phylip restdist program output file" relations: "EDAM_data:0870 Sequence distance matrix" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/Makefile0000664000175000017500000003420712171071711014031 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # emboss_acd/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/PHYLIPNEW pkglibdir = $(libdir)/PHYLIPNEW pkglibexecdir = $(libexecdir)/PHYLIPNEW am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = x86_64-unknown-linux-gnu host_triplet = x86_64-unknown-linux-gnu subdir = emboss_acd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/acd ACLOCAL = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run aclocal-1.12 AMTAR = $${TAR-tar} ANT = AR = ar AUTOCONF = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoconf AUTOHEADER = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoheader AUTOMAKE = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run automake-1.12 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -O2 CPP = gcc -E CPPFLAGS = -DAJ_LinuxLF -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 CXX = g++ CXXCPP = g++ -E CXXDEPMODE = depmode=gcc3 CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps DEVWARN_CFLAGS = DLLTOOL = false DSYMUTIL = DUMPBIN = ECHO_C = ECHO_N = -n ECHO_T = EGREP = /usr/bin/grep -E EXEEXT = FGREP = /usr/bin/grep -F GREP = /usr/bin/grep HAVE_MEMMOVE = HAVE_STRERROR = INSTALL = /usr/bin/install -c INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} INSTALL_SCRIPT = ${INSTALL} INSTALL_STRIP_PROGRAM = $(install_sh) -c -s JAR = JAVA = JAVAC = JAVA_CFLAGS = JAVA_CPPFLAGS = -DNO_AUTH JAVA_LDFLAGS = LD = /usr/bin/ld -m elf_x86_64 LDFLAGS = LIBOBJS = LIBS = -lm -lhpdf -lgd -lpng -lz -lm LIBTOOL = $(SHELL) $(top_builddir)/libtool LIPO = LN_S = ln -s LTLIBOBJS = MAKEINFO = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run makeinfo MANIFEST_TOOL = : MKDIR_P = /usr/bin/mkdir -p MYSQL_CFLAGS = -I/usr/include/mysql -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-strict-aliasing -fwrapv -fPIC -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 MYSQL_CONFIG = /usr/bin/mysql_config MYSQL_CPPFLAGS = -I/usr/include/mysql MYSQL_LDFLAGS = -L/usr/lib64/mysql -lmysqlclient -lpthread -lz -lm -lrt -lssl -lcrypto -ldl MYSQL_VERSION = 5.5.32 NM = /usr/bin/nm -B NMEDIT = OBJDUMP = objdump OBJEXT = o OTOOL = OTOOL64 = PACKAGE = PHYLIPNEW PACKAGE_BUGREPORT = emboss-bug@emboss.open-bio.org PACKAGE_NAME = PHYLIPNEW PACKAGE_STRING = PHYLIPNEW 3.69.650 PACKAGE_TARNAME = PHYLIPNEW PACKAGE_URL = http://emboss.open-bio.org/ PACKAGE_VERSION = 3.69.650 PATH_SEPARATOR = : PCRE_DATE = 11-Apr-2009 PCRE_LIB_VERSION = 0:1:0 PCRE_MAJOR = 7 PCRE_MINOR = 9 PCRE_POSIXLIB_VERSION = 0:0:0 PCRE_VERSION = 7.9 POSIX_MALLOC_THRESHOLD = -DPOSIX_MALLOC_THRESHOLD=10 POSTGRESQL_CFLAGS = -I/usr/include POSTGRESQL_CONFIG = /usr/bin/pg_config POSTGRESQL_CPPFLAGS = -I/usr/include POSTGRESQL_LDFLAGS = -L/usr/lib64 -lpq POSTGRESQL_VERSION = 9.2.4 RANLIB = ranlib SED = /usr/bin/sed SET_MAKE = SHELL = /bin/sh STRIP = strip VERSION = 3.69.650 WARN_CFLAGS = XLIB = -lX11 -lXaw -lXt XMKMF = X_CFLAGS = X_EXTRA_LIBS = X_LIBS = X_PRE_LIBS = -lSM -lICE abs_builddir = /data/scratch/embossdist/embassy/phylipnew/emboss_acd abs_srcdir = /data/scratch/embossdist/embassy/phylipnew/emboss_acd abs_top_builddir = /data/scratch/embossdist/embassy/phylipnew abs_top_srcdir = /data/scratch/embossdist/embassy/phylipnew ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ ac_ct_DUMPBIN = am__include = include am__leading_dot = . 
am__quote = am__tar = $${TAR-tar} chof - "$$tardir" am__untar = $${TAR-tar} xf - bindir = ${exec_prefix}/bin build = x86_64-unknown-linux-gnu build_alias = build_cpu = x86_64 build_os = linux-gnu build_vendor = unknown builddir = . datadir = ${datarootdir} datarootdir = ${prefix}/share docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} dvidir = ${docdir} embprefix = /usr/local exec_prefix = ${prefix} host = x86_64-unknown-linux-gnu host_alias = host_cpu = x86_64 host_os = linux-gnu host_vendor = unknown htmldir = ${docdir} includedir = ${prefix}/include infodir = ${datarootdir}/info install_sh = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localedir = ${datarootdir}/locale localstatedir = ${prefix}/var mandir = ${datarootdir}/man mkdir_p = $(MKDIR_P) oldincludedir = /usr/include pdfdir = ${docdir} prefix = /usr/local program_transform_name = s,x,x, psdir = ${docdir} sbindir = ${exec_prefix}/sbin sharedstatedir = ${prefix}/com srcdir = . sysconfdir = ${prefix}/etc target_alias = top_build_prefix = ../ top_builddir = .. top_srcdir = .. pkgdata_DATA = *.acd all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_acd/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_acd/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) 
$(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: PHYLIPNEW-3.69.650/emboss_acd/fprotpars.acd0000664000175000017500000001010411727433154015061 00000000000000application: fprotpars [ documentation: "Protein parsimony algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapproteinphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1384 Sequence alignment (protein)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Phylip weights file (optional)" nullok: "Y" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 1:0)" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "1" information: "Threshold value" default: "1" relations: "EDAM_data:2146 Threshold" ] list: whichcode [ additional: "Y" default: "Universal" minimum: "1" maximum: "1" header: "Genetic codes" values: "U:Universal,M:Mitochondrial,V:Vertebrate mitochondrial,F:Fly mitochondrial,Y:Yeast mitochondrial" delimiter: "," codedelimiter: ":" information: "Use which genetic code" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "protpars output" information: "Phylip protpars program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print steps at each site" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print sequences at all nodes of tree" relations: "EDAM_data:2527 
Parameter" ] boolean: dotdiff [ additional: "@($(printdata) | $(ancseq))" default: "Y" information: "Use dot differencing to display results" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdolpenny.acd0000664000175000017500000000715211727433154015050 00000000000000application: fdolpenny [ documentation: "Penny algorithm Dollo or polymorphism" groups: "Phylogeny:Discrete characters" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" help: "File containing one or more data sets" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Weights file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" information: "Ancestral states file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] toggle: dothreshold [ additional: "Y" default: "N" information: "Use threshold parsimony" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "$(dothreshold)" minimum: "0" information: "Threshold value" default: "1" relations: "EDAM_data:2146 Threshold" ] integer: howmany [ additional: "Y" information: "How many groups of trees" default: "1000" relations: "EDAM_data:2527 Parameter" ] integer: howoften [ additional: "Y" information: "How often to report, in trees" default: "100" relations: "EDAM_data:2527 Parameter" ] boolean: simple [ additional: "Y" information: "Branch and bound is simple" default: "Y" relations: "EDAM_data:2527 Parameter" ] list: method [ additional: "Y" minimum: "1" maximum: "1" header: "Method" values: "d:Dollo; p:Polymorphism" information: "Parsimony method" default: "d" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dolpenny output" information: "Phylip dolpenny program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: ancseq [ additional: "Y" default: "N" information: "Print states at all nodes of tree" relations: "EDAM_data:2527 Parameter" ] boolean: stepbox [ additional: "Y" default: "N" information: "Print out steps in each character" relations: "EDAM_data:2527 Parameter" ] endsection: output 
PHYLIPNEW-3.69.650/emboss_acd/frestml.acd0000664000175000017500000000722311727433154014525 00000000000000application: frestml [ documentation: "Restriction site maximum likelihood method" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: data [ characters: "01+-?" parameter: "Y" help: "File containing one or more sets of restriction data" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Phylip weights file (optional)" help: "Weights file" length: "$(data.length)" size: "1" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] integer: njumble [ additional: "Y" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(data.size)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] boolean: allsites [ additional: "Y" default: "Y" information: "All sites detected" relations: "EDAM_data:2527 Parameter" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use lengths from user trees" relations: "EDAM_data:2527 Parameter" ] integer: sitelength [ additional: "Y" default: "6" minimum: "1" maximum: "8" information: "Site length" relations: "EDAM_data:1249 Sequence length" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] boolean: rough [ additional: "@(!$(intreefile.isdefined))" default: "Y" information: "Speedier but rougher analysis" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "restml output" information: "Phylip restml program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file" knowntype: "newick tree" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnainvar.acd0000664000175000017500000000351711727433154015023 00000000000000application: fdnainvar [ documentation: "Nucleic acid sequence invariants method" groups: 
"Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0551 Phylogenetic tree analysis (shape)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1383 Sequence alignment (nucleic acid)" ] properties: weights [ standard: "Y" length: "$(sequence.length)" nullok: "Y" knowntype: "weights" information: "Phylip weights file (optional)" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "dnainvar output" information: "Phylip dnainvar program output file" relations: "EDAM_data:1429 Phylogenetic invariants" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing to display results" relations: "EDAM_data:2527 Parameter" ] boolean: printpattern [ additional: "Y" default: "Y" information: "Print counts of patterns" relations: "EDAM_data:2527 Parameter" ] boolean: printinvariant [ additional: "Y" default: "Y" information: "Print invariants" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/ffitch.acd0000664000175000017500000000757711727433154014330 00000000000000application: ffitch [ documentation: "Fitch-Margoliash and least-squares distance methods" groups: "Phylogeny:Distance matrix" gui: "yes" batch: "yes" cpu: "high" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0546 Phylogenetic tree construction (minimum distance methods)" ] section: input [ information: "Input section" type: "page" ] distances: datafile [ parameter: "Y" help: "File containing one or more distance matrices" knowntype: "distance matrix" information: "Phylip distance matrix file" relations: "EDAM_data:0870 Sequence distance matrix" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: matrixtype [ additional: "Y" minimum: "1" maximum: "1" header: "Matrix type" values: "s:Square; u:Upper triangular; l:Lower triangular" information: "Type of input data matrix" default: "s" relations: "EDAM_data:2527 Parameter" ] boolean: minev [ additional: "Y" information: "Minimum evolution" default: "N" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(datafile.distancesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] float: power [ additional: "Y" default: "2.0" information: "Power" relations: "EDAM_data:2527 
Parameter" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] boolean: negallowed [ additional: "@(!$(minev))" default: "N" information: "Negative branch lengths allowed" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] boolean: replicates [ additional: "Y" default: "N" information: "Subreplicates" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "fitch output" information: "Phylip fitch program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fmove.acd0000664000175000017500000000623111727433154014163 00000000000000application: fmove [ documentation: "Interactive mixed method parsimony" groups: "Phylogeny:Discrete characters" batch: "no" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0545 Phylogenetic tree construction (parsimony methods)" ] section: input [ information: "Input section" type: "page" ] discretestates: infile [ parameter: "Y" characters: "01PB?" help: "File containing data set" knowntype: "discrete characters" information: "Phylip character discrete states file" relations: "EDAM_data:1427 Phylogenetic discrete data" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(infile.discretelength)" size: "1" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] properties: ancfile [ additional: "Y" characters: "01?" 
information: "Ancestral states file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: factorfile [ additional: "Y" information: "Factors file" nullok: "Y" length: "$(infile.discretelength)" size: "1" relations: "EDAM_data:1427 Phylogenetic discrete data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Method" values: "w:Wagner; c:Camin-Sokal; m:Mixed;" information: "Choose the method to use" default: "Wagner" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(infile.discretesize)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] float: threshold [ additional: "Y" minimum: "0" information: "Threshold value" default: "$(infile.discretesize)" relations: "EDAM_data:2527 Parameter" ] list: initialtree [ additional: "Y" minimum: "1" maximum: "1" header: "Initial tree" values: "a:Arbitary; u:User; s:Specify" information: "Initial tree" default: "Arbitary" relations: "EDAM_data:2527 Parameter" ] integer: screenwidth [ additional: "Y" default: "80" information: "Width of terminal screen in characters" relations: "EDAM_data:2152 Rendering parameter" ] integer: screenlines [ additional: "Y" default: "24" information: "Number of lines on screen" relations: "EDAM_data:2152 Rendering parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outtreefile [ additional: "Y" extension: "treefile" knowntype: "newick tree" nullok: "Y" information: "Phylip tree output file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fgendist.acd0000664000175000017500000000315611727433154014655 00000000000000application: fgendist [ documentation: "Compute genetic distances from gene frequencies" groups: "Phylogeny:Gene frequencies" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0289 Sequence distance matrix generation" ] section: input [ information: "Input section" type: "page" ] frequencies: infile [ parameter: "Y" help: "File containing one or more sets of data" knowntype: "gendist input" information: "Phylip gendist program input file" relations: "EDAM_data:1426 Phylogenetic continuous quantitative data" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "y" minimum: "1" maximum: "1" header: "Distance methods" values: "n:Nei genetic distance; c:Cavalli-Sforza chord measure; r:Reynolds genetic distance" information: "Which method to use" default: "n" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "gendist output" information: "Phylip gendist program output file" relations: "EDAM_data:0870 Sequence distance matrix" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: lower [ additional: "Y" default: "N" information: "Lower triangular distance matrix" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fproml.acd0000664000175000017500000001626411727433154014355 00000000000000application: fproml [ documentation: "Protein phylogeny by 
maximum likelihood" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0547 Phylogenetic tree construction (maximum likelihood and Bayesian methods)" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapproteinphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:1384 Sequence alignment (protein)" ] tree: intreefile [ parameter: "Y" nullok: "Y" knowntype: "newick" information: "Phylip tree file (optional)" relations: "EDAM_data:0872 Phylogenetic tree" ] integer: ncategories [ additional: "Y" default: "1" minimum: "1" maximum: "9" information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [ additional: "@($(ncategories) > 1)" information: "Rate for each category" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" information: "File of substitution rate categories" nullok: "@($(ncategories) == 1)" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(sequence.length)" size: "@(@($(sequence.multicount)>1) ? 1:0)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] boolean: lengths [ additional: "$(intreefile.isdefined)" default: "N" information: "Use branch lengths from user trees" relations: "EDAM_data:2527 Parameter" ] list: model [ additional: "Y" minimum: "1" maximum: "1" header: "Probability model" values: "j:Jones-Taylor-Thornton; h:Henikoff/Tillier PMBs; d:Dayhoff PAM" information: "Probability model for amino acid change" default: "Jones-Taylor-Thornton" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "Y" minimum: "1" maximum: "1" header: "Rate variation among sites" values: "g:Gamma distributed rates; i:Gamma+invariant sites; h:User defined HMM of rates; n:Constant rate" information: "Rate variation among sites" default: "Constant rate" relations: "EDAM_data:2527 Parameter" ] float: gammacoefficient [ additional: "@($(gammatype)==g)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ngammacat [ additional: "@($(gammatype)==g)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9)" relations: "EDAM_data:2527 Parameter" ] float: invarcoefficient [ additional: "@($(gammatype)==i)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] integer: ninvarcat [ additional: "@($(gammatype)==i)" minimum: "1" maximum: "9" default: "1" information: "Number of categories (1-9) including one for invariant sites" relations: "EDAM_data:2527 Parameter" ] float: invarfrac [ additional: "@($(gammatype)==i)" information: "Fraction of invariant sites" default: "0.0" minimum: "0.0" maximum: "0.9999" relations: "EDAM_data:2527 Parameter" ] integer: nhmmcategories [ additional: "@($(gammatype)==h)" default: "1" minimum: "1" maximum: "9" information: "Number of HMM rate categories" relations: "EDAM_data:2527 Parameter" ] array: hmmrates [ additional: "@($(nhmmcategories) > 1)" 
information: "HMM category rates" default: "1.0" minimum: "0.0" size: "$(nhmmcategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] array: hmmprobabilities [ additional: "@($(nhmmcategories) > 1)" information: "Probability for each HMM category" default: "1.0" minimum: "0.0" maximum: "1.0" size: "$(nhmmcategories)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] boolean: adjsite [ additional: "@($(gammatype)!=n)" default: "N" information: "Rates at adjacent sites correlated" relations: "EDAM_data:2527 Parameter" ] float: lambda [ additional: "$(adjsite)" default: "1.0" information: "Mean block length of sites having the same rate" minimum: "1.0" relations: "EDAM_data:2527 Parameter" ] integer: njumble [ additional: "@(!$(intreefile.isdefined))" default: "0" minimum: "0" information: "Number of times to randomise, choose 0 if you don't want to randomise" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "$(njumble)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] boolean: global [ additional: "@(!$(intreefile.isdefined))" default: "N" information: "Global rearrangements" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" maximum: "$(sequence.count)" failrange: "N" trueminimum: "Y" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] boolean: rough [ additional: "Y" default: "Y" information: "Speedier but rougher analysis" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "proml output" information: "Phylip proml program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: hypstate [ additional: "Y" default: "N" information: "Reconstruct hypothetical sequence" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fdnadist.acd0000664000175000017500000001067511727433154014652 00000000000000application: fdnadist [ documentation: "Nucleic acid sequence distance matrix program" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0289 Sequence distance matrix generation" ] section: input [ information: "Input section" type: "page" ] seqsetall: sequence [ parameter: "Y" type: "gapdnaphylo" aligned: "Y" help: "File containing one or more sequence alignments" relations: "EDAM_data:2887 Sequence record (nucleic acid)" ] list: method [ standard: "y" minimum: "1" maximum: "1" header: "Distance methods" values: "f:F84 distance model; k:Kimura 2-parameter distance; j:Jukes-Cantor distance; l:LogDet distance; s:Similarity table" 
information: "Choose the method to use" default: "F84 distance model" relations: "EDAM_data:2527 Parameter" ] list: gammatype [ additional: "@( $(method) == { f | k | j } )" minimum: "1" maximum: "1" header: "Gamma distribution" values: "g:Gamma distributed rates; i:Gamma+invariant sites; n:No distribution parameters used" information: "Gamma distribution" default: "No distribution parameters used" relations: "EDAM_data:2527 Parameter" ] integer: ncategories [ additional: "@(@($(method) == { f | k | j } ) & @($(gammatype) == n))" default: "1" minimum: "1" maximum: "9" information: "Number of substitution rate categories" relations: "EDAM_data:2527 Parameter" ] array: rate [ additional: "@($(ncategories) > 1)" information: "Category rates" default: "1.0" minimum: "0.0" size: "$(ncategories)" sumtest: "N" relations: "EDAM_data:2527 Parameter" ] properties: categories [ additional: "@($(ncategories) > 1)" characters: "1-$(ncategories)" help: "File of substitution rate categories" nullok: "Y" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" information: "Weights file" length: "$(sequence.length)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] float: gammacoefficient [ additional: "@($(gammatype) != n)" information: "Coefficient of variation of substitution rate among sites" minimum: "0.001" default: "1" relations: "EDAM_data:2527 Parameter" ] float: invarfrac [ additional: "@($(gammatype)==i)" information: "Fraction of invariant sites" default: "0.0" minimum: "0.0" maximum: "0.9999" relations: "EDAM_data:2527 Parameter" ] float: ttratio [ additional: "@( @($(method) == f) | @($(method) == k))" information: "Transition/transversion ratio" default: "2.0" minimum: "0.001" relations: "EDAM_data:2527 Parameter" ] toggle: freqsfrom [ additional: "@($(method) == f)" default: "Y" information: "Use empirical base frequencies from seqeunce input" relations: "EDAM_data:2527 Parameter" ] array: basefreq [ additional: "@(!$(freqsfrom))" size: "4" minimum: "0.0" maximum: "1.0" default: "0.25 0.25 0.25 0.25" information: "Base frequencies for A C G T/U (use blanks to separate)" sum: "1.0" relations: "EDAM_data:2527 Parameter" ] boolean: lower [ additional: "Y" default: "N" information: "Output as a lower triangular distance matrix" relations: "EDAM_data:2527 Parameter" ] boolean: humanreadable [ additional: "Y" default: "@($(method)==s?Y:N)" information: "Output as a human-readable distance matrix" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "distance matrix" information: "Phylip distance matrix output file" relations: "EDAM_data:0870 Sequence distance matrix" ] boolean: printdata [ additional: "Y" default: "N" information: "Print data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/Makefile.am0000664000175000017500000000007407712247476014442 00000000000000 pkgdata_DATA = *.acd pkgdatadir=$(prefix)/share/EMBOSS/acd PHYLIPNEW-3.69.650/emboss_acd/Makefile.in0000664000175000017500000003276712171071677014462 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. 
# @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = emboss_acd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/acd ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = 
@libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pkgdata_DATA = *.acd all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_acd/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_acd/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: PHYLIPNEW-3.69.650/emboss_acd/fseqboot.acd0000664000175000017500000000762211727433154014676 00000000000000application: fseqboot [ documentation: "Bootstrapped sequences algorithm" groups: "Phylogeny:Molecular sequence" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0552 Phylogenetic tree bootstrapping" ] section: input [ information: "Input section" type: "page" ] seqset: sequence [ parameter: "Y" type: "gapany" aligned: "Y" relations: "EDAM_data:0863 Sequence alignment" ] properties: categories [ additional: "Y" characters: "" information: "File of input categories" nullok: "Y" size: "1" length: "$(sequence.length)" relations: "EDAM_data:1427 Phylogenetic discrete data" ] properties: weights [ additional: "Y" characters: "01" information: "Weights file" help: "Weights file" length: "$(sequence.length)" nullok: "Y" relations: "EDAM_data:2994 Phylogenetic character weights" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: test [ additional: "y" minimum: "1" maximum: "1" header: "Test" values: "b:Bootstrap; j:Jackknife; c:Permute species for each character; o:Permute character order; s:Permute within species; r:Rewrite data" information: "Choose test" default: "b" relations: "EDAM_data:2527 Parameter" ] toggle: regular [ additional: "@( $(test) == { b | j } )" information: "Altered sampling fraction" default: "N" relations: "EDAM_data:2527 Parameter" ] float: fracsample [ additional: "@(!$(regular))" information: "Samples as percentage of sites" default: "100.0" minimum: "0.1" maximum: "100.0" relations: "EDAM_data:2527 Parameter" ] list: rewriteformat [ additional: "@($(test)==r)" minimum: "1" maximum: "1" header: "test" values: "p:PHYLIP; n:NEXUS; x:XML" information: "Output format" default: "p" relations: "EDAM_identifier:2129 File format name" ] list: seqtype [ additional: "@( $(rewriteformat) == {n | x} )" minimum: "1" maximum: "1" header: "test" values: "d:dna; p:protein; r:rna" information: "Output format" default: "d" relations: "EDAM_data:1094 Sequence type" ] integer: blocksize [ information: "Block size for bootstraping" additional: "@($(test) == b)" default: "1" minimum: "1" relations: "EDAM_data:1249 Sequence length" ] integer: reps [ additional: "@($(test) != r)" information: "How many replicates" minimum: "1" default: "100" relations: "EDAM_data:2527 Parameter" ] list: justweights [ additional: "@( $(test) == { b | j } )" minimum: "1" maximum: "1" header: "Write out datasets or just weights" values: "d:Datasets; w:Weights" information: "Write out datasets or just weights" default: "d" relations: "EDAM_data:2527 Parameter" ] integer: seed [ additional: "@($(test)!= r)" information: "Random number seed between 1 and 32767 (must be odd)" minimum: "1" maximum: "32767" default: "1" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "seqbootseq output" information: "Phylip seqboot_seq program output file" relations: "EDAM_data:2245 Sequence set (bootstrapped)" ] boolean: printdata [ additional: "Y" default: "N" information: "Print out the data at start of run" relations: "EDAM_data:2527 Parameter" ] boolean: dotdiff [ additional: "$(printdata)" default: "Y" information: "Use dot-differencing" relations: "EDAM_data:2527 Parameter" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 
Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/fconsense.acd0000664000175000017500000000506111727433154015032 00000000000000application: fconsense [ documentation: "Majority-rule and strict consensus tree" groups: "Phylogeny:Consensus" embassy: "phylipnew" relations: "EDAM_topic:0084 Phylogenetics" relations: "EDAM_operation:0555 Phylogenetic tree construction (consensus)" ] section: input [ information: "Input section" type: "page" ] tree: intreefile [ parameter: "Y" knowntype: "newick" information: "Phylip tree file" relations: "EDAM_data:0872 Phylogenetic tree" ] endsection: input section: additional [ information: "Additional section" type: "page" ] list: method [ additional: "Y" minimum: "1" maximum: "1" information: "Consensus method" values: "s:strict consensus tree; mr:Majority Rule; mre:Majority Rule (extended); ml:Minimum fraction (0.5 to 1.0)" default: "mre" relations: "EDAM_data:2527 Parameter" ] float: mlfrac [ additional: "@($(method)==ml)" minimum: "0.5" maximum: "1.0" default: "0.5" information: "Fraction (l) of times a branch must appear" relations: "EDAM_data:2527 Parameter" ] toggle: root [ additional: "Y" default: "N" information: "Trees to be treated as Rooted" relations: "EDAM_data:2527 Parameter" ] integer: outgrno [ additional: "Y" minimum: "0" default: "0" information: "Species number to use as outgroup" relations: "EDAM_data:2527 Parameter" ] endsection: additional section: output [ information: "Output section" type: "page" ] outfile: outfile [ parameter: "Y" knowntype: "fconsense output" information: "Phylip consense program output file" relations: "EDAM_data:0872 Phylogenetic tree" ] toggle: trout [ additional: "Y" default: "Y" information: "Write out trees to tree file" relations: "EDAM_data:2527 Parameter" ] outfile: outtreefile [ additional: "$(trout)" extension: "treefile" information: "Phylip tree output file (optional)" knowntype: "newick tree" nullok: "Y" relations: "EDAM_data:0872 Phylogenetic tree" ] boolean: progress [ additional: "Y" default: "Y" information: "Print indications of progress of run" relations: "EDAM_data:2527 Parameter" ] boolean: treeprint [ additional: "Y" default: "Y" information: "Print out tree" relations: "EDAM_data:2527 Parameter" ] boolean: prntsets [ additional: "Y" default: "Y" information: "Print out the sets of species" relations: "EDAM_data:2527 Parameter" ] endsection: output PHYLIPNEW-3.69.650/emboss_acd/.cvsignore0000664000175000017500000000002511326104676014370 00000000000000Makefile Makefile.in PHYLIPNEW-3.69.650/AUTHORS0000664000175000017500000000123711253743723011350 00000000000000This is an EMBOSS port of the PHYLIP 3.69 package from Joe Felsenstein at the University of Washington. The original version of PHYLIP os available from http://evolution.gs.washington.edu/phylip/software.html The EMBOSS port involves replacing the user interface with an EMBOSS ACD file and using EMBOSS to parse the input data. Once data has been loaded, all the algorithms and outputs use the original PHYLIP code. In a future release we may add EMBOSS output code so that the user has a choice of output formats. The EMBOSS versions of the programs all have 'f' added to the start of the name so you can run native PHYLIP on the same system to check your results. PHYLIPNEW-3.69.650/configure.in0000664000175000017500000006435111774774007012626 00000000000000# -*- Autoconf -*- # Configure template for the EMBOSS package. # Process this file with autoconf to produce a configure script. 
AC_PREREQ([2.64]) AC_INIT([PHYLIPNEW], [3.69.650], [emboss-bug@emboss.open-bio.org], [PHYLIPNEW], [http://emboss.open-bio.org/]) AC_REVISION([$Revision: 1.39 $]) AC_CONFIG_SRCDIR([src/dnadist.c]) AC_CONFIG_HEADERS([src/config.h]) AC_CONFIG_MACRO_DIR([m4]) # Make sure CFLAGS is defined to stop AC_PROG_CC adding -g. CFLAGS="${CFLAGS} " # Checks for programs. AC_PROG_AWK AC_PROG_CC([icc gcc cc]) AC_PROG_CXX([icpc g++]) AC_PROG_CPP AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_MKDIR_P AM_INIT_AUTOMAKE # Use libtool to make a shared library. LT_INIT # Check if 64 bit pointer support is required on 32 bit machines # Disabled by default AC_ARG_ENABLE([64], [AS_HELP_STRING([--enable-64], [64 bit pointers on 32 bit machines])]) AS_IF([test "x${enable_64}" = "xyes"], [ AC_MSG_CHECKING([for 64bit compilation support]) AS_CASE([${host_os}], [aix*], [ CPPFLAGS="-DAJ_AIX64 ${CPPFLAGS}" AS_CASE([${CC}], [gcc], [], [ AS_VAR_APPEND([CC], [" -q64"]) ]) NM="nm -B -X 64" AR="ar -X 64" ], [hpux*], [ AS_CASE([${CC}], [gcc], [], [ AS_VAR_APPEND([CC], [" +DD64"]) ]) AC_DEFINE([HPUX64PTRS], [1], [Set to 1 if HPUX 64bit ptrs on 32 bit m/c]) ]) AC_MSG_RESULT([done]) ]) # Compiler optimisations # The Solaris 64bit ptr check has to be done here owing to param order AC_ARG_WITH([optimisation], [AS_HELP_STRING([--without-optimisation], [Disable compiler optimisation])]) AS_IF([test "x${with_optimisation}" != "xno"], [ AS_CASE([${CC}], [gcc], [ # Intel MacOSX requires reduced optimisation for PCRE code # other OSs just use -O2 AS_CASE([${host_os}], [darwin*], [ AS_IF([test "x${host_cpu}" = "xi386"], [AS_VAR_APPEND([CFLAGS], [" -O1"])], [AS_VAR_APPEND([CFLAGS], [" -O2"])]) ], [ AS_VAR_APPEND([CFLAGS], [" -O2"]) ]) ], [ AS_CASE([${host_os}], [aix*], [ AS_VAR_APPEND([CFLAGS], [" -O3 -qstrict -qarch=auto -qtune=auto"]) ], [irix*], [ LD="/usr/bin/ld -IPA" AS_VAR_APPEND([CFLAGS], [" -O3"]) ], [hpux*], [ AS_VAR_APPEND([CFLAGS], [" -fast"]) ], [osf*], [ AS_VAR_APPEND([CFLAGS], [" -fast -U_FASTMATH"]) ], [solaris*], [ AS_VAR_APPEND([CFLAGS], [" -O"]) # test for 64 bit ptr here (see Solaris 64bit above) AS_IF([test "x${enable_64}" = "xyes"], [AS_VAR_APPEND([CFLAGS], [" -xtarget=ultra -xarch=v9"])]) ], [linux*], [ # Default optimisation for non-gcc compilers under Linux AS_VAR_APPEND([CFLAGS], [" -O2"]) ], [freebsd*], [ AS_VAR_APPEND([CFLAGS], [" -O2"]) ]) ]) ]) # Compiler warning settings: --enable-warnings, defines WARN_CFLAGS AC_ARG_ENABLE([warnings], [AS_HELP_STRING([--enable-warnings], [compiler warnings])]) AS_IF([test "x${enable_warnings}" = "xyes"], [ AS_CASE([${CC}], [gcc], [ # -Wall priovides: # -Waddress # -Warray-bounds (only with -O2) # -Wc++0x-compat # -Wchar-subscripts # -Wenum-compare (in C/Objc; this is on by default in C++) # -Wimplicit-int (C and Objective-C only) # -Wimplicit-function-declaration (C and Objective-C only) # -Wcomment # -Wformat # -Wmain (only for C/ObjC and unless -ffreestanding) # -Wmissing-braces # -Wnonnull # -Wparentheses # -Wpointer-sign # -Wreorder # -Wreturn-type # -Wsequence-point # -Wsign-compare (only in C++) # -Wstrict-aliasing # -Wstrict-overflow=1 # -Wswitch # -Wtrigraphs # -Wuninitialized # -Wunknown-pragmas # -Wunused-function # -Wunused-label # -Wunused-value # -Wunused-variable # -Wvolatile-register-var AS_VAR_SET([WARN_CFLAGS], ["-Wall -fno-strict-aliasing"]) ]) ]) AC_SUBST([WARN_CFLAGS]) # Compiler developer warning settings: --enable-devwarnings, # sets DEVWARN_CFLAGS AC_ARG_ENABLE([devwarnings], [AS_HELP_STRING([--enable-devwarnings], [strict compiler warnings for 
developers])]) AS_IF([test "x${enable_devwarnings}" = "xyes"], [ AS_CASE([${CC}], [gcc], [ # Only -Wstrict-prototypes and -Wmissing-prototypes are set in this # EMBASSY module. AS_VAR_SET([DEVWARN_CFLAGS], ["-Wstrict-prototypes -Wmissing-prototypes"]) # Diagnostic options for the GNU GCC compiler version 4.6.1. # http://gcc.gnu.org/onlinedocs/gcc-4.6.1/gcc/Warning-Options.html # # -Wextra: more warnings beyond what -Wall provides # -Wclobbered # -Wempty-body # -Wignored-qualifiers # -Wmissing-field-initializers # -Wmissing-parameter-type (C only) # -Wold-style-declaration (C only) # -Woverride-init # -Wsign-compare # -Wtype-limits # -Wuninitialized # -Wunused-parameter (only with -Wunused or -Wall) # -Wunused-but-set-parameter (only with -Wunused or -Wall) # AS_VAR_SET([DEVWARN_CFLAGS], ["-Wextra"]) # Warn if a function is declared or defined without specifying the # argument types. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wstrict-prototypes"]) # Warn if a global function is defined without a previous prototype # declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-prototypes"]) # Warn for obsolescent usages, according to the C Standard, # in a declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wold-style-definition"]) # Warn if a global function is defined without a previous declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-declarations"]) # When compiling C, give string constants the type const char[length] # so that copying the address of one into a non-const char * pointer # will get a warning. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wwrite-strings"]) # Warn whenever a local variable or type declaration shadows another # variable, parameter, type, or class member (in C++), or whenever a # built-in function is shadowed. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wshadow"]) # Warn when a declaration is found after a statement in a block. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wdeclaration-after-statement"]) # Warn if an undefined identifier is evaluated in an `#if' directive. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wundef"]) # Warn about anything that depends on the "size of" a function type # or of void. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wpointer-arith"]) # Warn whenever a pointer is cast so as to remove a type qualifier # from the target type. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcast-qual"]) # Warn whenever a pointer is cast such that the required alignment # of the target is increased. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcast-align"]) # Warn whenever a function call is cast to a non-matching type. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wbad-function-cast"]) # Warn when a comparison between signed and unsigned values could # produce an incorrect result when the signed value is converted to # unsigned. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wsign-compare"]) # Warn if a structure's initializer has some fields missing. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-field-initializers"]) # An alias of the new option -Wsuggest-attribute=noreturn # Warn for cases where adding an attribute may be beneficial. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-noreturn"]) # Warn if an extern declaration is encountered within a function. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wnested-externs"]) # Warn if anything is declared more than once in the same scope, # even in cases where multiple declaration is valid and changes # nothing. 
# AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wredundant-decls"]) # Warn if the loop cannot be optimized because the compiler could not # assume anything on the bounds of the loop indices. # -Wunsafe-loop-optimizations objects to loops with increments more # than 1 because if the end is at INT_MAX it could run forever ... # rarely # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wunsafe-loop-optimizations"]) # Warn for implicit conversions that may alter a value. # -Wconversion is brain-damaged - complains about char arguments # every time # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wconversion"]) # Warn about certain constructs that behave differently in traditional # and ISO C. # -Wtraditional gives #elif and #error msgs # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wtraditional"]) # Warn if floating point values are used in equality comparisons. # -Wfloat-equal will not allow tests for values still 0.0 # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wfloat-equal"]) # This option is only active when -ftree-vrp is active # (default for -O2 and above). It warns about subscripts to arrays # that are always out of bounds. # -Warray-bounds gives false positives in gcc 4.6.0 # Disable rather than use a non-portable pragma # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wno-array-bounds"]) ], [icc], [ # Diagnostic options for the Intel(R) C++ compiler version 11.1. # http://software.intel.com/en-us/articles/intel-c-compiler-professional-edition-for-linux-documentation/ # This option specifies the level of diagnostic messages to be # generated by the compiler. AS_VAR_SET([DEVWARN_CFLAGS], ["-w2"]) # This option determines whether a warning is issued if generated # code is not C++ ABI compliant. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wabi"]) # This option tells the compiler to display errors, warnings, and # remarks. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wall"]) # This option tells the compiler to display a shorter form of # diagnostic output. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wbrief"]) # This option warns if cast is used to override pointer type # qualifier AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcast-qual"]) # This option tells the compiler to perform compile-time code # checking for certain code. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcheck"]) # This option determines whether a warning is issued when /* # appears in the middle of a /* */ comment. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcomment"]) # Set maximum number of template instantiation contexts shown in # diagnostic. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcontext-limit=n"]) # This option enables warnings for implicit conversions that may # alter a value. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wconversion"]) # This option determines whether warnings are issued for deprecated # features. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wdeprecated"]) # This option enables warnings based on certain C++ programming # guidelines. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Weffc++"]) # This option changes all warnings to errors. # Alternate: -diag-error warn # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Werror"]) # This option changes all warnings and remarks to errors. # Alternate: -diag-error warn, remark # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Werror-all"]) # This option determines whether warnings are issued about extra # tokens at the end of preprocessor directives. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wextra-tokens"]) # This option determines whether argument checking is enabled for # calls to printf, scanf, and so forth. 
AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wformat"]) # This option determines whether the compiler issues a warning when # the use of format functions may cause security problems. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wformat-security"]) # This option enables diagnostics about what is inlined and what is # not inlined. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Winline"]) # This option determines whether a warning is issued if the return # type of main is not expected. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmain"]) # This option determines whether warnings are issued for global # functions and variables without prior declaration. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-declarations"]) # Determines whether warnings are issued for missing prototypes. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-prototypes"]) # This option enables warnings if a multicharacter constant # ('ABC') is used. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmultichar"]) # Issue a warning when a class appears to be polymorphic, # yet it declares a non-virtual one. # This option is supported in C++ only. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wnon-virtual-dtor"]) # This option warns about operations that could result in # integer overflow. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Woverflow"]) # This option tells the compiler to display diagnostics for 64-bit # porting. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wp64"]) # Determines whether warnings are issued for questionable pointer # arithmetic. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wpointer-arith"]) # his option determines whether a warning is issued about the # use of #pragma once. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wpragma-once"]) # Issue a warning when the order of member initializers does not # match the order in which they must be executed. # This option is supported with C++ only. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wreorder"]) # This option determines whether warnings are issued when a function # uses the default int return type or when a return statement is # used in a void function. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wreturn-type"]) # This option determines whether a warning is issued when a variable # declaration hides a previous declaration. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wshadow"]) # This option warns for code that might violate the optimizer's # strict aliasing rules. Warnings are issued only when using # -fstrict-aliasing or -ansi-alias. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wstrict-aliasing"]) # This option determines whether warnings are issued for functions # declared or defined without specified argument types. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wstrict-prototypes"]) # This option determines whether warnings are issued if any trigraphs # are encountered that might change the meaning of the program. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wtrigraphs"]) # This option determines whether a warning is issued if a variable # is used before being initialized. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wuninitialized"]) # This option determines whether a warning is issued if an unknown # #pragma directive is used. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wunknown-pragmas"]) # This option determines whether a warning is issued if a declared # function is not used. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wunused-function"]) # This option determines whether a warning is issued if a local or # non-constant static variable is unused after being declared. AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wunused-variable"]) # This option issues a diagnostic message if const char* is # converted to (non-const) char *. 
AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wwrite-strings"]) # Disable warning #981 operands are evaluated in unspecified order # http://software.intel.com/en-us/articles/cdiag981/ AS_VAR_APPEND([DEVWARN_CFLAGS], [" -diag-disable 981"]) ]) ]) AC_SUBST([DEVWARN_CFLAGS]) # Compiler extra developer warning settings: --enable-devextrawarnings, # appends DEVWARN_CFLAGS # Will only have an effect if --enable-devwarnings also given AC_ARG_ENABLE([devextrawarnings], [AS_HELP_STRING([--enable-devextrawarnings], [add extra warnings to devwarnings])]) AS_IF([test "x${enable_devwarnings}" = "xyes" && test "x${enable_devextrawarnings}" = "xyes"], [ AS_CASE([${CC}], [gcc], [ # flags used by Ubuntu 8.10 to check open has 2/3 arguments etc. AC_DEFINE([_FORTIFY_SOURCE], [2], [Set to 2 for open args]) # compiler flags CPPFLAGS="-fstack-protector ${CPPFLAGS}" # warnings used by Ubuntu 8.10 # -Wall already includes: # -Wformat AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wformat-security -Wl,-z,relro"]) # -Wpadded means moving char to end of structs - but also flags # end of struct so need to add padding at end AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wpadded"]) ]) ]) # Compile deprecated functions still used in the book text for 6.2.0 AC_ARG_ENABLE([buildbookdeprecated], [AS_HELP_STRING([--enable-buildbookdeprecated], [build deprecated functions used in books for 6.2.0])]) # Compile all deprecated functions AC_ARG_ENABLE([buildalldeprecated], [AS_HELP_STRING([--enable-buildalldeprecated], [build all deprecated functions])]) AS_IF([test "x${enable_buildbookdeprecated}" = "xyes" || test "x${enable_buildalldeprecated}" = "xyes"], [ AC_DEFINE([AJ_COMPILE_DEPRECATED_BOOK], [1], [Define to 1 to compile deprecated functions used in book texts for 6.2.0]) ]) AS_IF([test "x${enable_buildalldeprecated}" = "xyes"], [ AC_DEFINE([AJ_COMPILE_DEPRECATED], [1], [Define to 1 to compile all deprecated functions]) ]) # Add extensions to Solaris for some reentrant functions AS_CASE([${host_os}], [solaris*], [AS_VAR_APPEND([CFLAGS], [" -D__EXTENSIONS__"])]) # Test whether --with-sgiabi given for IRIX (n32m3 n32m4 64m3 64m4) AS_CASE([${host_os}], [irix*], [ AS_CASE([${CC}], [gcc], [], [cc], [CHECK_SGI]) ]) dnl PCRE library definitions - see the MAJOR and MINOR values dnl to see which version's configure.in these lines come from dnl Provide the current PCRE version information. Do not use numbers dnl with leading zeros for the minor version, as they end up in a C dnl macro, and may be treated as octal constants. Stick to single dnl digits for minor numbers less than 10. There are unlikely to be dnl that many releases anyway. PCRE_MAJOR="7" PCRE_MINOR="9" PCRE_DATE="11-Apr-2009" PCRE_VERSION="${PCRE_MAJOR}.${PCRE_MINOR}" dnl Default values for miscellaneous macros POSIX_MALLOC_THRESHOLD="-DPOSIX_MALLOC_THRESHOLD=10" dnl Provide versioning information for libtool shared libraries that dnl are built by default on Unix systems. PCRE_LIB_VERSION="0:1:0" PCRE_POSIXLIB_VERSION="0:0:0" dnl FIXME: This does no longer seem required with Autoconf 2.67? dnl Intel MacOSX 10.6 puts X11 in a non-standard place dnl AS_IF([test "x${with_x}" != "xno"], dnl [ dnl AS_CASE([${host_os}], dnl [darwin*], dnl [ dnl OSXX=`sw_vers -productVersion | sed 's/\(10\.[[0-9]]*\).*/\1/'` dnl AS_IF([test ${OSXX} '>' '10.4'], dnl [AS_VAR_APPEND([CFLAGS], [" -I/usr/X11/include -L/usr/X11/lib"])]) dnl ]) dnl ]) # Checks for header files. AC_PATH_XTRA AC_HEADER_DIRENT AC_HEADER_STDC AC_CHECK_HEADERS([unistd.h TargetConfig.h]) # Checks for typedefs, structures, and compiler characteristics. 
AC_C_BIGENDIAN AC_C_CONST AC_C_INLINE AC_TYPE_PID_T AC_TYPE_SIZE_T AC_STRUCT_TM # Checks for library functions. AC_FUNC_GETPGRP AC_FUNC_STRFTIME AC_FUNC_FORK AC_FUNC_VPRINTF AC_CHECK_FUNCS([strdup strstr strchr erand48 memmove]) AS_IF([test "x${with_x}" != "xno"], [ LF_EMBOSS_PATH_XLIB havexawh="1" AC_CHECK_HEADER([X11/Xaw/Label.h], [], [havexawh="0"]) AS_IF([test "x${havexawh}" = "x0"], [ ### FIXME: Should be an error condition. AC_MSG_NOTICE([You need to install the Xaw development files for your system]) exit $? ]) havexawlib="1" AC_CHECK_LIB([Xaw], [XawInitializeWidgetSet], [XLIB="$XLIB -lXaw"], [havexawlib="0"], [${XLIB}]) AS_IF([test "x${havexawlib}" = "x0"], [ ### FIXME: Should be an error condition. AC_MSG_NOTICE([You need to install the Xaw library files for your system]) exit $? ]) havextlib="1" AC_CHECK_LIB([Xt], [XtToolkitInitialize], [XLIB="$XLIB -lXt"], [havextlib="0"], [${XLIB}]) AS_IF([test "x${havextlib}" = "x0"], [ ### FIXME: Should be an error condition. AC_MSG_NOTICE([You need to install the Xt library files for your system]) exit $? ]) ### FIXME: This is already defined in the Autoconf module libraries.m4 # AC_SUBST(XLIB) ]) # Library checks. AC_CHECK_LIB([c], [socket], [LIBS="${LIBS}"], [LIBS="${LIBS} -lsocket"]) AC_CHECK_LIB([m], [main]) # GD for FreeBSD requires libiconv AS_CASE([${host_os}], [freebsd*], [ AS_IF([test "x${with_pngdriver}" != "xno"], [AC_CHECK_LIB([iconv], [main], [LIBS="${LIBS}"], [LIBS="-liconv ${LIBS}"])]) ]) AM_CONDITIONAL([AMPNG], [false]) AM_CONDITIONAL([AMPDF], [false]) CHECK_GENERAL CHECK_JAVA CHECK_HPDF CHECK_PNGDRIVER AX_LIB_MYSQL AX_LIB_POSTGRESQL dnl "Export" these variables for PCRE AC_SUBST([HAVE_MEMMOVE]) AC_SUBST([HAVE_STRERROR]) AC_SUBST([PCRE_MAJOR]) AC_SUBST([PCRE_MINOR]) AC_SUBST([PCRE_DATE]) AC_SUBST([PCRE_VERSION]) AC_SUBST([PCRE_LIB_VERSION]) AC_SUBST([PCRE_POSIXLIB_VERSION]) AC_SUBST([POSIX_MALLOC_THRESHOLD]) dnl Test if --enable-localforce given locallink="no" embprefix="/usr/local" AC_ARG_ENABLE([localforce], [AS_HELP_STRING([--enable-localforce], [force compile/link against /usr/local])]) AS_IF([test "x${enable_localforce}" = "xyes"], [embprefix="/usr/local"]) AS_IF([test "x${prefix}" = "xNONE"], [ AS_IF([test "x${enable_localforce}" != "xyes"], [locallink="yes"], [ locallink="no" embprefix="/usr/local" ]) ], [ embprefix="${prefix}" ]) AM_CONDITIONAL([LOCALLINK], [test "x${locallink}" = "xyes"]) AC_SUBST([embprefix]) # Enable debugging: --enable-debug, sets CFLAGS AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], [debug (-g option on compiler)])]) AS_IF([test "x${enable_debug}" = "xyes"], [AS_VAR_APPEND([CFLAGS], [" -g"])]) # Turn off irritating linker warnings in IRIX AS_CASE([${host_os}], [irix*], [ CFLAGS="-Wl,-LD_MSG:off=85:off=84:off=16:off=134 ${CFLAGS}" ]) # Enable the large file interface: --enable-large, appends CPPFLAGS AC_ARG_ENABLE([large], [AS_HELP_STRING([--enable-large], [over 2Gb file support @<:@default=yes@:>@])]) AC_MSG_CHECKING([for large file support]) AS_IF([test "x${enable_large}" = "xno"], [ AC_MSG_RESULT([no]) ], [ AS_CASE([${host_os}], [linux*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_LinuxLF"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGEFILE_SOURCE"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGEFILE64_SOURCE"]) AS_VAR_APPEND([CPPFLAGS], [" -D_FILE_OFFSET_BITS=64"]) ], [freebsd*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_FreeBSDLF"]) ], [solaris*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_SolarisLF"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGEFILE_SOURCE"]) AS_VAR_APPEND([CPPFLAGS], [" -D_FILE_OFFSET_BITS=64"]) ], [osf*], 
[ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_OSF1LF"]) ], [irix*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_IRIXLF"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGEFILE64_SOURCE"]) ], [aix*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_AIXLF"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGE_FILES"]) ], [hpux*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_HPUXLF"]) AS_VAR_APPEND([CPPFLAGS], [" -D_LARGEFILE_SOURCE"]) AS_VAR_APPEND([CPPFLAGS], [" -D_FILE_OFFSET_BITS=64"]) ], [darwin*], [ AS_VAR_APPEND([CPPFLAGS], [" -DAJ_MACOSXLF"]) ]) AC_MSG_RESULT([yes]) ]) # Enable libraries provided by the system rather than EMBOSS: # --enable-systemlibs, sets ESYSTEMLIBS AC_ARG_ENABLE([systemlibs], [AS_HELP_STRING([--enable-systemlibs], [utility for RPM/dpkg bundles])]) AM_CONDITIONAL([ESYSTEMLIBS], [test "x${enable_systemlibs}" = "xyes"]) # Enable the purify tool: --enable-purify, sets CC and LIBTOOL AC_ARG_ENABLE([purify], [AS_HELP_STRING([--enable-purify], [purify])]) AC_MSG_CHECKING([for purify]) AS_IF([test "x${enable_purify}" = "xyes"], [ dnl if(purify -version) < /dev/null > /dev/null 2>&1; then CC="purify --chain-length=20 -best-effort -windows=yes gcc -g" LIBTOOL="${LIBTOOL} --tag=CC" AC_MSG_RESULT([yes]) dnl fi ], [ AC_MSG_RESULT([no]) ]) dnl Set extra needed compiler flags if test "x${CC}" = "xcc"; then case "${host}" in alpha*-dec-osf*) CFLAGS="${CFLAGS} -ieee";; esac fi AM_CONDITIONAL([PURIFY], [test "x${enable_purify}" = "xyes"]) dnl Test for cygwin to set AM_LDFLAGS in library & apps Makefile.ams dnl Replaces original version which used 'expr' and so wasn't entirely dnl portable. platform_cygwin="no" AC_MSG_CHECKING([for cygwin]) case "${host}" in *-*-mingw*|*-*-cygwin*) platform_cygwin="yes" ;; *) platform_cygwin="no" ;; esac AC_MSG_RESULT([${platform_cygwin}]) AM_CONDITIONAL([ISCYGWIN], [test "x${platform_cygwin}" = "xyes"]) dnl Tests for AIX dnl If shared needs -Wl,-G in plplot,ajax,nucleus, -lX11 in plplot, dnl and -Wl,brtl -Wl,-bdynamic in emboss dnl We therefore need a static test as well needajax="no" AS_CASE([${host_os}], [aix*], [AM_CONDITIONAL([ISAIXIA64], [true])], [AM_CONDITIONAL([ISAIXIA64], [false])]) AM_CONDITIONAL([ISSHARED], [test "x${enable_shared}" = "xyes"]) AS_CASE([${host_os}], [aix*], [ AS_IF([test -d ajax/.libs], [AS_ECHO(["AIX ajax/.libs exists"])], [mkdir ajax/.libs]) AS_CASE([${host_os}], [aix5*], [needajax="no"], [aix4.3.3*], [needajax="yes"], [needajax="no"]) ]) AM_CONDITIONAL([NEEDAJAX], [test "x${needajax}" = "xyes"]) # HP-UX needs -lsec for shadow passwords AS_CASE([${host_os}], [hpux*], [AS_VAR_APPEND([LDFLAGS], [" -lsec"])]) # GNU mcheck functions: --enable-mcheck, defines HAVE_MCHECK AC_ARG_ENABLE([mcheck], [AS_HELP_STRING([--enable-mcheck], [mcheck and mprobe memory allocation test])]) AS_IF([test "x${enable_mcheck}" = "xyes"], [AC_CHECK_FUNCS([mcheck])]) # Collect AJAX statistics: --enable-savestats, defines AJ_SAVESTATS AC_ARG_ENABLE([savestats], [AS_HELP_STRING([--enable-savestats], [save AJAX statistics and print with debug output])]) AC_MSG_CHECKING([for savestats]) AS_IF([test "x${enable_savestats}" = "xyes"], [ AC_DEFINE([AJ_SAVESTATS], [1], [Define to 1 to collect AJAX library usage statistics.]) AC_MSG_RESULT([yes]) ], [ AC_MSG_RESULT([no]) ]) AC_CONFIG_FILES([Makefile src/Makefile data/Makefile emboss_acd/Makefile emboss_doc/Makefile emboss_doc/html/Makefile emboss_doc/text/Makefile ]) AC_OUTPUT PHYLIPNEW-3.69.650/doc/0002775000175000017500000000000012171071713011115 500000000000000PHYLIPNEW-3.69.650/doc/restdist.html0000664000175000017500000004216707712247475013613 00000000000000 restdist
version 3.6

RESTDIST -- Program to compute distance matrix
from restriction sites or fragments

© Copyright 2000-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

Restdist reads the same restriction sites format as RESTML and computes a restriction sites distance. It can also compute a restriction fragments distance. The original restriction fragments and restriction sites distance methods were introduced by Nei and Li (1979). Their original method for restriction fragments is also available in this program, although its default methods are my modifications of the original Nei and Li methods.

These two distances assume that the restriction sites are accidental byproducts of random change of nucleotide sequences. For my restriction sites distance the DNA sequences are assumed to be changing according to the Kimura 2-parameter model of DNA change (Kimura, 1980). The user can set the transition/transversion rate for the model. For my restriction fragments distance there is an implicit assumption of a Jukes-Cantor (1969) model of change. The user can also set the parameter of a correction for unequal rates of evolution between sites in the DNA sequences, using a Gamma distribution of rates among sites. The Jukes-Cantor model is also implicit in the restriction fragments distance of Nei and Li (1979). It does not allow us to correct for a Gamma distribution of rates among sites.

Restriction Sites Distance

The restriction sites distances use data coded for the presence or absence of individual restriction sites (usually as + and - or 0 and 1). My distance is based on the proportion, out of all sites observed in one species or the other, which are present in both species. This is done to correct for the ascertainment of sites, for the fact that we are not aware of many sites because they do not appear in any species.

My distance starts by computing from the particular pair of species the fraction

                 n++
   f =  ---------------------
         n++ + 1/2 (n+- + n-+)
where n++ is the number of sites contained in both species, n+- is the number of sites contained in the first of the two species but not in the second, and n-+ is the number of sites contained in the second of the two species but not in the first. This is the fraction of sites that are present in one species which are present in both. Since the number of sites present in the two species will often differ, the denominator is the average of the number of sites found in the two species.
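
For example, here is a minimal sketch (not the program's own code; the function name shared_fraction is invented for illustration) of how f can be computed from two species' rows of "+"/"-" site calls:

#include <stdio.h>

/* Count n++, n+- and n-+ over two rows of '+'/'-' site calls and return
   the fraction f defined above.  Sites absent from both species are
   never observed, so they do not enter the counts. */
static double shared_fraction(const char *a, const char *b)
{
    double npp = 0.0, npm = 0.0, nmp = 0.0;
    int i;

    for (i = 0; a[i] != '\0' && b[i] != '\0'; i++) {
        if (a[i] == '+' && b[i] == '+') npp++;
        else if (a[i] == '+' && b[i] == '-') npm++;
        else if (a[i] == '-' && b[i] == '+') nmp++;
    }
    return npp / (npp + 0.5 * (npm + nmp));
}

int main(void)
{
    /* Alpha and Beta from the test data set shown later in this document */
    printf("f = %f\n", shared_fraction("++-+-++--+++-", "++++--+--+++-"));
    return 0;
}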

If each restriction site is s nucleotides long, the probability that a restriction site is present in the other species, given that it is present in a species, is

      Q^s,
where Q is the probability that a nucleotide has no net change as one goes from the one species to the other. It may have changed in between; we are interested in the probability that that nucleotide site is in the same base in both species, irrespective of what has happened in between.

The distance is then computed by finding the branch length of a two-species tree (connecting these two species with a single branch) such that Q equals the s-th root of f. For this the program computes Q for various values of branch length, iterating them by a Newton-Raphson algorithm until the two quantities are equal.
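
A minimal sketch of that iteration is shown below (this is not the program's own code; for brevity it assumes the Jukes-Cantor special case, for which the transition/transversion ratio is in effect 0.5, rather than the full Kimura 2-parameter formula with the user's chosen ratio, and the starting value and iteration count are arbitrary choices):

#include <math.h>
#include <stdio.h>

/* Solve Q(d) = f^(1/s) for the branch length d by Newton-Raphson, where
   under the Jukes-Cantor model Q(d) = 1/4 + (3/4)exp(-4d/3) is the
   probability that a nucleotide shows no net change over branch length d. */
static double sites_distance(double f, int s)
{
    double target = pow(f, 1.0 / s);        /* the s-th root of f */
    double d = 0.1;                         /* rough starting value */
    int i;

    for (i = 0; i < 20; i++) {
        double q  = 0.25 + 0.75 * exp(-4.0 * d / 3.0);
        double dq = -exp(-4.0 * d / 3.0);   /* dQ/dd */
        d -= (q - target) / dq;             /* Newton-Raphson step */
        if (d < 0.0)
            d = 1e-6;                       /* keep the estimate positive */
    }
    return d;
}

int main(void)
{
    /* e.g. 90% of observed sites shared, 6-base restriction sites */
    printf("distance = %f\n", sites_distance(0.90, 6));
    return 0;
}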

The resulting distance should be numerically close to the original restriction sites distance of Nei and Li (1979). It is inspired by theirs, but theirs differs by implicitly assuming a symmetric Jukes-Cantor (1969) model of nucleotide change, and theirs does not include a correction for Gamma distribution of rate of change among nucleotide sites.

Restriction Fragments Distance

For restriction fragments data we use a different distance. If we average over all restriction fragment lengths, each at its own expected frequency, the probability that the fragment will still be in existence after a certain amount of branch length, we must take into account the probability that the two restriction sites at the ends of the fragment do not mutate, and the probability that no new restriction site occurs within the fragment in that amount of branch length. The result for a restriction site length of s is:

                Q^(2s)
          f = ----------
               2 - Q^s
(The details of the derivation will be given in my forthcoming book Inferring Phylogenies, to be published by Sinauer Associates in 2001.) Given the observed fraction of restriction sites retained, f, we can solve a quadratic equation from the above expression for Q^s. That makes it easy to obtain a value of Q, and the branch length can then be estimated by adjusting it so that the probability of a base not changing is equal to that value.

Alternatively, if we use the Nei and Li (1979) restriction fragments distance, this involves solving for g in the nonlinear equation

       g  =  [ f (3 - 2g) ]^(1/4)
and then the distance is given by
       d  =  - (2/r) log_e(g)
where r is the length of the restriction site.
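
A minimal sketch of both fragment distances follows (not the program's own code; the function names are invented, and for brevity the branch length is recovered with the Jukes-Cantor relation between Q and branch length, i.e. with the transition/transversion ratio in effect set to 0.5):

#include <math.h>
#include <stdio.h>

/* Invert the Jukes-Cantor probability of no net change,
   Q(d) = 1/4 + (3/4)exp(-4d/3), to get a branch length. */
static double jc_branch_length(double q)
{
    return -0.75 * log((4.0 * q - 1.0) / 3.0);
}

/* Modified (default) fragments distance: from f = Q^(2s)/(2 - Q^s),
   with x = Q^s, take the positive root of x^2 + f x - 2f = 0,
   then recover Q and the branch length. */
static double fragments_distance(double f, int s)
{
    double x = (-f + sqrt(f * f + 8.0 * f)) / 2.0;
    double q = pow(x, 1.0 / s);

    return jc_branch_length(q);
}

/* Original Nei/Li (1979) fragments distance: iterate
   g = [ f (3 - 2g) ]^(1/4) to convergence, then d = -(2/r) log_e(g). */
static double nei_li_distance(double f, int r)
{
    double g = f;
    int i;

    for (i = 0; i < 50; i++)
        g = pow(f * (3.0 - 2.0 * g), 0.25);
    return -(2.0 / r) * log(g);
}

int main(void)
{
    printf("%f  %f\n", fragments_distance(0.8, 6), nei_li_distance(0.8, 6));
    return 0;
}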

Comparing these two restriction fragments distances in a case where their underlying DNA model is the same (which is when the transition/transversion ratio of the modified model is set to 0.5), you will find that they are very close to each other, differing very little at small distances, with the modified distance becoming smaller than the Nei/Li distance at larger distances. It will therefore matter very little which one you use.

A Comment About RAPDs and AFLPs

Although these distances are designed for restriction sites and restriction fragments data, they can be applied to RAPD and AFLP data as well. RAPD (Randomly Amplified Polymorphic DNA) and AFLP (Amplified Fragment Length Polymorphism) data consist of presence or absence of individual bands on a gel. The bands are segments of DNA with PCR primers at each end. These primers are defined sequences of known length (often about 10 nucleotides each). For AFLPs the relevant length is the primer length, plus three nucleotides. Mutation in these sequences makes them no longer be primers, just as in the case of restriction sites. Thus a pair of 10-nucleotide primers will behave much the same as a 20-nucleotide restriction site. You can use the restriction sites distance as the distance between RAPD or AFLP patterns if you set the proper value for the total length of the site to the total length of the primers (plus 6 in the case of AFLPs). Of course there are many possible sources of noise in these data, including confusing fragments of similar length for each other and having primers near each other in the genome, and these are not taken into account in the statistical model used here.
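
For example, with a pair of 10-nucleotide primers the total site length to supply would be

   10 + 10      =  20      (RAPD)
   10 + 10 + 6  =  26      (AFLP, three extra nucleotides per primer)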

INPUT FORMAT AND OPTIONS

The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this:

   10   35    4

The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs.

The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown.

The options are selected using an interactive menu. The menu looks like this:


Restriction site or fragment distances, version 3.6a3

Settings for this run:
  R           Restriction sites or fragments?  Sites
  G  Gamma distribution of rates among sites?  No
  T            Transition/transversion ratio?  2.000000
  S                              Site length?  6.0
  L                  Form of distance matrix?  Square
  M               Analyze multiple data sets?  No
  I              Input sequences interleaved?  Yes
  0       Terminal type (IBM PC, ANSI, none)?  (none)
  1       Print out the data at start of run?  No
  2     Print indications of progress of run?  Yes

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The R option toggles between a restriction sites distance, which is the default setting, and a restriction fragments distance. In the latter case, another option appears, the N (Nei/Li) option. This allows the user to choose the original Nei and Li (1979) restriction fragments distance rather than my modified Nei/Li distance, which is the default.

If the G (Gamma distribution) option is selected, the user will be asked to supply the coefficient of variation of the rate of substitution among sites. This is different from the parameters used by Nei and Jin, who introduced Gamma distribution of rates in DNA distances, but related to their parameters: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

     CV = 1 / a^(1/2)

or

     a = 1 / (CV)^2

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
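
For example (a small sketch of the conversion only, not part of the program), a shape parameter a of 4 corresponds to a coefficient of variation of 0.5:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a  = 4.0;                 /* example shape parameter */
    double cv = 1.0 / sqrt(a);       /* CV = 1 / a^(1/2)        */

    printf("a = %.1f   gives   CV = %.2f\n", a, cv);
    printf("CV = %.2f  gives   a = %.1f\n", cv, 1.0 / (cv * cv));
    return 0;
}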

The Gamma distribution option is not available when using the original Nei/Li restriction fragments distance.

The T option is the Transition/transversion option. The user is prompted for a real number greater than 0.0, as the expected ratio of transitions to transversions. Note that this is the resulting expected ratio of transitions to transversions. The default value of the T parameter if you do not use the T option is 2.0. The T option is not available when you choose the original Nei/Li restriction fragment distance, which assumes a Jukes-Cantor (1969) model of DNA change, for which the transition/transversion ratio is in effect fixed at 0.5.

The S option selects the site length. This is set to a default value of 6. It can be set to any positive integer. While in the RESTML program there is an upper limit on the restriction site length (set by memory limitations), in RESTDIST there is no effective limit on the size of the restriction sites. A value of 20, which might be appropriate in many cases for RAPD or AFLP data, is typically not practical in RESTML, but it is useable in RESTDIST.

Option L specifies that the output file will have a square matrix of distances. It can be used to change to lower-triangular data matrices. This will usually not be necessary, but if the distance matrices are going to be very large, this alternative can reduce their size by half. The programs which are to use them should then of course be informed that they can expect lower-triangular distance matrices.

The M, I, and 0 options are the usual Multiple data set, Interleaved input, and screen terminal type options. These are described in the main documentation file.

Option 1 specifies that the input data will be written out on the output file before the distances. This is off by default. If it is done, it will make the output file unusable as input to our distance matrix programs.

Option 2 turns off or on the indications of the progress of the run. The program prints out a row of dots (".") indicating the calculation of individual distances. Since the distance matrix is symmetrical, the program only computes the distances for the upper triangle of the distance matrix, and then duplicates the distance to the other corner of the matrix. Thus the rows of dots start out at full length, and then get shorter and shorter.

OUTPUT FORMAT

The output file contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix of distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.
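
As an illustration of that layout (a sketch only; the exact field widths used by the program are not reproduced here), a routine printing one row of the square form might look like:

#include <stdio.h>

/* Print a species name padded to 10 characters, then its distances,
   starting a new line after every nine values. */
static void print_row(const char *name, const double *dist, int n)
{
    int j;

    printf("%-10s", name);
    for (j = 0; j < n; j++) {
        printf("%10.4f", dist[j]);
        if ((j + 1) % 9 == 0 && j + 1 < n)
            printf("\n%-10s", "");           /* continuation line */
    }
    printf("\n");
}

int main(void)
{
    /* the Alpha row of the test output below */
    double row[5] = { 0.0000, 0.0224, 0.1077, 0.0688, 0.0826 };

    print_row("Alpha", row, 5);
    return 0;
}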

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters.

The distances printed out are scaled in terms of expected numbers of substitutions per DNA site, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0. Thus when the G option is used, the rate of change at one site may be higher than at another, but their mean is expected to be 1.

PROGRAM CONSTANTS

The constants available to be changed are "initialv" and "iterationsr". The constant "initialv" is the starting value of the distance in the iterations. This will typically not need to be changed. The constant "iterationsr" is the number of times that the Newton-Raphson method which is used to solve the equations for the distances is iterated. The program can be speeded up by reducing the number of iterations from the default value of 20, but at the possible risk of computing the distance less accurately.

FUTURE OF THE PROGRAM

The present program does not compute the original distance of Nei and Li (1979) for restriction sites (though it does have an option to compute their original distance for restriction fragments). I hope to add their restriction sites distance in the near future.


TEST DATA SET

   5   13   2
Alpha     ++-+-++--+++-
Beta      ++++--+--+++-
Gamma     -+--+-++-+-++
Delta     ++-+----++---
Epsilon   ++++----++---


CONTENTS OF OUTPUT FILE (with all numerical options on)

(Note that when the options for displaying the input data are turned off, the output is in a form suitable for use as an input file in the distance matrix programs).


    5 Species,   13 Sites

Name            Sites
----            -----

Alpha        ++-+-++--+ ++-
Beta         ++++--+--+ ++-
Gamma        -+--+-++-+ -++
Delta        ++-+----++ ---
Epsilon      ++++----++ ---


Alpha       0.0000  0.0224  0.1077  0.0688  0.0826
Beta        0.0224  0.0000  0.1077  0.0688  0.0442
Gamma       0.1077  0.1077  0.0000  0.1765  0.1925
Delta       0.0688  0.0688  0.1765  0.0000  0.0197
Epsilon     0.0826  0.0442  0.1925  0.0197  0.0000
PHYLIPNEW-3.69.650/doc/clique.html0000664000175000017500000002213107712247475013221 00000000000000 clique

version 3.6

CLIQUE -- Compatibility Program

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

Note: Clique is an Old Style program. This means that it takes some of its options information, notably the Weights, Ancestral states and Factors options from the input file rather than from separate files of their own as the New Style programs in this version of PHYLIP do.

This program uses the compatibility method for unrooted two-state characters to obtain the largest cliques of characters and the trees which they suggest. This approach originated in the work of Le Quesne (1969), though the algorithms were not precisely specified until the later work of Estabrook, Johnson, and McMorris (1976a, 1976b). These authors proved the theorem that a group of two-state characters which were pairwise compatible would be jointly compatible. This program uses an algorithm inspired by the Kent Fiala - George Estabrook program CLINCH, though closer in detail to the algorithm of Bron and Kerbosch (1973). I am indebted to Kent Fiala for pointing out that paper to me, and to David Penny for describing to me his branch-and-bound approach to finding largest cliques, from which I have also borrowed. I am particularly grateful to Kent Fiala for catching a bug in versions 2.0 and 2.1 which resulted in those versions failing to find all of the cliques which they should. The program computes a compatibility matrix for the characters, then uses a recursive procedure to examine all possible cliques of characters.

After one pass through all possible cliques, the program knows the size of the largest clique, and during a second pass it prints out the cliques of the right size. It also, along with each clique, prints out the tree suggested by that clique.
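
The pairwise test behind that matrix is simple: two two-state characters are compatible on some unrooted tree if and only if not all four combinations of states (00, 01, 10 and 11) occur among the species. The sketch below is not the program's own code, but applied to the test data set shown later in this document it reproduces the compatibility matrix shown in the test output, printing "1" for compatible pairs and "." otherwise:

#include <stdio.h>

#define NSP 5                 /* species in the test data set    */
#define NCH 6                 /* characters in the test data set */

/* Two binary characters (given as one state per species) are compatible
   unless all four state combinations 00, 01, 10 and 11 occur. */
static int compatible(const char *ci, const char *cj, int nsp)
{
    int seen[2][2] = { { 0, 0 }, { 0, 0 } };
    int k;

    for (k = 0; k < nsp; k++)
        seen[ci[k] - '0'][cj[k] - '0'] = 1;
    return !(seen[0][0] && seen[0][1] && seen[1][0] && seen[1][1]);
}

int main(void)
{
    static const char *data[NSP] = {       /* one row per species */
        "110110", "110000", "100110", "001001", "001110"
    };
    char ci[NSP], cj[NSP];
    int i, j, k;

    for (i = 0; i < NCH; i++) {
        for (j = 0; j < NCH; j++) {
            for (k = 0; k < NSP; k++) {
                ci[k] = data[k][i];        /* character i, all species */
                cj[k] = data[k][j];        /* character j, all species */
            }
            putchar(compatible(ci, cj, NSP) ? '1' : '.');
        }
        putchar('\n');
    }
    return 0;
}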

INPUT, OUTPUT, AND OPTIONS

Input to the algorithm is standard, but the "?", "P", and "B" states are not allowed. This is a serious limitation of this program. If you want to find large cliques in data that has "?" states, I recommend that you use MIX instead with the T (Threshold) option and the value of the threshold set to 2.0. The theory underlying this is given in my paper on character weighting (Felsenstein, 1981b).

The options are chosen from a menu, which looks like this:


Largest clique program, version 3.6a3

Settings for this run:
  A   Use ancestral states in input file?  No
  C          Specify minimum clique size?  No
  O                        Outgroup root?  No, use as outgroup species  1
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  none
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3        Print out compatibility matrix  No
  4                        Print out tree  Yes
  5       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

The A (Ancestors), O (Outgroup) and M (Multiple Data Sets) options are the usual ones, described in the main documentation file. However, Clique being an Old Style program, the options information for the Ancestors, Factors, and Weights options must be specified, not in separate files, but in the input data file. This is done by putting the letters A, F, or W on the first line of the input file, separated by blanks from the number of characters. The options information then follows, with the first line of each option's information starting with its letter, followed by at least 9 spaces or other characters to fill out to the length of a species name before the options information occurs: You can continue to a new line within the options information at any time. Here is a simple example:

     5    6 FAW
WEIGHTS   111101
ANCESTORS 001111
FACTORS   112234
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

If you use option A (Ancestors) you should also choose it in the menu. The compatibility matrix calculation in effect assumes if the Ancestors option is invoked that there is in the data another species that has all the ancestral states. This changes the compatibility patterns in the proper way. The Ancestors option also requires information on the ancestral states of each character to be in the input file.

The Outgroup option will take effect only if the tree is not rooted by the Ancestral States option.

The C (Clique Size) option indicates that you wish to specify a minimum clique size and print out all cliques (and their associated trees) greater than or equal to than that size. The program prompts you for the minimum clique size.

Note that this allows you to list all cliques (each with its tree) by simply setting the minimum clique size to 1. If you do one run and find that the largest clique has 23 characters, you can do another run with the minimum clique size set at 18, thus listing all cliques within 5 characters of the largest one.

Output involves a compatibility matrix (using the symbols "." and "1") and the cliques and trees.

If you have used the F option there will be two lists of characters for each clique, one the original multistate characters and the other the binary characters. It is the latter that are shown on the tree. When the F option is not used the output and the cliques reflect only the binary characters.

The trees produced have it indicated on each branch the points at which derived character states arise in the characters that define the clique. There is a legend above the tree showing which binary character is involved. Of course if the tree is unrooted you can read the changes as going in either direction.

The program runs very quickly but if the maximum number of characters is large it will need a good deal of storage, since the compatibility matrix requires ActualChars x ActualChars boolean variables, where ActualChars is the number of characters (in the case of the factors option the total number of true multistate characters).

ASSUMPTIONS

Basically the following assumptions are made:

  1. Each character evolves independently.
  2. Different lineages evolve independently.
  3. The ancestral state is not known.
  4. Each character has a small chance of being one which evolves so rapidly, or is so thoroughly misinterpreted, that it provides no information on the tree.
  5. The probability of a single change in a character (other than in the high rate characters) is low but not as low as the probability of being one of these "bad" characters.
  6. The probability of two changes in a low-rate character is much less than the probability that it is a high-rate character.
  7. The true tree has segments which are not so unequal in length that two changes in a long are as easy to envisage as one change in a short segment.

The assumptions of compatibility methods have been treated in several of my papers (1978b, 1979, 1981b, 1988b), especially the 1981 paper. For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

A constant available for alteration at the beginning of the program is the form width, "FormWide", which you may want to change to make it as large as possible consistent with the page width available on your output device, so as to avoid the output of cliques and of trees getting wrapped around unnecessarily.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options on)


Largest clique program, version 3.6a3


Character Compatibility Matrix (1 if compatible)
--------- ------------- ------ -- -- -----------

                     111..1
                     111..1
                     111..1
                     ...111
                     ...111
                     111111


Largest Cliques
------- -------


Characters: (  1  2  3  6)


  Tree and characters:

     2  1  3  6
     0  0  1  1

             +1-Delta     
       +0--1-+
  +--0-+     +--Epsilon   
  !    !
  !    +--------Gamma     
  !
  +-------------Alpha     
  !
  +-------------Beta      

remember: this is an unrooted tree!


PHYLIPNEW-3.69.650/doc/penny.html0000664000175000017500000006110107712247475013070 00000000000000 penny

version 3.6

PENNY - Branch and bound to find
all most parsimonious trees

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

PENNY is a program that will find all of the most parsimonious trees implied by your data. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by PENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of steps. It adds the next species to each of these, again in all possible places. If this process were to continue, it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated.
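The count of 34,459,425 trees comes from the standard double-factorial formula for rooted bifurcating trees: when the k-th species is added, the tree built so far offers 2k-3 branches on which to attach it, so the number of trees is the product of these counts. A minimal sketch (illustrative only, not part of PHYLIP) that reproduces the numbers quoted here:

# Number of rooted bifurcating trees on n labeled tips: the k-th species can
# be attached to any of the 2k-3 branches of the tree built from the first
# k-1 species, so the total is 1 * 3 * 5 * ... * (2n-3).
def n_rooted_trees(n):
    count = 1
    for k in range(3, n + 1):
        count *= 2 * k - 3        # placements available for the k-th species
    return count

print(n_rooted_trees(4))          # 15, the trees enumerated below
print(n_rooted_trees(10))         # 34459425, as quoted above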

Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species     (A,B)
          Add C in first place     ((A,B),C)
               Add D in first place     (((A,D),B),C)
               Add D in second place     ((A,(B,D)),C)
               Add D in third place     (((A,B),D),C)
               Add D in fourth place     ((A,B),(C,D))
               Add D in fifth place     (((A,B),C),D)
          Add C in second place: ((A,C),B)
               Add D in first place     (((A,D),C),B)
               Add D in second place     ((A,(C,D)),B)
               Add D in third place     (((A,C),D),B)
               Add D in fourth place     ((A,C),(B,D))
               Add D in fifth place     (((A,C),B),D)
          Add C in third place     (A,(B,C))
               Add D in first place     ((A,D),(B,C))
               Add D in second place     (A,((B,D),C))
               Add D in third place     (A,(B,(C,D)))
               Add D in fourth place     (A,((B,C),D))
               Add D in fifth place     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added.

This is done by counting how many binary characters that are invariant in the data up to the species most recently added will ultimately show variation when further species are added. Thus if 20 characters vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more characters which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no fewer than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.
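To make the pruning logic concrete, here is a small branch-and-bound sketch in Python. It is illustrative only, not PENNY's actual C code: trees are nested tuples of species names, steps are counted with the Fitch algorithm, and the bound adds one future step for every character that is still constant among the species placed so far but varies among the remaining species. The data at the bottom are simply a small example matrix (the five-species test data used earlier in this documentation for the clique program); because the sketch enumerates rooted trees, ties in its output include different rootings of the same unrooted tree.

def fitch_steps(tree, data):
    """Minimum number of changes over all binary characters on a rooted tree."""
    nchar = len(next(iter(data.values())))
    total = 0
    def post(node, i):
        nonlocal total
        if isinstance(node, str):                       # a tip
            return {data[node][i]}
        left, right = post(node[0], i), post(node[1], i)
        if left & right:
            return left & right
        total += 1                                      # one more change needed
        return left | right
    for i in range(nchar):
        post(tree, i)
    return total

def placements(tree, name):
    """All trees made by attaching 'name' to one branch of 'tree'."""
    yield (tree, name)                                  # attach above the root
    if not isinstance(tree, str):
        for sub in placements(tree[0], name):
            yield (sub, tree[1])
        for sub in placements(tree[1], name):
            yield (tree[0], sub)

def lower_bound(placed, remaining, data):
    """Characters constant among placed species that must still change later."""
    nchar = len(next(iter(data.values())))
    extra = 0
    for i in range(nchar):
        seen = {data[s][i] for s in placed}
        if len(seen) == 1 and any(data[s][i] not in seen for s in remaining):
            extra += 1
    return extra

def branch_and_bound(data, order):
    best = {"steps": float("inf"), "trees": []}
    def search(tree, placed, remaining):
        steps = fitch_steps(tree, data)
        if not remaining:
            if steps < best["steps"]:
                best["steps"], best["trees"] = steps, [tree]
            elif steps == best["steps"]:
                best["trees"].append(tree)
            return
        if steps + lower_bound(placed, remaining, data) > best["steps"]:
            return                                      # prune this whole branch
        nxt, rest = remaining[0], remaining[1:]
        for bigger in placements(tree, nxt):
            search(bigger, placed + [nxt], rest)
    first, second, *rest = order
    search((first, second), [first, second], rest)
    return best

data = {"Alpha": "110110", "Beta": "110000", "Gamma": "100110",
        "Delta": "001001", "Epsilon": "001110"}
print(branch_and_bound(data, list(data)))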

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer steps than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.

Controlling Run Times

Among the quantities available to be set at the beginning of a run of PENNY, two (howoften and howmany) are of particular importance. As PENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, PENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.

When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has found so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to wait for. In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees.

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, "maxtrees" is set to 100 in the distribution copy.

Methods and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in MIX, and thus the remainder of this document will be nearly identical to the MIX document. MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This is under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).
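For a single binary character the two criteria count changes differently. Wagner parsimony can be counted with the Fitch algorithm (as in the branch-and-bound sketch above), while Camin-Sokal counting with ancestral state 0 amounts to counting the maximal subtrees whose tips all have state 1, since each such subtree requires exactly one irreversible 0 --> 1 change on the branch above it. A minimal sketch of the Camin-Sokal count, using a made-up tree and character (illustrative only, not the code in MIX or PENNY):

# Camin-Sokal count for one binary character on a rooted tree of nested
# tuples, assuming ancestral state 0 and irreversible 0 -> 1 change.
def camin_sokal_steps(tree, states):
    """Return (subtree is all ones, changes counted strictly inside it)."""
    if isinstance(tree, str):                           # a tip
        return states[tree] == "1", 0
    left_all, left_ch = camin_sokal_steps(tree[0], states)
    right_all, right_ch = camin_sokal_steps(tree[1], states)
    if left_all and right_all:
        return True, left_ch + right_ch                 # defer the change upward
    changes = left_ch + right_ch + int(left_all) + int(right_all)
    return False, changes

states = {"A": "1", "B": "1", "C": "0", "D": "1"}       # made-up character
tree = ((("A", "B"), "C"), "D")                         # made-up rooted tree
all_ones, inside = camin_sokal_steps(tree, states)
print(inside + int(all_ones))                           # 2 changes: the (A,B) clade and D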

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

The input for PENNY is the standard input for discrete characters programs, described above in the documentation file for the discrete-characters programs. States "?", "P", and "B" are allowed.

Most of the options are selected using a menu:


Penny algorithm, version 3.6a3
 branch-and-bound to find all most parsimonious trees

Settings for this run:
  X                     Use Mixed method?  No
  P                     Parsimony method?  Wagner
  F        How often to report, in trees:  100
  H        How many groups of  100 trees:  1000
  O                        Outgroup root?  No, use as outgroup species  1
  S           Branch and bound is simple?  Yes
  T              Use Threshold parsimony?  No, use ordinary parsimony
  A   Use ancestral states in input file?  No
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4     Print out steps in each character  No
  5     Print states at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The options X, O, T, A, and M are the usual Mixed Methods, Outgroup, Threshold, Ancestral States, and Multiple Data Sets options. They are described in the Main documentation file and in the Discrete Characters Programs documentation file. The O option is only acted upon if the final tree is unrooted.

The option P toggles between the Camin-Sokal parsimony criterion and the Wagner parsimony criterion. Options F and H reset the variables howoften (F) and howmany (H). The user is prompted for the new values. By setting these larger the program will report its progress less often (howoften) and will run longer (howmany times howoften). These values default to 100 and 1000, which limits the search to 100,000 trees before the program gives up, but they can be changed. Note that option F in this program is not the Factors option available in some of the other programs in this section of the package.

The A (Ancestral states) option works in the usual way, described in the Discrete Characters Programs documentation file. If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will be printed out showing the best guesses of which are the ancestral states in each character.

The S (Simple) option alters a step in PENNY which reconsiders the order in which species are added to the tree. Normally the decision as to what species to add to the tree next is made as the first tree is being constructed; that ordering of species is not altered subsequently. The S option causes it to be continually reconsidered. This will probably result in a substantial increase in run time, but on some data sets of intermediate messiness it may help. It is included in case it might prove of use on some data sets.

The F (Factors) option is not available in this program, as it would have no effect on the result even if that information were provided in the input file.

The final output is standard: a set of trees, which will be printed as rooted or unrooted depending on which is appropriate, and if the user elects to see them, tables of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the Camin-Sokal parsimony method (option C or S) is invoked and the A option is also used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the fewest state changes. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in PENNY gives up more easily on displaying these states.

As noted above, if the A option is not used the program assumes 0 as the ancestral state for characters following the Camin-Sokal method, and treats the ancestral state as unknown for characters following Wagner parsimony. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion.

At the beginning of the program are a series of constants, which can be changed to help adapt the program to different computer systems. Two are the initial values of howmany and howoften, constants "often" and "many". Constant "maxtrees" is the maximum number of tied trees that will be stored.


TEST DATA SET

    7    6
Alpha1    110110
Alpha2    110110
Beta1     110000
Beta2     110000
Gamma1    100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options turned on)


Penny algorithm, version 3.6a3
 branch-and-bound to find all most parsimonious trees

 7 species,   6 characters
Wagner parsimony method


Name         Characters
----         ----------

Alpha1       11011 0
Alpha2       11011 0
Beta1        11000 0
Beta2        11000 0
Gamma1       10011 0
Delta        00100 1
Epsilon      00111 0



requires a total of              8.000

    3 trees in all found




  +-----------------Alpha1    
  !  
  !        +--------Alpha2    
--1        !  
  !  +-----4     +--Epsilon   
  !  !     !  +--6  
  !  !     +--5  +--Delta     
  +--2        !  
     !        +-----Gamma1    
     !  
     !           +--Beta2     
     +-----------3  
                 +--Beta1     

  remember: this is an unrooted tree!


steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                11011 0
  1    Alpha1       no     ..... .
  1       2         no     ..... .
  2       4         no     ..... .
  4    Alpha2       no     ..... .
  4       5         yes    .0... .
  5       6         yes    0.1.. .
  6    Epsilon      no     ..... .
  6    Delta        yes    ...00 1
  5    Gamma1       no     ..... .
  2       3         yes    ...00 .
  3    Beta2        no     ..... .
  3    Beta1        no     ..... .




  +-----------------Alpha1    
  !  
--1  +--------------Alpha2    
  !  !  
  !  !           +--Epsilon   
  +--2        +--6  
     !  +-----5  +--Delta     
     !  !     !  
     +--4     +-----Gamma1    
        !  
        !        +--Beta2     
        +--------3  
                 +--Beta1     

  remember: this is an unrooted tree!


steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                11011 0
  1    Alpha1       no     ..... .
  1       2         no     ..... .
  2    Alpha2       no     ..... .
  2       4         no     ..... .
  4       5         yes    .0... .
  5       6         yes    0.1.. .
  6    Epsilon      no     ..... .
  6    Delta        yes    ...00 1
  5    Gamma1       no     ..... .
  4       3         yes    ...00 .
  3    Beta2        no     ..... .
  3    Beta1        no     ..... .




  +-----------------Alpha1    
  !  
  !           +-----Alpha2    
--1  +--------2  
  !  !        !  +--Beta2     
  !  !        +--3  
  +--4           +--Beta1     
     !  
     !           +--Epsilon   
     !        +--6  
     +--------5  +--Delta     
              !  
              +-----Gamma1    

  remember: this is an unrooted tree!


steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                11011 0
  1    Alpha1       no     ..... .
  1       4         no     ..... .
  4       2         no     ..... .
  2    Alpha2       no     ..... .
  2       3         yes    ...00 .
  3    Beta2        no     ..... .
  3    Beta1        no     ..... .
  4       5         yes    .0... .
  5       6         yes    0.1.. .
  6    Epsilon      no     ..... .
  6    Delta        yes    ...00 1
  5    Gamma1       no     ..... .


version 3.6

CONTRAST -- Computes contrasts for comparative method


© Copyright 1991-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the contrasts calculation described in my 1985 paper on the comparative method (Felsenstein, 1985d). It reads in a data set of the standard quantitative characters sort, and also a tree from the treefile. It then forms the contrasts between species that, according to that tree, are statistically independent. This is done for each character. The contrasts are all standardized by branch lengths (actually, square roots of branch lengths).

The method is explained in the 1985 paper. It assumes a Brownian motion model. This model was introduced by Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967) as an approximation to the evolution of gene frequencies. I have discussed (Felsenstein, 1973b, 1981c, 1985d, 1988b) the difficulties inherent in using it as a model for the evolution of quantitative characters. Chief among these is that the characters do not necessarily evolve independently or at equal rates. This program allows one to evaluate this, if there is independent information on the phylogeny. You can compute the variance of the contrasts for each character, as a measure of the variance accumulating per unit branch length. You can also test covariances of characters.
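To show what the standardized contrasts look like arithmetically, here is a minimal sketch for a single character, written against the test tree and the first character of the test data given later in this document. It is illustrative only, not CONTRAST's code: each contrast is the difference between the two daughter values divided by the square root of the sum of their (suitably lengthened) branch lengths, the node is assigned the branch-length-weighted average of the daughters, and the branch above the node is lengthened by the product of the daughter branch lengths over their sum.

from math import sqrt

# Tips are species names; an internal node is ((left_child, left_branch),
# (right_child, right_branch)).  Branch lengths from the test tree:
# ((((Homo:0.21,Pongo:0.21):0.28,Macaca:0.49):0.13,Ateles:0.62):0.38,Galago:1.00);
node1 = (("Homo", 0.21), ("Pongo", 0.21))
node2 = ((node1, 0.28), ("Macaca", 0.49))
node3 = ((node2, 0.13), ("Ateles", 0.62))
root  = ((node3, 0.38), ("Galago", 1.00))

values = {"Homo": 4.09434, "Pongo": 3.61092, "Macaca": 2.37024,
          "Ateles": 2.02815, "Galago": -1.46968}     # first test character

def contrasts(node, x):
    """Return (node value, extra length for the branch above, contrasts so far)."""
    if isinstance(node, str):                         # a tip
        return x[node], 0.0, []
    (lchild, lv), (rchild, rv) = node
    lx, ladd, lcon = contrasts(lchild, x)
    rx, radd, rcon = contrasts(rchild, x)
    lv, rv = lv + ladd, rv + radd                     # lengthen daughter branches
    c = (lx - rx) / sqrt(lv + rv)                     # standardized contrast
    value = (rv * lx + lv * rx) / (lv + rv)           # weighted node value
    return value, lv * rv / (lv + rv), lcon + rcon + [c]

print(contrasts(root, values)[2])                     # four standardized contrasts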

The input file is as described in the continuous characters documentation file above, for the case of continuous quantitative characters (not gene frequencies). Options are selected using a menu:


Continuous character comparative analysis, version 3.6a3

Settings for this run:
  W        within-population variation in data?  No, species values are means
  R     Print out correlations and regressions?  Yes
  A      LRT test of no phylogenetic component?  Yes, with and without VarA
  C                        Print out contrasts?  No
  M                     Analyze multiple trees?  No
  0         Terminal type (IBM PC, ANSI, none)?  (none)
  1          Print out the data at start of run  No
  2        Print indications of progress of run  Yes

  Y to accept these or type the letter for one to change

Option W makes the program expect not means of the phenotypes in each species, but phenotypes of individual specimens. The details of the input file format in that case are given below. In that case the program estimates the covariances of the phenotypic change, as well as covariances of within-species phenotypic variation. The model used is similar to (but not identical to) that of Lynch (1990). The algorithms used differ from the ones he gives in that paper. They will be described in a forthcoming paper by me. When there are within-species samples, contrasts are still used by the program, but it does not make sense to write them out to an output file for direct analysis. They are of two kinds, contrasts within species and contrasts between species. The former are affected only by the within-species phenotypic covariation, but the latter are affected by both within- and between-species covariation. CONTRAST infers these two kinds of covariances and writes the estimates out.

M is similar to the usual multiple data sets input option, but is used here to allow multiple trees to be read from the treefile, not multiple data sets to be read from the input file. In this way you can use bootstrapping on the data that estimated these trees, get multiple bootstrap estimates of the tree, and then use the M option to make multiple analyses of the contrasts and the covariances, correlations, and regressions. In this way (Felsenstein, 1988b) you can assess the effect of the inaccuracy of the trees on your estimates of these statistics.

R allows you to turn off or on the printing out of the statistics. If it is off only the contrasts will be printed out (unless option 1 is selected). With only the contrasts printed out, they are in a simple array that is in a form that many statistics packages should be able to read. The contrasts are rows, and each row has one contrast for each character. Any multivariate statistics package should be able to analyze these (but keep in mind that the contrasts have, by virtue of the way they are generated, expectation zero, so all regressions must pass through the origin). If the W option has been set to analyze within-species as well as between-species variation, the R option does not appear in the menu as the regression and correlation statistics should always be computed in that case.

As usual, the tree file has the default name intree. It should contain the desired tree or trees. These can be either in bifurcating form, or may have the bottommost fork be a trifurcation (it should not matter which of these ways you present the tree). The tree must, of course, have branch lengths.

If you have a molecular data set (for example) and also, on the same species, quantitative measurements, here is how you can allow for the uncertainty of your estimate of the tree. Use SEQBOOT to generate multiple data sets from your molecular data. Then, whichever method you use to analyze it (the relevant ones are those that produce estimates of the branch lengths: DNAML, DNAMLK, FITCH, KITSCH, and NEIGHBOR -- the latter three require you to use DNADIST to turn the bootstrap data sets into multiple distance matrices), you should use the Multiple Data Sets option of that program. This will result in a tree file with many trees on it. Then use this tree file with the input file containing your continuous quantitative characters, choosing the Multiple Trees (M) option. You will get one set of contrasts and statistics for each tree in the tree file. At the moment there is no overall summary: you will have to tabulate these by hand. A similar process can be followed if you have restriction sites data (using RESTML) or gene frequencies data.

The statistics that are printed out include the covariances between all pairs of characters, the regressions of each character on each other (column j is regressed on row i), and the correlations between all pairs of characters. In assessing degrees of freedom it is important to realize that each contrast was taken to have expectation zero, which is known because each contrast could as easily have been computed as xi-xj instead of xj-xi. Thus there is no loss of a degree of freedom for estimation of a mean. The number of degrees of freedom is thus the same as the number of contrasts, namely one less than the number of species (tips). If you feed these contrasts into a multivariate statistics program make sure that it knows that each variable has expectation exactly zero.
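As a reminder of what a regression through the origin looks like, here is a tiny sketch with made-up contrast values (the numbers are not CONTRAST output): with no intercept, the slope is the cross-product sum divided by the sum of squares, and every contrast contributes one degree of freedom.

x = [0.75, 1.86, 0.40, 3.38]      # contrasts for character 1 (made up)
y = [2.18, 0.15, 0.55, 1.22]      # contrasts for character 2 (made up)

slope = sum(a * b for a, b in zip(x, y)) / sum(a * a for a in x)
df = len(x)                       # no mean is estimated, so no degree is lost
print(slope, df)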

Within-species variation

With the W option selected, CONTRAST analyzes data sets with variation within species, using a model like that proposed by Michael Lynch (1990). If you select the W option for within-species variation, the data set should have this structure (on the left are the data, on the right my comments):

   10    5                        number of species, number of characters
Alpha        2                    name of 1st species, # of individuals
 2.01 5.3 1.5  -3.41 0.3          data for individual #1
 1.98 4.3 2.1  -2.98 0.45         data for individual #2
Gammarus     3                    name of 2nd species, # of individuals
 6.57 3.1 2.0  -1.89 0.6          data for individual #1
 7.62 3.4 1.9  -2.01 0.7          data for individual #2
 6.02 3.0 1.9  -2.03 0.6          data for individual #3
...                               (and so on)

The covariances, correlations, and regressions for the "additive" (between-species evolutionary variation) and "environmental" (within-species phenotypic variation) are printed out (the maximum likelihood estimates of each). The program also estimates the within-species phenotypic variation in the case where the between-species evolutionary covariances are forced to be zero. The log-likelihoods of these two cases are compared and a likelihood ratio test (LRT) is carried out. The program prints the result of this test as a chi-square variate, and gives the number of degrees of freedom of the LRT. You have to look up the chi-square variable on a table of the chi-square distribution.
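If you would rather compute the tail probability than consult a table, a one-line chi-square lookup will do. The log-likelihoods and degrees of freedom below are made-up numbers, not output from a real CONTRAST run:

from scipy.stats import chi2

lnL_with, lnL_without, df = -85.2, -91.7, 3       # illustrative values only
lrt = 2.0 * (lnL_with - lnL_without)              # the chi-square variate printed
print(lrt, chi2.sf(lrt, df))                      # statistic and its p-value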

The log-likelihoods of the data under the models with and without between-species variation are printed out. For the moment the program cannot handle the case where within-species variation is to be taken into account but where only species means are available. (It can handle cases where some species have only one member in their sample).

We hope to fix this soon. We are also on our way to incorporating full-sib, half-sib, or clonal groups within species, so as to do one analysis for within-species genetic and between-species phylogenetic variation.

The data set used as an example below is the example from a paper by Michael Lynch (1990), his characters having been log-transformed. In the case where there is only one specimen per species, Lynch's model is identical to our model of within-species variation (for multiple individuals per species it is not a subcase of his model).


TEST SET INPUT

    5   2
Homo        4.09434  4.74493
Pongo       3.61092  3.33220
Macaca      2.37024  3.36730
Ateles      2.02815  2.89037
Galago     -1.46968  2.30259


TEST SET INPUT TREEFILE

((((Homo:0.21,Pongo:0.21):0.28,Macaca:0.49):0.13,Ateles:0.62):0.38,Galago:1.00);


TEST SET OUTPUT (with all numerical options on )


Continuous character contrasts analysis, version 3.6a3

   5 Populations,    2 Characters

Name                       Phenotypes
----                       ----------

Homo         4.09434   4.74493
Pongo        3.61092   3.33220
Macaca       2.37024   3.36730
Ateles       2.02815   2.89037
Galago      -1.46968   2.30259


Covariance matrix
---------- ------

    4.1991    1.3844
    1.3844    0.7125

Regressions (columns on rows)
----------- -------- -- -----

    1.0000    0.3297
    1.9430    1.0000

Correlations
------------

    1.0000    0.8004
    0.8004    1.0000


version 3.6

DRAWTREE

© Copyright 1990-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DRAWTREE interactively plots an unrooted tree diagram, with many options including orientation of tree and branches, label sizes and angles, margin sizes. Particularly if you can use your computer screen to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want.

To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation.

As with DRAWGRAM, to run DRAWTREE you need a compiled copy of the program, a font file, and a tree file. The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed.

Once these choices have been made you will see the central menu of the program, which looks like this:


Unrooted tree plotting program version 3.6a3

Here are the settings: 

 0  Screen type (IBM PC, ANSI)?  (none)
 P       Final plotting device:  Postscript printer
 V           Previewing device:  X Windows display
 B          Use branch lengths:  Yes
 L             Angle of labels:  branch points to Middle of label
 R            Rotation of tree:  90.0
 A       Angle of arc for tree:  360.0
 I     Iterate to improve tree:  Equal-Daylight algorithm
 D  Try to avoid label overlap?  No
 S      Scale of branch length:  Automatically rescaled
 C   Relative character height:  0.3333
 F                        Font:  Times-Roman
 M          Horizontal margins:  1.65 cm
 M            Vertical margins:  2.16 cm
 #           Page size submenu:  one page per tree

 Y to accept these or type the letter for one to change

These are the settings that control the appearance of the tree, which has already been read in. You can either accept these as is, in which case you would answer Y to the question and press the Return or Enter key, or you can answer N if you want to change one, or simply type the character corresponding to the one you want to change (if you answer N it will just immediately ask you for that number anyway).

For a first run, particularly if previewing is available, you might accept these default values and see what the result looks like. The program will then tell you it is about to preview the tree and ask you to press Return or Enter when you are ready to see this (you will probably have to press it twice). If you are on a Windows system (and have its graphics selected as your previewing option), on a Unix or Linux system and are using X windows for previewing, or are on a Macintosh system, a new window will open with the preview in it. If you are using the Tektronix preview option the preview will appear in the window where the menu was.

On X Windows, Macintosh, and Windows you can resize the preview window, though for some of these you may have to ask the system to redraw the preview to see it at the new window size.

Once you are finished looking at the preview, you will want to specify whether the program should make the final plot or change some of the settings. This is done differently on the different previews:

  • In X Windows you should make the menu window the active window. You may need to move the mouse over it, or click in it, or click on its top bar. You do not need to try to close the preview window yourself, and usually if you do this will cause trouble.
  • In Windows use the File menu in the preview window and choose either the Change Parameters menu item, or if you are ready to make the final plot, choose the Plot menu item.
  • On a Macintosh system, you can simply use the little box in the corner of the preview window to close it. The text window for the menu will then be active.
  • In PC graphics press on the Enter key. The screen with the preview should disappear and the settings menu reappear.
  • With a Tektronix preview, you may need to change your screen from a Tektronix-compatible mode to see the menu again.

Except with the Macintosh preview, the program will now ask you whether the tree is ready to be plotted. If you answer Y (for Yes) (or choose this option in the File menu of the preview window in the case of Windows) the program will usually write a plot file (with some plot options it will draw the tree on the screen). Then it will terminate.

But if you do not say that you are ready to plot the tree, it will go back to the above menu, allow you to change more options, and go through the whole process again. The easiest way to learn the meaning of the options is to try them, particularly if previewing is available. Below I will describe them one by one; you may prefer to skip reading this unless you are puzzled about one of them.

THE OPTIONS

0
This is an option that allows you to change the menu window to be an ANSI terminal or an IBM PC terminal. Generally you will not want to change this.

P
This allows you to choose the Plotting device or file format. We have discussed the possible choices in the draw programs documentation web page.

V
This allows you to change the type of preView window (or even turn off previewing). We have discussed the different possible choices in the draw programs documentation web page.

B
Whether the tree has Branch lengths that are being used in the diagram. If the tree that was read in had a full set of branch lengths, it will be assumed as a default that you want to use them in the diagram, but you can specify that they are not to be used. If the tree does not have a full set of branch lengths then this will be indicated, and if you try to use branch lengths the program will refuse to allow you to do so.

L
The angle of the Labels. Initially the branches connected to the tips will point to the middles of the labels. If you want to change the way the labels are drawn, the program will offer you a choice between Fixed, Middle, Radial, and Along as the ways the angles of the labels are to be determined. If you choose Fixed, you will be asked what fixed angle, between 90.0 and -90.0 degrees, the labels should be drawn at. You may have to try different angles to find one that keeps the labels from colliding: I have not guarded against this. The other systems for determining angles of labels are Middle (M), Radial (R) and Along (A). Middle has the branch connected to that tip point to the midpoint of the label. Radial indicates that the labels are all aligned so as to point toward the root node of the tree. Along aligns them to have the same angle as the branch connected to that tip. This is particularly likely to keep the labels from colliding, but it may give a misleading impression that the final branch is long. Note that with the Radial option, if you do not like the point from which the labels appear to radiate, you might try re-rooting the tree (option 7).

R
The rotation of the tree. This is initially 90.0 degrees. The angle is read out counterclockwise from the right side of the tree, so that increasing this angle will rotate the tree counterclockwise, and decreasing it will rotate it clockwise. The meaning of this angle is explained further under option A. As you rotate the tree, the appearance (and size) may change, but the labels will not rotate if they are drawn at a Fixed angle.

A
The Angle through which the tree is plotted. This is by default 360.0 degrees. The tree is in the shape of an old-fashioned hand fan. The tree fans out from its root node, each of the subtrees being allocated part of this angle, a part proportional to how many tips the subtree contains. If the rotation of the tree is (say) 90.0 degrees (the default under option R), the fan starts at +270 degrees and runs clockwise around to -90 degrees (i.e., it starts at the bottom of the plot and runs clockwise around until it returns to the bottom). Thus the center of the fan runs from the root upwards (which is why we say it is rotated to 90.0 degrees). By changing option R we can change the direction of the fan, and by changing option A we can change the width of the fan without changing its center line. If you want the tree to fan out in a semicircle, a value a bit greater than 180 degrees would be appropriate, as the tree will not completely fill the fan. Note that using either of the iterative improvement methods mentioned below is impossible if the angle is not 360 degrees.
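The proportional allocation of angle to subtrees can be sketched in a few lines. This is illustrative only (a toy nested-tuple tree, not DRAWTREE's code), but it is the same idea that the Equal Arc method described under option I applies recursively:

def count_tips(node):
    return 1 if isinstance(node, str) else sum(count_tips(c) for c in node)

def allocate(node, start, total):
    """Give each subtree an arc proportional to its number of tips."""
    if isinstance(node, str):
        print(f"{node}: arc {start:.1f} to {start + total:.1f} degrees")
        return
    angle = start
    for child in node:
        share = total * count_tips(child) / count_tips(node)
        allocate(child, angle, share)
        angle += share

tree = ((("A", "B"), "C"), (("D", "E"), ("F", "G")))   # made-up 7-tip tree
allocate(tree, 0.0, 360.0)                             # the default 360-degree fan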

I
Whether the tree angles will be Iteratively improved. There are three methods available:

Equal Arc
This method, invented by Christopher Meacham in PLOTREE, the predecessor to this program, starts from the root of the tree and allocates arcs of angle to each subtree proportional to the number of tips in it. This continues as one moves out to other nodes of the tree and subdivides the angle allocated to them into angles for each of that node's dependent subtrees. This method is fast, and never results in lines of the tree crossing. It is the method used to make a starting tree for the other two methods.

Equal Daylight
This iteratively improves an initial tree by successively going to each interior node, looking at the subtrees (often there are 3 of them) visible from there, and swinging them so that the arcs of "daylight" visible between them are equal. This is not as fast as Equal Arc but should never result in lines crossing. It gives particularly good-looking trees, and it is the default method for this program. It will be described in a future paper by me. This method has also been licensed to David Swofford for use in his program PAUP*.

N-Body
This assumes that there are electrical charges located at the ends of all the branches, and that they repel each other with a force that varies (as electrical repulsion would) as the inverse square of the distance between them. The tree adjusts its shape until the forces balance. This can be computationally slow, and can result in lines crossing. I find the trees inferior to Equal Daylight, but it is worth a try.

D
Whether the program tries to avoiD overlap of the labels. We have left this off by default, because it is a rather feeble option that is frequently unsuccessful, and often makes the trees look weird. Nevertheless it may be worth a try.

S
On what Scale the branch lengths will be translated into distances on the output device. Note that when branch lengths have not been provided, there are implicit branch lengths of 1.0 per branch. This option will toggle back and forth between automatic adjustment of branch lengths so that the diagram will just fit into the margins, and you specifying how many centimeters there will be per unit branch length. This is included so that you can plot different trees to a common scale, showing which ones have longer or shorter branches than others. Note that if you choose too large a value for centimeters per unit branch length, the tree will be so big it will overrun the plotting area and may cause failure of the diagram to display properly. Too small a value will cause the tree to be a nearly invisible dot.

C
The Character height, measured as a fraction of a quantity which is the horizontal space available for the tree, divided by one less than the number of tips. You need not worry about exactly what this is: you can always change the value (which is initially 0.3333) to make the labels larger or smaller. On output devices where line thicknesses can be varied, the thickness of the tree lines will automatically be adjusted to be proportional to the character height, which is an additional reason you may want to change character height.

F
Allows you to select the name of the Font that you will use for the species names. This is allowed for some of the plotter drivers (this menu item does not appear for the others). You can select the name of any font that is available for your plotter, for example "Courier-Bold" or "Helvetica". The label will then be printed using that font rather than being drawn line-by-line as it is in the default Hershey font. In the preview of the tree, the Hershey font is always used (which means that it may look different from the final font). The size of the characters in the species names is scaled according to the label heights you have selected in the menu, whether plotter fonts or the Hershey font are used. Note that for some plotter drivers (particular Xfig and PICT) fonts can be used only if the species labels are horizontal or vertical (at angles of 0 degrees or 90 degrees).

M
The horizontal and vertical Margins in centimeters. You can enter new margins (you must enter new values for both horizontal and vertical margins, though these need not be different from the old values). For the moment I do not allow you to specify left and right margins separately, or top and bottom margins separately. In a future release I hope to do so.

G
If iterative improvement is not turned on in option I (so that we are employing the Equal Arc method), this option appears in the menu. It controls whether the angles of lines will be "regularized". Regularization is on by default. It takes the angles of the branches coming out from each node, and changes them so that they are "rounded off". This process (which I will not fully describe) will make the lines vertical if they are close to vertical, horizontal if they are close to horizontal, 45 degrees if they are close to that, and so on. It will lead to a tree in which angles look very regular. You may or may not want that. If you are unhappy with the appearance of the tree when using this option, you could try rotating it slightly (option R) as that may cause some branches to change their angle by a large amount, by having the angles be "rounded off" to a different value.

#
The number of pages per tree. Defaults to one, but if you need a physically large tree you may want to choose a larger number. For example, to make a big tree for a poster, choose a larger number of pages horizontally and vertically (the program will ask you for these numbers), get out your scissors and paste or tape, and go to work.

I recommend that you try all of these options (particularly if you can preview the trees). It is of particular use to try trees with different iteration methods (option I) and with regularization (option G). You will find that a variety of effects can be achieved.

I would appreciate suggestions for improvements in DRAWTREE, but please be aware that the source code is already very large and I may not be able to implement all suggestions.

PHYLIP

Phylogeny Inference Package


Version 3.6(alpha3)

July, 2002

by Joseph Felsenstein


Department of Genome Sciences
University of Washington
Box 357730
Seattle, WA   98195-7730
USA

E-mail address: joe@gs.washington.edu


Contents of this document


Contents of this document
A Brief Description of the Programs
Copyright Notice for PHYLIP
The Documentation Files and How to Read Them
What The Programs Do
Running the Programs
      A word about input files
      Running the programs on a Windows machine
      Running the programs on a Macintosh
      Running the programs on a Unix system
      Running the programs in MSDOS
      Running the programs in background or under control of a command file
Preparing Input Files
      Input and output files
      Data file format
The Menu
The Output File
The Tree File
The Options and How To Invoke Them
      Common options in the menu
        The U (User tree) option
        The G (Global) option
        The J (Jumble) option
        The O (Outgroup) option
        The T (Threshold) option
        The M (Multiple data sets) option
        The W (Weights) option
        The option to write out the trees into a tree file
        The (0) terminal type option
The Algorithm for Constructing Trees
      Local Rearrangements
      Global Rearrangements
      Multiple Jumbles
      Saving multiple tied trees
      Strategy for Finding the Best Tree
A Warning on Interpreting Results
Relative Speed of Different Programs and Machines
      Relative speed of the different programs
      Speed with different numbers of species
      Relative speed of different machines
General Comments on Adapting the Package to Different Computer Systems
Compiling the programs
      Unix and Linux
      Macintosh PowerMacs
           Compiling with Metrowerks Codewarrior
      On Windows systems
           Compiling with Microsoft Visual C++
           Compiling with Borland C++
           Compiling with Metrowerks Codewarrior for Windows
           Compiling with Cygnus Gnu C++
      VMS VAX systems
      Parallel computers
      Other computer systems
Frequently Asked Questions
      How to make it do various things
      Background information needed:
      Questions about distribution and citation:
      Questions about documentation
      Additional Frequently Asked Questions, or: "Why didn't it occur to you to ...
      (Fortunately) obsolete questions
New Features in This Version
Coming Attractions, Future Plans
Endorsements
      From the pages of Cladistics
      ... and in the pages of other journals:
References for the Documentation Files
Credits
Other Phylogeny Programs Available Elsewhere
      PAUP*
      MacClade
      MEGA
      MOLPHY
      PAML
      TREE-PUZZLE
      DAMBE
      Hennig86
      RnA
      NONA
      TNT
How You Can Help Me
In Case of Trouble


A Brief Description of the Programs

PHYLIP, the Phylogeny Inference Package, is a package of programs for inferring phylogenies (evolutionary trees). It has been distributed since 1980, and has over 10,000 registered users, making it the most widely distributed package of phylogeny programs. It is available free, from its web site:

PHYLIP is available as source code in C, and also as executables for some common computer systems. It can infer phylogenies by parsimony, compatibility, distance matrix methods, and likelihood. It can also compute consensus trees, compute distances between trees, draw trees, resample data sets by bootstrapping or jackknifing, edit trees, and compute distance matrices. It can handle data that are nucleotide sequences, protein sequences, gene frequencies, restriction sites, restriction fragments, distances, discrete characters, and continuous characters.



Copyright Notice for PHYLIP

The following copyright notice is intended to cover all source code, all documentation, and all executable programs of the PHYLIP package.

© Copyright 1980-2002. University of Washington and Joseph Felsenstein. All rights reserved. Permission is granted to reproduce, perform, and modify these programs and documentation files. Permission is granted to distribute or provide access to these programs provided that this copyright notice is not removed, the programs are not integrated with or called by any product or service that generates revenue, and that your distribution of these materials is free. Any modified versions of these materials that are distributed or accessible shall indicate that they are based on these programs. Institutions of higher education are granted permission to distribute this material to their students and staff for a fee to recover distribution costs. Permission requests for any other distribution of this program should be directed to license@u.washington.edu.



The Documentation Files and How to Read Them

PHYLIP comes with an extensive set of documentation files. These include the main documentation file (this one), which you should read fairly completely. In addition there are files for groups of programs, including ones for the molecular sequence programs, the distance matrix programs, the gene frequency and continuous characters programs, the discrete characters programs, and the tree drawing programs. Finally, each program has its own documentation file. References for the documentation files are all gathered together in this main documentation file. A good strategy is to:

  1. Read this main documentation file.
  2. Tentatively decide which programs are of interest to you.
  3. Read the documentation files for the groups of programs that contain those.
  4. Read the documentation files for those individual programs.


What The Programs Do

Here is a short description of each of the programs. For more detailed discussion you should definitely read the documentation file for the individual program and the documentation file for the group of programs it is in. In this list the name of each program is a link which will take you to the documentation file for that program. Note that there is no program in the PHYLIP package called PHYLIP.

PROTPARS
Estimates phylogenies from protein sequences (input using the standard one-letter code for amino acids) using the parsimony method, in a variant which counts only those nucleotide changes that change the amino acid, on the assumption that silent changes are more easily accomplished.
DNAPARS
Estimates phylogenies by the parsimony method using nucleic acid sequences. Allows use of the full IUB ambiguity codes, and estimates ancestral nucleotide states. Gaps are treated as a fifth nucleotide state. Can use 0/1 weights, reconstruct ancestral states, and infer branch lengths.
DNAMOVE
Interactive construction of phylogenies from nucleic acid sequences, with their evaluation by parsimony and compatibility and the display of reconstructed ancestral bases. This can be used to find parsimony or compatibility estimates by hand.
DNAPENNY
Finds all most parsimonious phylogenies for nucleic acid sequences by branch-and-bound search. This may not be practical (depending on the data) for more than 15 species or so.
DNACOMP
Estimates phylogenies from nucleic acid sequence data using the compatibility criterion, which searches for the largest number of sites which could have all states (nucleotides) uniquely evolved on the same tree. Compatibility is particularly appropriate when sites vary greatly in their rates of evolution, but we do not know in advance which are the less reliable ones.
DNAINVAR
For nucleic acid sequence data on four species, computes Lake's and Cavender's phylogenetic invariants, which test alternative tree topologies. The program also tabulates the frequencies of occurrence of the different nucleotide patterns. Lake's invariants are the method which he calls "evolutionary parsimony".
DNAML
Estimates phylogenies from nucleotide sequences by maximum likelihood. The model employed allows for unequal expected frequencies of the four nucleotides, for unequal rates of transitions and transversions, and for different (prespecified) rates of change in different categories of sites, with the program inferring which sites have which rates. It also allows different rates of change at known sites.
DNAMLK
Same as DNAML but assumes a molecular clock. The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made.
PROML
Estimates phylogenies from protein amino acid sequences by maximum likelihood. The PAM or JTTF models can be employed. The program can allow for different (prespecified) rates of change in different categories of amino acid positions, with the program inferring which positions have which rates. It also allows different rates of change at known sites.
PROMLK
Same as PROML but assumes a molecular clock. The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made.
DNADIST
Computes four different distances between species from nucleic acid sequences. The distances can then be used in the distance matrix programs. The distances are the Jukes-Cantor formula, one based on Kimura's 2-parameter method, Jin and Nei's distance which allows for rate variation from site to site, and a maximum likelihood method using the model employed in DNAML. The latter method of computing distances can be very slow.
PROTDIST
Computes a distance measure for protein sequences, using maximum likelihood estimates based on the Dayhoff PAM matrix, Kimura's 1983 approximation to it, or a model based on the genetic code plus a constraint on changing to a different category of amino acid. Rate variation from site to site is also allowed. The distances can be used in the distance matrix programs.
RESTDIST
Distances calculated from restriction sites data or restriction fragments data. The restriction sites option is the one to use to also make distances for RAPDs or AFLPs.
RESTML
Estimation of phylogenies by maximum likelihood using restriction sites data (not restriction fragments but presence/absence of individual sites). It employs the Jukes-Cantor symmetrical model of nucleotide change, which does not allow for differences of rate between transitions and transversions. This program is very slow.
SEQBOOT
Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, or into a preliminary version of a new XML sequence alignment format which is under development.
FITCH
Estimates phylogenies from distance matrix data under the "additive tree model" according to which the distances are expected to equal the sums of branch lengths between the species. Uses the Fitch-Margoliash criterion and some related least squares criteria. Does not assume an evolutionary clock. This program will be useful with distances computed from molecular sequences, restriction sites or fragments distances, with DNA hybridization measurements, and with genetic distances computed from gene frequencies.
KITSCH
Estimates phylogenies from distance matrix data under the "ultrametric" model which is the same as the additive tree model except that an evolutionary clock is assumed. The Fitch-Margoliash criterion and other least squares criteria are used. This program will be useful with distances computed from molecular sequences, restriction sites or fragments distances, with distances from DNA hybridization measurements, and with genetic distances computed from gene frequencies.
NEIGHBOR
An implementation by Mary Kuhner and John Yamato of Saitou and Nei's "Neighbor Joining Method," and of the UPGMA (Average Linkage clustering) method. Neighbor Joining is a distance matrix method producing an unrooted tree without the assumption of a clock. UPGMA does assume a clock. The branch lengths are not optimized by the least squares criterion but the methods are very fast and thus can handle much larger data sets.
CONTML
Estimates phylogenies from gene frequency data by maximum likelihood under a model in which all divergence is due to genetic drift in the absence of new mutations. Does not assume a molecular clock. An alternative method of analyzing these data is to compute Nei's genetic distance and use one of the distance matrix programs. This program can also do maximum likelihood analysis of continuous characters that evolve by a Brownian Motion model, but it assumes that the characters evolve at equal rates and in an uncorrelated fashion, so that it does not take into account the usual correlations of characters.
GENDIST
Computes one of three different genetic distance formulas from gene frequency data. The formulas are Nei's genetic distance, the Cavalli-Sforza chord measure, and the genetic distance of Reynolds et al. The first is appropriate for data in which new mutations occur under an infinite isoalleles neutral mutation model, the latter two for a model without mutation and with pure genetic drift. The distances are written to a file in a format appropriate for input to the distance matrix programs.
CONTRAST
Reads a tree from a tree file, and a data set with continuous characters data, and produces the independent contrasts for those characters, for use in any multivariate statistics package. Will also produce covariances, regressions and correlations between characters for those contrasts. Can also correct for within-species sampling variation when individual phenotypes are available within a population.
PARS
Multistate discrete-characters parsimony method. Up to 8 states (as well as "?") are allowed. Cannot do Camin-Sokal or Dollo Parsimony. Can reconstruct ancestral states, use character weights, and infer branch lengths.
MIX
Estimates phylogenies by some parsimony methods for discrete character data with two states (0 and 1). Allows use of the Wagner parsimony method, the Camin-Sokal parsimony method, or arbitrary mixtures of these. Also reconstructs ancestral states and allows weighting of characters (does not infer branch lengths).
MOVE
Interactive construction of phylogenies from discrete character data with two states (0 and 1). Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand.
PENNY
Finds all most parsimonious phylogenies for discrete-character data with two states, for the Wagner, Camin-Sokal, and mixed parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species.
DOLLOP
Estimates phylogenies by the Dollo or polymorphism parsimony criteria for discrete character data with two states (0 and 1). Also reconstructs ancestral states and allows weighting of characters. Dollo parsimony is particularly appropriate for restriction sites data; with ancestor states specified as unknown it may be appropriate for restriction fragments data.
DOLMOVE
Interactive construction of phylogenies from discrete character data with two states (0 and 1) using the Dollo or polymorphism parsimony criteria. Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand.
DOLPENNY
Finds all most parsimonious phylogenies for discrete-character data with two states, for the Dollo or polymorphism parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species.
CLIQUE
Finds the largest clique of mutually compatible characters, and the phylogeny which they recommend, for discrete character data with two states. The largest clique (or all cliques within a given size range of the largest one) are found by a very fast branch and bound search method. The method does not allow for missing data. For such cases the T (Threshold) option of PARS or MIX may be a useful alternative. Compatibility methods are particularly useful when some characters are of poor quality and the rest of good quality, but when it is not known in advance which ones are which.
FACTOR
Takes discrete multistate data with character state trees and produces the corresponding data set with two states (0 and 1). Written by Christopher Meacham. This program was formerly used to accommodate multistate characters in MIX, but this is less necessary now that PARS is available.
DRAWGRAM
Plots rooted phylogenies, cladograms, and phenograms in a wide variety of user-controllable formats. The program is interactive and allows previewing of the tree on PC or Macintosh graphics screens, and Tektronix or Digital graphics terminals. Final output can be written to a file formatted for one of the drawing programs, or sent to a laser printer (such as a Postscript or PCL-compatible printer), to graphics screens or terminals, to pen plotters (Hewlett-Packard or Houston Instruments), or to dot matrix printers capable of graphics (Epson, Okidata, Imagewriter, or Toshiba).
DRAWTREE
Similar to DRAWGRAM but plots unrooted phylogenies.
TREEDIST
Computes the Robinson-Foulds symmetric difference distance between trees, which allows for differences in tree topology (but does not use branch lengths).
CONSENSE
Computes consensus trees by the majority-rule consensus tree method, which also allows one to easily find the strict consensus tree. Is not able to compute the Adams consensus tree. Trees are input in a tree file in standard nested-parenthesis notation, which is produced by many of the tree estimation programs in the package. This program can be used as the final step in doing bootstrap analyses for many of the methods in the package.
RETREE
Reads in a tree (with branch lengths if necessary) and allows you to reroot the tree, to flip branches, to change species names and branch lengths, and then write the result out. Can be used to convert between rooted and unrooted trees, and to write the tree into a preliminary version of a new XML tree file format which is under development.


Running the Programs

This section assumes that you have obtained PHYLIP as compiled executables (for Windows, Macintosh, or DOS), or have obtained the source code and compiled it yourself (for Linux, Unix, or OpenVMS). For machines for which compiled executables are available, there will usually be no need for you to have a compiler or compile the programs yourself. This section describes how to run the programs. Later in this document we will discuss how to download and install PHYLIP (in case you are somehow reading this without yet having done that). Normally you will only read this document after downloading and installing PHYLIP.

A word about input files.

For all of these types of machines, it is important to have the input files for the programs (typically data files) prepared in advance. They can be prepared in any editor, but it is important that they be saved in Text Only ("flat ASCII") format, not in the format that word processors such as Microsoft Word want to write. It is up to you to read the PHYLIP documentation files which describe the file formats that are needed. There is a partial description in the next section of this document. The input files can also be obtained by running a program that produces output files in PHYLIP format (some of the PHYLIP programs do, as do programs by others, such as the sequence alignment program ClustalW and the sequence format conversion program Readseq). There is no input file editor available in any program in PHYLIP (you should not simply start running one of the programs and then expect to click a mouse somewhere to start creating a data file).

When they start running, the programs look first for input files with particular names (such as infile, treefile, intree, or fontfile). Exactly which file names they look for varies a bit from program to program, and you should read the documentation file for the particular program to find out. If you have files with those names the programs will use them and not ask you for the file name. If they do not find files of those names, the programs will say that they cannot find a file of that name, and ask you to type in the file name. For example, if DnaML looks for the file infile and does not find one of that name, it prints the message:

dnaml: can't find input file "infile"
Please enter a new file name>

This does not mean that an error has occurred. All you need to do is to type in the name of the file.

The program looks for the input files in the same directory that the program is in (a directory is the same thing as a "folder"). In Windows, Linux, Unix, or MSDOS, if you are asked for the file name you can type in the path to the file, as part of the name (thus, if the file is in the directory above the current one, you can type in a file name such as ../myfile.dna). If you do not know what a "directory" is, or what "above" means, then you are a member of the new generation who just clicks the mouse and assumes that a list of file names will magically appear. (Typically members of this generation have no idea where the files are on their system, and accumulate enormous amounts of unnecessary clutter in their file systems.) In this case you should ask someone to explain directories to you.

Running the programs on a Windows machine.

Double-click on the icon for the program. A window should open with a menu in it. Further dialog with the program occurs by typing on the keyboard in response to what you see in the window. The programs can be interrupted either by typing Control-C (which means to press down on the Ctrl key while typing the letter C), or by using the mouse to open the File menu in the upper-left corner of the program's window area and then selecting Quit. Other than this, most PHYLIP programs make no use of the mouse. The tree-drawing programs Drawtree and Drawgram do allow use of the mouse to select some options.

Running the programs on a Macintosh.

Double-click on the icon for the program. A window should open. Further dialog with the program occurs by typing on the keyboard in response to what you see in the window. The programs can be interrupted by using the mouse to open the File menu in the upper-left corner of the program's window area and then selecting Quit. Alternatively, you can use the Command-Q key combination.

When you use Quit, the program will ask you whether you want to save a file whose name is the program name (often followed by .out -- for example, if you are using DNAML it will ask you if you want to save file Dnaml.out). This file is simply a record of everything that was displayed in the program window, and you usually will not want to save it. Pressing the Enter key or selecting the Do Not Save button with the mouse will keep it from being saved.

If you encounter memory limitations on a Macintosh, and determine that this is not due to a problem with the format of the input file, as it often will be, you may be able to solve it by raising the limits of the stack and heap sizes of the program. To do this click on the program and then select Get Info from the Finder File menu. This will open a window which can be made to show the memory limits of the program. These can be changed by selecting them and typing in larger numbers. This may relieve nagging memory problems. If it does not, consult your local documentation and suspect problems with your input file format.

Running the programs on a Unix system.

Type the name of the program in lower-case letters (such as dnaml). To interrupt the program while it is running, type Control-C (which means to press down on the Ctrl key while typing the letter C).

Running the programs in MSDOS.

Type the name of the program in lower-case letters (such as dnaml). To interrupt the program while it is running, type Control-C (which means to press down on the Ctrl key while typing the letter C).

Running the programs in background or under control of a command file

In running the programs, you may sometimes want to put them in background so you can proceed with other work. On systems with a windowing environment they can be put in their own window, and commands like the Unix and Linux nice command used to make them have lower priority so that they do not interfere with interactive applications in other windows. This part of the discussion will assume either a Windows system or a Unix or Linux system. I will note when the commands work on one of these systems but not the other. Running jobs in background on Macintosh systems is an arcane art into whose mysteries I have not been initiated (or perhaps no one has been initiated).

If there is no windowing environment, on a Unix or Linux system you will want to use an ampersand (&) after the command file name when invoking it to put the job in the background. You will have to put all the responses to the interactive menu of the program into a file and tell the background job to take its input from that file. On Windows systems there is no & or nice command but input and output redirection and command files work fine, with the sole difference that a file of commands must have a name ending in .BAT, such as FOOFILE.BAT.

For example: suppose you want to run DNAPARS in the background, taking its input data from a file called sequences.dat, putting its interactive output into a file called screenout, and using a file called input as the place to store the interactive input. The file input need only contain two lines:

sequences.dat
Y

which is what you would have typed to run the program interactively: the first line in response to the program's request for an input file name (since it does not find a file named infile), and the second in response to the menu.

To run the program in background, in Unix or Linux you would simply give the command:

dnapars < input > screenout &

This runs the program with input responses coming from input and interactive output being put into the file screenout. The usual output file and tree file will also be created by this run (keep that in mind: if you run any other PHYLIP program from the same directory while this one is running in background, you may overwrite the output file from one program with that from the other!).

If you wanted to give the program lower priority, so that it would not interfere with other work, and you have Berkeley Unix type job control facilities in your Unix or Linux (and you usually do), you can use the nice command:

nice +10 dnapars < input > screenout &

which lowers the priority of the run. To also time the run and put the timing at the end of screenout, you can do this:

nice +10 ( time dnapars < input ) >& screenout &

which I will not attempt to explain.

On Unix or Linux systems you may also want to explore putting the interactive output into the null file /dev/null so as to not be bothered with it (but then you cannot look at it to see why something went wrong). If you have problems with creating output files that are too large, you may want to explore carefully the turning off of options in the programs you run.
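
One simple way to avoid the output-file collisions mentioned above is to give each background run its own directory. Here is a minimal sketch for Unix or Linux, reusing the example file names sequences.dat and input from above (the directory name run1 is likewise just an example):

mkdir run1
cp sequences.dat input run1
cd run1
dnapars < input > screenout &
cd ..

Each run then writes its outfile and outtree inside its own directory, so simultaneous runs cannot overwrite one another's results.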

If you are doing several runs in one, as for example when you do a bootstrap analysis using SEQBOOT, DNAPARS (say), and CONSENSE, you can use an editor to create a "command file" with these commands:

seqboot < input1 > screenout
mv outfile infile
dnapars < input2 >> screenout
mv outtree intree
consense < input3 >> screenout

This is the Unix or Linux version -- in the MSDOS version, the renaming of files and the appending of output to the file screenout is handled differently.

On Unix or Linux the command file might be named something like foofile, and on Windows systems might be named foofile.bat.

On Unix or Linux the command file must be given execute permission by using the command chmod +x foofile followed by the command rehash. The job that foofile describes can be run in background on Unix or Linux by giving the command

foofile &

On Windows systems it can be run by clicking on the icon of the command file. Its icon will have a little gear symbol.

Note that you must also have the interactive input commands for SEQBOOT (including the random number seed), DNAPARS, and CONSENSE in the separate files input1, input2, and input3. Note that when PHYLIP programs attempt to open a new output file (such as outfile, outtree, or plotfile), if they see a file of that name already in existence they will ask you if you want to overwrite it, and offer alternatives including writing to another file, appending information to that file, or quitting the program without writing to the file. This means that in writing batch files it is important to know whether there will be a prompt of this sort. You must know in advance whether the file will exist. You may want to put in your batch file a command that tests for the existence of a pre-existing output file and, if so, removes it. You might even want to put in a command that creates a file of that name, so that you can be sure it is there! Either way, you will then know whether to put into your file of keyboard responses the proper response to the inquiry about overwriting that output file.
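
For example, here is a sketch of a Unix or Linux command file for the SEQBOOT / DNAPARS / CONSENSE analysis above which handles this by deleting any pre-existing output files before they are needed. It is only an illustration, not part of the package, and it assumes (as above) that the keyboard responses for the three programs are in input1, input2, and input3:

#!/bin/sh
# delete leftover output files so that no program asks about overwriting them
rm -f outfile outtree
seqboot < input1 > screenout
mv outfile infile
dnapars < input2 >> screenout
mv outtree intree
# dnapars also wrote an outfile; remove it before consense creates its own
rm -f outfile
consense < input3 >> screenout

With the leftovers removed in advance, the files of keyboard responses never need to contain an answer to the overwrite question.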


Preparing Input Files

The input files for PHYLIP programs must be prepared separately - there is no data editor within PHYLIP. You can use a word processor (or text editor) to prepare them yourself, or you can use a program that produces a PHYLIP-format output. Sequence alignment programs such as ClustalW commonly have an option to produce PHYLIP files as output, and some other phylogeny programs, such as MacClade and TreeView, are capable of producing a PHYLIP-format file.

The format of the input files is discussed below, and you should also read the other PHYLIP documentation relevant to the particular type of data that you are using, and the particular programs you want to run, as there will be more details there.

It is very important that the input files be in "Text Only" or "flat ASCII" format. This means that they contain only printable ASCII/ISO characters, and not any unprintable characters. Many word processors such as Microsoft Word save their files in a format that contains unprintable characters, unless you tell them not to. For Microsoft Word you can select Save As from its File menu, and choose Text Only as the file format. This can also be done in the WordPad utility in Windows. Other word processors will have equivalent options. Text editors such as the vi and emacs editors on Unix and Linux, Windows Notepad, the SimpleText editor in MacOS, or the pico editor that comes with the pine mailer program, produce their files in Text Only format and should not cause any trouble.

Input and output files

For most of the PHYLIP programs, information comes from a series of input files, and ends up in a series of output files:

                   -------------------
                  |                   |
infile ---------> |                   |
                  |                   |
intree ---------> |                   | -----------> outfile
                  |                   |
weights --------> |      program      | -----------> outtree
                  |                   |
categories -----> |                   | -----------> plotfile
                  |                   |
fontfile -------> |                   |
                  |                   |
                   -------------------

The programs interact with the user by presenting a menu. Aside from the user's choices from the menu, they read all other input from files. These files have default names. The program will try to find a file of that name - if it does not, it will ask the user to supply the name of that file. Input data such as DNA sequences comes from a file whose default name is infile. If the user supplies a tree, this is in a file whose default name is intree. Values of weights for the characters are in weights, and the tree plotting programs need some digitized fonts, which are supplied in fontfile (all these are default names).

For example, if DnaML looks for the file infile and does not find one of that name, it prints the message:

dnaml: can't find input file "infile"
Please enter a new file name>

This simply means that it wants you to type in the name of the input file.

Two programs in the package work differently, according to an older ("Old Style") system. These are CLIQUE and FACTOR. The information on ancestral states is supplied in the data file whose default name is infile, and for FACTOR the Factors information is written into the output file rather than being put into a separate file called factors. See the documentation page for CLIQUE and the documentation page for FACTOR for information on these differences. By the time of the final 3.6 release we hope to have these last Old Style programs converted to the new system.

Data file format

I have tried to adhere to a rather stereotyped input and output format. For the parsimony, compatibility and maximum likelihood programs, excluding the distance matrix methods, the simplest version of the input data file looks something like this:

   6   13
Archaeopt CGATGCTTAC CGC
HesperorniCGTTACTCGT TGT
BaluchitheTAATGTTAAT TGT
B. virginiTAATGTTCGT TGT
BrontosaurCAAAACCCAT CAT
B.subtilisGGCAGCCAAT CAC

The first line of the input file contains the number of species and the number of characters (in this case sites). These are in free format, separated by blanks. The information for each species follows, starting with a ten-character species name (which can include blanks and some punctuation marks), and continuing with the characters for that species. The name should be on the same line as the first character of the data for that species. (I will use the term "species" for the tips of the trees, recognizing that in some cases these will actually be populations or individual gene sequences).

The name should be ten characters in length, filled out to the full ten characters by blanks if shorter. Any printable ASCII/ISO character is allowed in the name, except for parentheses ("(" and ")"), square brackets ("[" and "]"), colon (":"), semicolon (";") and comma (","). If you forget to extend the names to ten characters in length by blanks, the program will get out of synchronization with the contents of the data file, and an error message will result.

In the discrete-character programs, DNA sequence programs and protein sequence programs the characters are each a single letter or digit, sometimes separated by blanks. In the continuous-characters programs they are real numbers with decimal points, separated by blanks:

Latimeria 2.03 3.457 100.2 0.0 -3.7

The conventions about continuing the data beyond one line per species are different between the molecular sequence programs and the others. The molecular sequence programs can take the data in "aligned" or "interleaved" format, in which we first have some lines giving the first part of each of the sequences, then some lines giving the next part of each, and so on. Thus the sequences might look like this:

    6   39
Archaeopt CGATGCTTAC CGCCGATGCT
HesperorniCGTTACTCGT TGTCGTTACT
BaluchitheTAATGTTAAT TGTTAATGTT
B. virginiTAATGTTCGT TGTTAATGTT
BrontosaurCAAAACCCAT CATCAAAACC
B.subtilisGGCAGCCAAT CACGGCAGCC

TACCGCCGAT GCTTACCGC
CGTTGTCGTT ACTCGTTGT
AATTGTTAAT GTTAATTGT
CGTTGTTAAT GTTCGTTGT
CATCATCAAA ACCCATCAT
AATCACGGCA GCCAATCAC

Note that in these sequences we have a blank every ten sites to make them easier to read: any such blanks are allowed. The blank line which separates the two groups of lines (the ones containing sites 1-20 and ones containing sites 21-39) may or may not be present, but if it is, it should be a line of zero length and not contain any extra blank characters (this is because of a limitation of the current versions of the programs). It is important that the number of sites in each group be the same for all species (i.e., it will not be possible to run the programs successfully if the first species line contains 20 bases, but the first line for the second species contains 21 bases).

Alternatively, an option can be selected in the menu to take the data in "sequential" format, with all of the data for the first species, then all of the characters for the next species, and so on. This is also the way that the discrete characters programs and the gene frequencies and quantitative characters programs want to read the data. They do not allow the interleaved format.

In the sequential format, the character data can run on to a new line at any time (except in the middle of a species name or, in the case of the continuous character and distance matrix programs, in the middle of a real number). Thus it is legal to have:

Archaeopt 001100
1101

or even:

Archaeopt
0011001101

though note that the full ten characters of the species name must then be present: in the above case there must be a blank after the "t". In all cases it is possible to put internal blanks between any of the character values, so that

Archaeopt 0011001101 0111011100

is allowed.

Note that you can convert molecular sequence data between the interleaved and the sequential data formats by using the Rewrite option of the D menu item in SEQBOOT.

If you make an error in the format of the input file, the programs can sometimes detect that they have been fed an illegal character or illegal numerical value and issue an error message such as BAD CHARACTER STATE:, often printing out the bad value, and sometimes the number of the species and character in which it occurred. The program will then stop shortly after. One of the things which can lead to a bad value is the omission of something earlier in the file, or the insertion of something superfluous, which causes the reading of the file to get out of synchronization. The program then starts reading things it didn't expect, and concludes that they are in error. So if you see this error message, you may also want to look for the earlier problem that may have led to the program becoming confused about what it is reading.

Some options are described below, but you should also read the documentation for the groups of the programs and for the individual programs.


The Menu

The menu is straightforward. It typically looks like this (this one is for DNAPARS):

DNA parsimony algorithm, version 3.6

Setting for this run:
  U                 Search for best tree?  Yes
  S                        Search option?  More thorough search
  V              Number of trees to save?  100
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  N           Use Transversion parsimony?  No, count all steps
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4          Print out steps in each site  No
  5  Print sequences at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

If you want to accept the default settings (they are shown in the above case) you can simply type Y followed by pressing on the Enter key. If you want to change any of the options, you should type the letter shown to the left of its entry in the menu. For example, to set a threshold type T. Lower-case letters will also work. For many of the options the program will ask for supplementary information, such as the value of the threshold.

Note the Terminal type entry, which you will find on all menus. It allows you to specify which type of terminal your screen is. The options are an IBM PC screen, an ANSI standard terminal, or none. Choosing zero (0) toggles among these three options in cyclical order, changing each time the 0 option is chosen. If one of them is right for your terminal the screen will be cleared before the menu is displayed. If none works, the none option should probably be chosen. The programs should start with a terminal option appropriate for your computer, but if they do not, you can change the terminal type manually. This is particularly important in program RETREE where a tree is displayed on the screen - if the terminal type is set to the wrong value, the tree can look very strange.

The other numbered options control which information the program will display on your screen or on the output files. The option to Print indications of progress of run will show information such as the names of the species as they are successively added to the tree, and the progress of rearrangements. You will usually want to see these as reassurance that the program is running and to help you estimate how long it will take. But if you are running the program "in background" as can be done on multitasking and multiuser systems, and do not have the program running in its own window, you may want to turn this option off so that it does not disturb your use of the computer while the program is running.


The Output File


Most of the programs write their output onto a file called (usually) outfile, and a representation of the trees found onto a file called outtree.

The exact contents of the output file vary from program to program and also depend on which menu options you have selected. For many programs, if you select all possible output information, the output will consist of (1) the name of the program and its version number, (2) some of the input information printed out, and (3) a series of phylogenies, some with associated information indicating how much change there was in each character or on each part of the tree. A typical rooted tree looks like this:

                                     +-------------------Gibbon
        +----------------------------2
        !                            !      +------------------Orang
        !                            +------4
        !                                   !  +---------Gorilla
  +-----3                                   +--6
  !     !                                      !    +---------Chimp
  !     !                                      +----5
--1     !                                           +-----Human
  !     !
  !     +-----------------------------------------------Mouse
  !
  +------------------------------------------------Bovine

The interpretation of the tree is fairly straightforward: it "grows" from left to right. The numbers at the forks are arbitrary and are used (if present) merely to identify the forks. For many of the programs the tree produced is unrooted. Rooted and unrooted trees are printed in nearly the same form, but the unrooted ones are accompanied by the warning message:

remember: this is an unrooted tree!

to indicate that this is an unrooted tree and to warn against taking the position of its root too seriously. Mathematicians still call an unrooted tree a tree, though some systematists unfortunately use the term "network" for an unrooted tree. (This conflicts with standard mathematical usage, which reserves the name "network" for a completely different kind of graph.) The root of this tree could be anywhere, say on the line leading immediately to Mouse. As an exercise, see if you can tell whether the following tree is or is not a different one from the above:

             +-----------------------------------------------Mouse
             !
   +---------4                                   +------------------Orang
   !         !                            +------3
   !         !                            !      !       +---------Chimp
---6         +----------------------------1      !  +----2
   !                                      !      +--5    +-----Human
   !                                      !         !
   !                                      !         +---------Gorilla
   !                                      !
   !                                      +-------------------Gibbon
   !
   +-------------------------------------------Bovine

   remember: this is an unrooted tree!

(it is not different). It is important also to realize that the lengths of the segments of the printed tree may not be significant: some may actually represent branches of zero length, in the sense that there is no evidence that those branches are nonzero in length. Some of the diagrams of trees attempt to print branches approximately proportional to estimated branch lengths, while in others the lengths are purely conventional and are presented just to make the topology visible. You will have to look closely at the documentation that accompanies each program to see what it presents and what is known about the lengths of the branches on the tree. The above tree attempts to represent branch lengths approximately in the diagram. But even in those cases, some of the smaller branches are likely to be artificially lengthened to make the tree topology clearer. Here is what a tree from DNAPARS looks like, when no attempt is made to make the lengths of branches in the diagram proportional to estimated branch lengths:

                 +--Human
              +--5
           +--4  +--Chimp
           !  !
        +--3  +-----Gorilla
        !  !
     +--2  +--------Orang
     !  !
  +--1  +-----------Gibbon
  !  !
--6  +--------------Mouse
  !
  +-----------------Bovine

  remember: this is an unrooted tree!

When a tree has branch lengths, it will be accompanied by a table showing for each branch the numbers (or names) of the nodes at each end of the branch, and the length of that branch. For the first tree shown above, the corresponding table is:

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

    1          Bovine            0.90216     (  0.50346,     1.30086) **
    1          Mouse             0.79240     (  0.42191,     1.16297) **
    1             2              0.48553     (  0.16602,     0.80496) **
    2             3              0.12113     (     zero,     0.24676) *
    3             4              0.04895     (     zero,     0.12668)
    4             5              0.07459     (  0.00735,     0.14180) **
    5          Human             0.10563     (  0.04234,     0.16889) **
    5          Chimp             0.17158     (  0.09765,     0.24553) **
    4          Gorilla           0.15266     (  0.07468,     0.23069) **
    3          Orang             0.30368     (  0.18735,     0.41999) **
    2          Gibbon            0.33636     (  0.19264,     0.48009) **

      *  = significantly positive, P < 0.05
      ** = significantly positive, P < 0.01

Ignoring the asterisks and the approximate confidence limits, which will be described in the documentation file for DNAML, we can see that the table gives a more precise idea of what the lengths of all the branches are. Similar tables exist in distance matrix and likelihood programs, as well as in the parsimony programs DNAPARS and PARS.

Some of the parsimony programs in the package can print out a table of the number of steps that different characters (or sites) require on the tree. This table may not be obvious at first. A typical example looks like this:

 steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       2   2   2   2   1   1   2   2   1
   10!   1   2   3   1   1   1   1   1   1   2
   20!   1   2   2   1   2   2   1   1   1   2
   30!   1   2   1   1   1   2   1   3   1   1
   40!   1

The numbers across the top and down the side indicate which site is being referred to. Thus site 23 is column "3" of row "20" and has 1 step in this case.

There are many other kinds of information that can appear in the output file. They vary from program to program, and we leave their description to the documentation files for the specific programs.


The Tree File

In output from most programs, a representation of the tree is also written into the tree file outtree. The tree is specified by nested pairs of parentheses, enclosing names and separated by commas. We will describe how this works below. If there are any blanks in the names, these must be replaced by the underscore character "_". Trailing blanks in the name may be omitted. The pattern of the parentheses indicates the pattern of the tree by having each pair of parentheses enclose all the members of a monophyletic group. The tree file could look like this:

((Mouse,Bovine),(Gibbon,(Orang,(Gorilla,(Chimp,Human)))));

In this tree the first fork separates the lineage leading to Mouse and Bovine from the lineage leading to the rest. Within the latter group there is a fork separating Gibbon from the rest, and so on. The entire tree is enclosed in an outermost pair of parentheses. The tree ends with a semicolon. In some programs such as DNAML, FITCH, and CONTML, the tree will be unrooted. An unrooted tree should have its bottommost fork have a three-way split, with three groups separated by two commas:

(A,(B,(C,D)),(E,F));

Here the three groups at the bottom node are A, (B,C,D), and (E,F). The single three-way split corresponds to one of the interior nodes of the unrooted tree (it can be any interior node of the tree). The remaining forks are encountered as you move out from that first node. Some of the newer programs are able to tolerate these other forks being multifurcations (multi-way splits). You should check the documentation files for the particular programs you are using to see which of these forms the user tree should be in. Note that many of the programs that actually estimate an unrooted tree (such as DNAPARS) produce trees in the treefile in rooted form! This is done for reasons of arbitrary internal bookkeeping. The placement of the root is arbitrary. We are working toward having all programs be able to read all trees, whether rooted or unrooted, multifurcating or bifurcating, and having them do the right thing with them. But this is a long-term goal and it is not yet achieved.
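
For example, the unrooted user tree shown above could equally well appear in a tree file in rooted form, with the root placed (arbitrarily) on the branch leading to A:

(A,((B,(C,D)),(E,F)));

Both lines describe the same unrooted tree; only the bookkeeping of the root differs.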

For programs that infer branch lengths, these are given in the trees in the tree file as real numbers following a colon, and placed immediately after the group descended from that branch. Here is a typical tree with branch lengths:

((cat:47.14069,(weasel:18.87953,((dog:25.46154,(raccoon:19.19959,
bear:6.80041):0.84600):3.87382,(sea_lion:11.99700,
seal:12.00300):7.52973):2.09461):20.59201):25.0,monkey:75.85931);

Note that the tree may continue to a new line at any time except in the middle of a name or the middle of a branch length, although in trees written to the tree file this will only be done after a comma.

These representations of trees are a subset of the standard adopted on 24 June 1986 at the annual meetings of the Society for the Study of Evolution by an informal committee (its final session in Newick's lobster restaurant - hence its name, the Newick standard) consisting of Wayne Maddison (author of MacClade), David Swofford (PAUP), F. James Rohlf (NTSYS-PC), Chris Meacham (COMPROB and the original PHYLIP tree drawing programs), James Archie, William H.E. Day, and me. This standard is a generalization of PHYLIP's format, itself based on a well-known representation of trees in terms of parenthesis patterns which is due to the famous mathematician Arthur Cayley, and which has been around for over a century. The standard is now employed by most phylogeny computer programs but unfortunately has yet to be given a formal published description. Other descriptions by me and by Gary Olsen can be accessed using the Web at:


The Options and How To Invoke Them

Most of the programs allow various options that alter the amount of information the program is provided or what is done with the information. Options are selected in the menu.

Common options in the menu

A number of the options from the menu, the U (User tree), G (Global), J (Jumble), O (Outgroup), W (Weights), T (Threshold), M (multiple data sets), and the tree output options, are used so widely that it is best to discuss them in this document.

The U (User tree) option. This option toggles between the default setting, which allows the program to search for the best tree, and the User tree setting, which reads a tree or trees ("user trees") from the input tree file and evaluates them. The input tree file's default name is intree. In a few cases the trees should be preceded by a line giving the number of trees:

   3
((Alligator,Bear),((Cow,(Dog,Elephant)),Ferret));
((Alligator,Bear),(((Cow,Dog),Elephant),Ferret));
((Alligator,Bear),((Cow,Dog),(Elephant,Ferret)));

while in most cases the initial line with the number of trees is not required. This is an inconsistency in the programs that we are intending to eliminate soon. Some programs require rooted trees, some unrooted trees, and some can handle multifurcating trees. You should read the documentation for the particular program to find out which it requires. Program RETREE can be used to convert trees among these forms (on saving a tree from RETREE, you are asked whether you want it to be rooted or unrooted).

In using the user tree option, check the pattern of parentheses carefully. The programs do not always detect whether the tree makes sense, and if it does not there will probably be a crash (hopefully, but not inevitably, with an error message indicating the nature of the problem). Trees written out by programs are typically in the proper form.

Some of the programs require that the user trees be preceded by a line with the number of user trees. Some require that they not be preceded by this line, and many can tolerate either. I have tried to note for each of these programs which of these forms of the user tree file is appropriate. We hope to bring all programs to the same user tree file format as soon as possible.

The G (Global) option. In the programs which construct trees (except for NEIGHBOR, the "...PENNY" programs and CLIQUE, and of course the "...MOVE" programs where you construct the trees yourself), after all species have been added to the tree a rearrangements phase ensues. In most of these programs the rearrangements are automatically global, which in this case means that subtrees will be removed from the tree and put back on in all possible ways so as to have a better chance of finding a better tree. Since this can be time consuming (it roughly triples the time taken for a run) it is left as an option in some of the programs, specifically CONTML, FITCH, and DNAML. In these programs the G menu option toggles between the default of local rearrangement and global rearrangement. The rearrangements are explained more below.

The J (Jumble) option. In most of the tree construction programs (except for the "...PENNY" programs and CLIQUE), the exact details of the search of different trees depend on the order of input of species. In these programs the J option enables you to tell the program to use a random number generator to choose the input order of species. This option is toggled on and off by selecting option J in the menu. The program will then prompt you for a "seed" for the random number generator. The seed should be an integer between 1 and 32767, and should be of the form 4n+1, which means that it must give a remainder of 1 when divided by 4. This can be judged by looking at the last two digits of the number (for example, 4533 is acceptable because 33 leaves a remainder of 1 when divided by 4, whereas 4534 is not). Each different seed leads to a different sequence of addition of species. By simply changing the random number seed and re-running the programs one can look for other, and better, trees. If the seed entered is not odd, the program will not proceed, but will prompt for another seed.

The Jumble option also causes the program to ask you how many times you want to restart the process. If you answer 10, the program will try ten different orders of species in constructing the trees, and the results printed out will reflect this entire search process (that is, the best trees found among all 10 runs will be printed out, not the best trees from each individual run).

Some people have asked what are good values of the random number seed. The random number seed is used to start a process of choosing "random" (actually pseudorandom) numbers, which behave as if they were unpredictably and randomly chosen between 0 and 2^32 - 1 (which is 4,294,967,295). You could put in the number 133 and find that the next random number was 1,876,973,009. As they are effectively unpredictable, there is no such thing as a choice that is better than any other, provided that the numbers are of the form 4n+1. However, if you re-use a random number seed, the sequence of random numbers that results will be the same as before, resulting in exactly the same series of choices, which may not be what you want.

The O (Outgroup) option. This specifies which species is to be used to root the tree by having it become the outgroup. This option is toggled on and off by choosing O in the menu (the alphabetic character O, not the digit 0). When it is on, the program will then prompt for the number of the outgroup (the species being taken in the numerical order that they occur in the input file). Responding by typing 6 and then an Enter character indicates that the sixth species in the data is the outgroup. Outgroup-rooting will not be attempted if the data have already established a root for the tree from some other consideration, and may not be if it is a user-defined tree, despite your invoking the option. Thus programs such as DOLLOP that produce only rooted trees do not allow the Outgroup option. It is also not available in KITSCH, DNAMLK, or CLIQUE. When it is used, the tree as printed out is still listed as being an unrooted tree, though the outgroup is connected to the bottommost node so that it is easy to visually convert the tree into rooted form.

The T (Threshold) option. This sets a threshold for the parsimony programs such that if the number of steps counted in a character is higher than the threshold, it will be taken to be the threshold value rather than the actual number of steps. The default is a threshold so high that it will never be surpassed (in which case the steps will simply be counted). The T menu option toggles on and off asking the user to supply a threshold. The use of thresholds to obtain methods intermediate between parsimony and compatibility methods is described in my 1981b paper. When the T option is in force, the program will prompt for the numerical threshold value. This will be a positive real number greater than 1. In programs MIX, MOVE, PENNY, PROTPARS, DNAPARS, DNAMOVE, and DNAPENNY, do not use threshold values less than or equal to 1.0, as they have no meaning and lead to a tree which depends only on considerations such as the input order of species and not at all on the character state data! In programs DOLLOP, DOLMOVE, and DOLPENNY the threshold should never be 0.0 or less, for the same reason. The T option is an important and underutilized one: it is, for example, the only way in this package (except for program DNACOMP) to do a compatibility analysis when there are missing data. It is a method of de-weighting characters that evolve rapidly. I wish more people were aware of its properties.

The M (Multiple data sets) option. In menu programs there is an M menu option which allows one to toggle on the multiple data sets option. The program will ask you how many data sets it should expect. The data sets have the same format as the first data set. Here is a (very small) input file with two five-species data sets:

      5    6
Alpha     CCACCA
Beta      CCAAAA
Gamma     CAACCA
Delta     AACAAC
Epsilon   AACCCA
5    6
Alpha     CACACA
Beta      CCAACC
Gamma     CAACAC
Delta     GCCTGG
Epsilon   TGCAAT

The main use of this option will be to allow all of the methods in these programs to be bootstrapped. Using the program SEQBOOT one can take any DNA, protein, restriction sites, gene frequency or binary character data set and make multiple data sets by bootstrapping. Trees can be produced for all of these using the M option. They will be written on the tree output file if that option is left in force. Then the program CONSENSE can be used with that tree file as its input file. The result is a majority rule consensus tree which can be used to make confidence intervals. The present version of the package allows, with the use of SEQBOOT and CONSENSE and the M option, bootstrapping of many of the methods in the package.

Programs DNAML, DNAPARS and PARS can also take multiple weights instead of multiple data sets. They can then do bootstrapping by reading in one data set, together with a file of weights that show how the characters (or sites) are reweighted in each bootstrap sample. Thus a site that is omitted in a bootstrap sample has effectively been given weight 0, while a site that has been duplicated has effectively been given weight 2. SEQBOOT has a menu selection to produce the file of weights information automatically, instead of producing a file of multiple data sets.

The W (Weights) option. This signals the program that, in addition to the data set, you want to read in a series of weights that tell how many times each character is to be counted. If the weight for a character is zero (0) then that character is in effect omitted when the tree is evaluated. If it is one (1) the character is to be counted once. Some programs allow weights greater than 1 as well. These have the effect that the character is counted as if it were present that many times, so that a weight of 4 means that the character is counted 4 times. The values 0-9 give weights 0 through 9, and the values A-Z give weights 10 through 35. By use of the weights we can give overwhelming weight to some characters, and drop others from the analysis. In the molecular sequence programs only two values of the weights, 0 and 1, are allowed.

The weights are used to analyze subsets of the characters, and also can be used for resampling of the data as in bootstrap and jackknife resampling. For those programs that allow weights to be greater than 1, they can also be used to emphasize information from some characters more strongly than others. Of course, you must have some rationale for doing this.

The weights are provided as a sequence of digits. Thus they might be

10011111100010100011110001100

The weights are to be provided in an input file whose default name is weights. In programs such as SEQBOOT that can also output a file of weights, the input weights have a default file name of inweights, and the output file name has a default file name of outweights.

Weights can be used to analyze different subsets of characters (by weighting the rest as zero). Alternatively, in the discrete characters programs they can be used to force a certain group to appear on the phylogeny (in effect confining consideration to only phylogenies containing that group). This is done by adding an imaginary character that has 1's for the members of the group, and 0's for all the other species. That imaginary character is then given the highest weight possible: the result will be that any phylogeny that does not contain that group will be penalized by such a heavy amount that it will not (except in the most unusual circumstances) be considered. Of course, the new character brings extra steps to the tree, but the number of these can be calculated in advance and subtracted out of the total when reporting the results. This use of weights is an important one, and one sadly ignored by many users who could profit from it. In the case of molecular sequences we cannot use weights this way, so that to force a given group to appear we have to add a large extra segment of sites to the molecule, with (say) A's for that group and C's for every other species.
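
As a sketch of this technique (the species names and data values below are invented purely for illustration), suppose we have five species with six binary characters and wish to force Alpha and Beta to form a group. We append a seventh, imaginary character that is 1 for Alpha and Beta and 0 for all the others:

     5    7
Alpha     0010111
Beta      0111011
Gamma     1011000
Delta     1100010
Epsilon   1100100

and in the weights file we give that last character the largest available weight, Z (35), while the real characters keep weight 1:

111111Z

If Alpha and Beta do form a group on a tree, the imaginary character contributes exactly one step, times its weight of 35, which can be subtracted out when reporting the tree length; any tree on which they do not form a group is penalized much more heavily.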

The option to write out the trees into a tree file. This specifies that you want the program to write out the tree not only on its usual output, but also onto a file in nested-parenthesis notation (as described above). This option is sufficiently useful that it is turned on by default in all programs that allow it. You can optionally turn it off if you wish, by typing the appropriate number from the menu (it varies from program to program). This option is useful for creating tree files that can be directly read into the programs, including the consensus tree and tree distance programs, and the tree plotting programs.

The output tree file has a default name of outtree.

The (0) terminal type option. (This is the digit 0, not the alphabetic character O). The program will default to one particular assumption about your terminal (except in the case of Macintoshes, the default will be an ANSI compatible terminal). You can alternatively select it to be either an IBM PC, or nothing. This affects the ability of the programs to clear the screen when they display their menus, and the graphics characters used to display trees in the programs DNAMOVE, MOVE, DOLMOVE, and RETREE. If you are running an MSDOS system and have the ANSI.SYS driver installed in your CONFIG.SYS file, you may find that the screen clears correctly even with the default setting of ANSI.


The Algorithm for Constructing Trees

All of the programs except FACTOR, DNADIST, GENDIST, DNAINVAR, SEQBOOT, CONTRAST, RETREE, and the plotting and consensus tree programs act to construct an estimate of a phylogeny. MOVE, DOLMOVE, and DNAMOVE let you construct it yourself by hand. All of the rest but NEIGHBOR, the "...PENNY" programs and CLIQUE make use of a common approach involving additions and rearrangements. They are trying to minimize or maximize some quantity over the space of all possible evolutionary trees. Each program contains a part that, given the topology of the tree, evaluates the quantity that is being minimized or maximized. The straightforward approach would be to evaluate all possible tree topologies one after another and pick the one which, according to the criterion being used, is best. This would not be possible for more than a small number of species, since the number of possible tree topologies is enormous. A review of the literature on the counting of evolutionary trees will be found in one of my papers (Felsenstein, 1978a).

Since we cannot search all topologies, these programs are not guaranteed to always find the best tree, although they seem to do quite well in practice. The strategy they employ is as follows: the species are taken in the order in which they appear in the input file. The first two (in some programs the first three) are taken and a tree constructed containing only those. There is only one possible topology for this tree. Then the next species is taken, and we consider where it might be added to the tree. If the initial tree is (say) a rooted tree with two species and we want the resulting three-species tree to be a bifurcating tree, there are only three places where we could add the third species. Each of these is tried, and each time the resulting tree is evaluated according to the criterion. The best one is chosen to be the basis for further operations. Now we consider adding the fourth species, again at each of the five possible places that would result in a bifurcating tree. Again, the best of these is accepted.

Local Rearrangements

The process continues in this manner, with one important exception. After each species is added, and before the next is added, a number of rearrangements of the tree are tried, in an effort to improve it. The algorithms move through the tree, making all possible local rearrangements of the tree. A local rearrangement involves an internal segment of the tree in the following manner. Each internal segment of the tree is of this form (where T1, T2, and T3 are subtrees - parts of the tree that can contain further forks and tips):

            T1      T2       T3
             \      /        /
              \    /        /
               \  /        /
                \/        /
                 *       /
                  *     /
                   *   /
                    * /
                     *
                     !
                     !

the segment we are discussing being indicated by the asterisks. A local rearrangement consists of switching the subtrees T1 and T3 or T2 and T3, so as to obtain one of the following:

          T3       T2      T1            T1       T3      T2
           \       /       /              \       /       /
            \     /       /                \     /       /
             \   /       /                  \   /       /
              \ /       /                    \ /       /
               \       /                      \       /
                \     /                        \     /
                 \   /                          \   /
                  \ /                            \ /
                   !                              !
                   !                              !
                   !                              !

Each time a local rearrangement is successful in finding a better tree, the new arrangement is accepted. The phase of local rearrangements does not end until the program can traverse the entire tree, attempting local rearrangements, without finding any that improve the tree.
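
As a concrete illustration (not PHYLIP's internal representation), the tree fragment shown above can be written as a nested tuple ((T1, T2), T3); the two local rearrangements are then the two ways of swapping T3 inward:

    # The two local rearrangements of ((T1, T2), T3): swap T3 with T2 or with T1.
    def local_rearrangements(tree):
        (t1, t2), t3 = tree
        return [((t1, t3), t2),    # T2 and T3 exchanged
                ((t2, t3), t1)]    # T1 and T3 exchanged

    print(local_rearrangements((("A", "B"), "C")))
    # [(('A', 'C'), 'B'), (('B', 'C'), 'A')]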

This strategy of adding species and making local rearrangements will look at about  (n-1)x(2n-3)  different topologies, though if rearrangements are frequently successful the number may be larger. I have been describing the strategy when rooted trees are being considered. For unrooted trees there is a precisely similar strategy, though the first tree constructed may be a three-species tree and the rearrangements may not start until after the addition of the fifth species.
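
To see how small this number is compared with the full search space, one can compare (n-1)(2n-3) with the number of rooted bifurcating topologies for n species, which is 1 x 3 x 5 x ... x (2n-3). A quick calculation, here sketched in Python for illustration:

    from math import prod

    def topologies_examined(n):
        return (n - 1) * (2 * n - 3)            # the approximate count above

    def all_rooted_topologies(n):
        return prod(range(1, 2 * n - 2, 2))     # 1 x 3 x 5 x ... x (2n-3)

    for n in (10, 20):
        print(n, topologies_examined(n), all_rooted_topologies(n))
    # 10 153 34459425
    # 20 703 8200794532637891559375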

Though we are not guaranteed to have found the best tree topology, we are guaranteed that no nearby topology (i. e. none accessible by a single local rearrangement) is better. In this sense we have reached a local optimum of our criterion. Note that the whole process is dependent on the order in which the species are present in the input file. We can try to find a different and better solution by reordering the species in the input file and running the program again (or, more easily, by using the J option). If none of these attempts finds a better solution, then we have some indication that we may have found the best topology, though we can never be certain of this.

Note also that a new topology is never accepted unless it is better than the previous one, so that the rearrangement process can never fall into an endless loop. This is also the way ties in our criterion are resolved, namely by sticking with the tree found first. However, the tree construction programs other than CLIQUE, CONTML, FITCH, and DNAML do keep a record of all trees found that are tied with the best one found. This gives you some immediate idea of which parts of the tree can be altered without affecting the quality of the result.

Global Rearrangements

A feature of most of the programs, such as PROTPARS, DNAPARS, DNACOMP, DNAML, DNAMLK, RESTML, KITSCH, FITCH, CONTML, MIX, and DOLLOP, is "global" optimization of the tree. In four of these (CONTML, FITCH, DNAML and DNAMLK) this is an option, G. In the others it applies automatically. When it is present there is an additional stage to the search for the best tree. Each possible subtree is removed from the tree and added back in all possible places. This process continues until all subtrees can be removed and added again without any improvement in the tree. The purpose of this extra rearrangement is to make it less likely that one or more species get "stuck" in a suboptimal region of the space of all possible trees. The use of global optimization results in approximately a tripling (3x) of the run time, which is why I have left it as an option in some of the slower programs.

What PHYLIP calls "global" rearrangements are more properly called SPR (subtree pruning and regrafting) by Swofford et. al. (1996) as distinct from the NNI (nearest neighbor interchange) rearrangements that PHYLIP also uses, and the TBR (tree bisection and reconnection) rearrangements that it does not use.

The programs doing global optimization print out a dot "." after each group is removed and re-added to the tree, to give the user some sign that the rearrangements are proceeding. A new line of dots is started whenever a new round of global rearrangements begins following an improvement in the tree. On the line before the dots, a bar of the form "!---------------!" is printed to show how many dots to expect. The dots will not appear at a uniform rate: the later dots, which represent removal of larger groups from the tree (and consequently fewer places in which to try them), will print out more quickly. With some compilers each row of dots may not appear until it is complete.

It should be noted that PENNY, DOLPENNY, DNAPENNY and CLIQUE use a more sophisticated strategy of "depth-first search" with a "branch and bound" search method that guarantees that all of the best trees will be found. In the case of PENNY, DOLPENNY and DNAPENNY there can be a considerable sacrifice of computer time if the number of species is greater than about ten: it is a matter for you to consider whether it is worth it for you to guarantee finding all the most parsimonious trees, and that depends on how much free computer time you have! CLIQUE finds all largest cliques, and does so without undue burning of computer time. Although all of these problems that have been investigated fall into the category of "NP-hard" problems that in effect do not have a rapid solution, the cases that cause this trouble for the largest-cliques algorithm in CLIQUE apparently are not biologically realistic and do not occur in actual data.

Multiple Jumbles

As just mentioned, for most of these programs the search depends on the order in which the species are entered into the tree. Using the J (Jumble) option you can supply a random number seed, which allows the program to add the species in a random order. Jumbling can be done multiple times. For example, if you tell the program to do it 10 times, it will go through the tree-building process 10 times, each with a different random order of adding species. It will keep a record of the trees tied for best over the whole process. In other words, it does not just record the best trees from each of the 10 runs, but records the best ones overall. Of course this is slow, taking 10 times longer than a single run. But it does give us a much greater chance of finding all of the most parsimonious trees. In the terminology of Maddison (1991) it can find different "islands" of trees. The present algorithms do not guarantee that we find all trees in a given "island" from a single run, so multiple runs also help explore those "islands" that are found.
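
Schematically, the multiple-Jumble strategy amounts to the following loop, sketched here in Python; build_tree and score stand in for the program's sequential-addition search and its criterion, and are placeholders rather than PHYLIP routines:

    import random

    def jumble_search(species, build_tree, score, seed, times=10):
        rng = random.Random(seed)              # user-supplied random number seed
        best_score, best_trees = None, []
        for _ in range(times):
            order = species[:]
            rng.shuffle(order)                 # a different addition order each run
            tree = build_tree(order)           # sequential addition + rearrangements
            s = score(tree)                    # e.g. number of steps (smaller is better)
            if best_score is None or s < best_score:
                best_score, best_trees = s, [tree]
            elif s == best_score:
                best_trees.append(tree)        # keep all trees tied for best overall
        return best_score, best_trees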

Saving multiple tied trees

For the parsimony and compatibility programs, one can have a perfect tie between two or more trees. In these programs these trees are all saved. For the newer parsimony programs such as DNAPARS and PARS, global rearrangement is carried out on all of these tied trees. This can be turned off in the menu.

For trees with criteria which are real numbers, such as the distance matrix programs FITCH and KITSCH, and the likelihood programs DNAML, DNAMLK, CONTML, and RESTML, it is difficult to get an exact tie between trees. Consequently these programs save only the single best tree (even though the others may be only a tiny bit worse).

Strategy for Finding the Best Tree

In practice, it is advisable to use the Jumble option and to specify that it be done many times (as many as ten), so that many different orderings of the input species are evaluated.

People who want a magic "black box" program whose results they do not have to question (or think about) often are upset that these programs give results that are dependent on the order in which the species are entered in the data. To me this property is an advantage, for it permits you to try different searches for better trees, simply by varying the input order of species. If you do not use the multiple Jumble option, but do multiple individual runs instead, you can easily decide which to pay most attention to - the one or ones that are best according to the criterion employed (for example, with parsimony, the one out of the runs that results in the tree with the fewest changes).

In practice, in a single run, it usually seems best to put species that are likely to be sources of confusion in the topology last, as by the time they are added the arrangement of the earlier species will have stabilized into a good configuration, and then the last few species will be fitted into that topology. There will be less chance this way of a poor initial topology that would affect all subsequent parts of the search. However, a variety of arrangements of the input order of species should be tried, as can be done if the J option is used, and no species should be kept in a fixed place in the order of input. Note that the results of the "...PENNY" programs and CLIQUE are not sensitive to the input order of species, and NEIGHBOR is only slightly sensitive to it, so that multiple Jumbling is not possible with those programs. Note also that with global search, which is standard in many programs and an option in others, each group (including each individual species) will be removed and re-added in all possible positions, so that a species causing confusion will have more chance of moving to a new location than it would without global rearrangement.


A Warning on Interpreting Results

Probably the most important thing to keep in mind while running any of the parsimony or compatibility programs is not to overinterpret the result. Many users treat the set of most parsimonious trees as if it were a confidence interval. If a group appears in all of the most parsimonious trees then they treat it as well established. Unfortunately the confidence interval on phylogenies appears to be much larger than the set of all most parsimonious trees (Felsenstein, 1985b). Likewise, variation of result among different methods will not be a good indicator of the size of the confidence interval. Consider a simple data set in which, out of 100 binary characters, 51 recommend the unrooted tree ((A,B),(C,D)) and 49 the tree ((A,D),(B,C)). Many different methods will all give the same result on such a data set: they will estimate the tree as ((A,B),(C,D)). Nevertheless it is clear that the 51:49 margin by which this tree is favored is not statistically significantly different from 50:50. So consistency among different methods is a poor guide to statistical significance.
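
The point is easy to check with a two-sided binomial test; here is a quick sketch in Python, using only the standard library:

    from math import comb

    def two_sided_p(k, n):
        """Two-sided p-value for k successes out of n under a 50:50 null."""
        upper_tail = sum(comb(n, i) for i in range(k, n + 1)) * 0.5 ** n
        return min(1.0, 2 * upper_tail)

    print(two_sided_p(51, 100))    # about 0.92 -- nowhere near significance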


Relative Speed of Different
Programs and Machines

Relative speed of the different programs

C compilers differ in efficiency of the code they generate, and some deal with some features of the language better than with others. Thus a program which is unusually fast on one computer may be unusually slow on another. Nevertheless, as a rough guide to relative execution speeds, I have tested the programs on three data sets, each of which has 10 species and 40 characters. The first is an imaginary one in which all characters are compatible ("The Willi Hennig Memorial Data Set", as J. S. Farris once called ones like it). The second is the binary recoded form of the fossil horses data set of Camin and Sokal (1965). The third data set is completely random: 10 species and 20 characters in which each character state has a 50% chance of being 0 or 1 (or A or C). The data sets thus range from a completely compatible one in which there is no homoplasy (parallelism or convergence), through the horses data set, which requires 29 steps where the possible minimum number would be 20, to the random data set, which requires 49 steps. We can thus see how this increasing messiness of the data affects running times. The three data sets have all had 20 sites of A's added to the end of each sequence, so as to prevent likelihood or distance matrix programs from having infinite branch lengths (the test data sets used for timing previous versions of PHYLIP were the same except that they lacked these 20 extra sites).

Here are the nucleotide sequence versions of the three data sets:

    10   40
A         CACACACAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAA
B         CACACAACAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAA
C         CACAACAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAA
D         CAACAAAACAAAAAAAAACAAAAAAAAAAAAAAAAAAAAA
E         CAACAAAAACAAAAAAAACAAAAAAAAAAAAAAAAAAAAA
F         ACAAAAAAAACACACAAAACAAAAAAAAAAAAAAAAAAAA
G         ACAAAAAAAACACAACAAACAAAAAAAAAAAAAAAAAAAA
H         ACAAAAAAAACAACAAAAACAAAAAAAAAAAAAAAAAAAA
I         ACAAAAAAAAACAAAACAACAAAAAAAAAAAAAAAAAAAA
J         ACAAAAAAAAACAAAAACACAAAAAAAAAAAAAAAAAAAA

    10   40
MesohippusAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
HypohippusAAACCCCCCCAAAAAAAAACAAAAAAAAAAAAAAAAAAAA
ArchaeohipCAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAA
ParahippusCAAACAACAACAAAAAAAACAAAAAAAAAAAAAAAAAAAA
MerychippuCCAACCACCACCCCACACCCAAAAAAAAAAAAAAAAAAAA
M. secunduCCAACCACCACCCACACCCCAAAAAAAAAAAAAAAAAAAA
Nannipus  CCAACCACAACCCCACACCCAAAAAAAAAAAAAAAAAAAA
NeohippariCCAACCCCCCCCCCACACCCAAAAAAAAAAAAAAAAAAAA
Calippus  CCAACCACAACCCACACCCCAAAAAAAAAAAAAAAAAAAA
PliohippusCCCACCCCCCCCCACACCCCAAAAAAAAAAAAAAAAAAAA

    10   40
A         CACACAACCAAACAAACCACAAAAAAAAAAAAAAAAAAAA
B         AAACCACACACACAAACCCAAAAAAAAAAAAAAAAAAAAA
C         ACAAAACCAAACCACCCACAAAAAAAAAAAAAAAAAAAAA
D         AAAAACACAACACACCAAACAAAAAAAAAAAAAAAAAAAA
E         AAACAACCACACACAACCAAAAAAAAAAAAAAAAAAAAAA
F         CCCAAACACCCCCAAAAAACAAAAAAAAAAAAAAAAAAAA
G         ACACCCCCACACCCACCAACAAAAAAAAAAAAAAAAAAAA
H         AAAACAACAACCACCCCACCAAAAAAAAAAAAAAAAAAAA
I         ACACAACAACACAAACAACCAAAAAAAAAAAAAAAAAAAA
J         CCAAAAACACCCAACCCAACAAAAAAAAAAAAAAAAAAAA

Here are the timings of many of the version 3.6 programs on these three data sets, compiled with Gnu C and run on a 266 MHz Pentium MMX computer under Linux.

Program      Hennigian Data   Horses Data   Random Data
PROTPARS          0.133           0.167         0.308
DNAPARS           0.163           0.191         0.573
DNAPENNY          0.300           0.196        36.68
DNACOMP           0.081           0.073         0.127
DNAML             2.19            2.53          2.73
DNAMLK            5.40            6.13          7.21
PROML            44.79           90.46         68.49
PROMLK          171.01          183.61        239.34
DNAINVAR          0.002           0.002         0.002
DNADIST           0.029           0.024         0.033
PROTDIST          1.095           1.089         1.107
RESTML            3.55            3.18          5.15
RESTDIST          0.012           0.010         0.010
FITCH             0.20            0.31          0.24
KITSCH            0.055           0.061         0.058
NEIGHBOR          0.003           0.004         0.005
CONTML            0.380           0.368         0.396
GENDIST           0.008           0.009         0.008
PARS              0.201           0.263         0.729
MIX               0.064           0.078         0.123
PENNY             0.038           0.087        15.93
DOLLOP            0.134           0.141         0.233
DOLPENNY          0.051           0.241       101.29
CLIQUE            0.010           0.015         0.020


In all cases the programs were run under the default options without compiler switches, except as specified here. The data sets used for the discrete characters programs have 0's and 1's instead of A's and C's. For CONTML the A's and C's were made into 0.0's and 1.0's and considered as 40 2-allele loci. For the distance programs 10 x 10 distance matrices were computed from the three data sets. For the restriction sites programs A and C were changed into + and -. It does not make much sense to benchmark MOVE, DOLMOVE, or DNAMOVE, although when there are many characters and many species the response time after each alteration of the tree should be proportional to the product of the number of species and the number of characters. For DNAML and DNAMLK the frequencies of the four bases were set to be equal rather than determined empirically as is the default. For RESTML the number of enzymes was set to 1.

In most cases, the benchmark was made more accurate by analyzing 10 data sets using the M (Multiple data sets) option and dividing the resulting time by 10. Times were determined as user times using the Linux time command. Several patterns will be apparent from this. The algorithms (MIX, DOLLOP, CONTML, FITCH, KITSCH, PROTPARS, DNAPARS, DNACOMP, DNAML, DNAMLK, and RESTML) that use the above-described addition strategy have run times that do not depend strongly on the messiness of the data. The only exception is that if a data set such as the Random data requires extra rounds of global rearrangements, it takes longer. The programs differ greatly in run time: the likelihood programs RESTML, DNAML and CONTML are quite a bit slower than the others. The protein sequence parsimony program, which has to do a considerable amount of bookkeeping to keep track of which amino acids can mutate to each other, is also relatively slow.

Another class of algorithms includes PENNY, DOLPENNY, DNAPENNY and CLIQUE. These are branch-and-bound methods: in principle they should have execution times that rise exponentially with the number of species and/or characters, and they might be much more sensitive to messy data. This is apparent with PENNY, DOLPENNY, and DNAPENNY, which go from being reasonably fast with clean data to very slow with messy data. DOLPENNY is particularly slow on messy data - this is because its algorithm cannot make use of some of the lower-bound calculations that are possible with DNAPENNY and PENNY. CLIQUE is very fast on all data sets. Although in theory it should bog down if the number of cliques in the data is very large, that does not happen with random data, which in fact has few cliques, and small ones at that. Apparently the "worst-case" data sets that cause exponential run time are much rarer for CLIQUE than for the other branch-and-bound methods.

NEIGHBOR is quite fast compared to FITCH and KITSCH, which should make it possible to run much larger cases, although the results are expected to be a bit rougher than with those programs.

Speed with different numbers of species

How will the speed depend on the number of species and the number of characters? For the sequential-addition algorithms, the speed should be proportional to somewhere between the cube of the number of species and the square of the number of species, and to the number of characters. Thus a case that has, instead of 10 species and 20 characters, 20 species and 50 characters would take (in the cubic case) 2 x 2 x 2 x 2.5 = 20 times as long. This implies that cases with more than 20 species will be slow, and cases with more than 40 species very slow. This places a premium on working on small subproblems rather than just dumping a whole large data set into the programs.

An exception to these rules will be some of the DNA programs that use an aliasing device to save execution time. In these programs execution time will not necessarily increase proportional to the number of sites, as sites that show the same pattern of nucleotides will be detected as identical and the calculations for them will be done only once, which does not lead to more execution time. This is particularly likely to happen with few species and many sites, or with data sets that have small amounts of evolutionary divergence.
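
The idea can be sketched as collapsing the alignment into its distinct site patterns, each with a count of how often it occurs; this is a minimal illustration in Python, not the actual PHYLIP bookkeeping:

    from collections import Counter

    def site_patterns(sequences):
        """sequences: equal-length aligned strings, one per species.
        Returns a count of how many sites show each column pattern."""
        return Counter(zip(*sequences))

    patterns = site_patterns(["AACCA", "AACCC", "AACAA"])
    print(len(patterns), patterns)   # 5 sites collapse to 4 distinct patterns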

For programs FITCH and KITSCH, the distance matrix is square, so that when we double the number of species we also double the number of "characters", so that running times will go up as the fourth power of the number of species rather than the third power. Thus a 20-species case with FITCH is expected to run sixteen times more slowly than a 10-species case.
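
Worked as a quick calculation (a sketch of the scaling arguments above; the cubic exponent and the example sizes are the ones used in the text):

    def relative_time(n1, c1, n2, c2, power=3):
        """Run time of case 2 relative to case 1, assuming time ~ species**power * characters."""
        return (n2 / n1) ** power * (c2 / c1)

    print(relative_time(10, 20, 20, 50))    # 2 x 2 x 2 x 2.5 = 20 times as long
    # For FITCH/KITSCH the "characters" are the n x n distances, so doubling
    # the number of species also doubles the "characters": a fourth-power dependence.
    print(relative_time(10, 10, 20, 20))    # 16 times as long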

For programs like PENNY and CLIQUE the run times will rise faster than the cube of the number of species (in fact, they can rise faster than any power since these algorithms are not guaranteed to work in polynomial time). In practice, PENNY will frequently bog down above 11 species, while CLIQUE easily deals with larger numbers.

For NEIGHBOR the speed should vary only as the square of the number of species, so a case twice as large will take only four times as long. This will make it an attractive alternative to FITCH and KITSCH for large data sets.

Note: If you are unsure of how long a program will take, try it first on a few species, then work your way up until you get a feel for the speed and for what size of problem you can afford to run.

Execution time is not the most important criterion for a program, particularly as computer time gets much cheaper than your time or a programmer's time. With workstations on which background jobs can be run all night, execution speed is not overwhelmingly relevant. Some of us have been conditioned by an earlier era of computing to consider execution speed paramount. But ease of use, ease of adaptation to your computer system, and ease of modification are much more important in practice, and in these respects I think these programs are adequate. Only if you are engaged in 1960's style mainframe computing, or if you have very large amounts of data is minimization of execution time paramount.

Nevertheless it would have been nice to have made the programs faster. The present speeds are a compromise between speed and effectiveness: by making them slower and trying more rearrangements in the trees, or by enumerating all possible trees, I could have made the programs more likely to find the best tree. By trying fewer rearrangements I could have speeded them up, but at the cost of finding worse trees. I could also have speeded them up by writing critical sections in assembly language, but this would have sacrificed ease of distribution to new computer systems. There are also some options included in these programs that make it harder to adopt some of the economies of bookkeeping that make other programs faster. However to some extent I have simply made the decision not to spend time trying to speed up program bookkeeping when there were new likelihood and statistical methods to be developed.

Relative speed of different machines

It is interesting to compare different machines using DNAPARS as the standard task. One can rate a machine on the DNAPARS benchmark by summing the times for all three of the data sets. Here are relative total timings over all three data sets (done with various versions of DNAPARS) for some machines, taking a Pentium MMX 266 notebook computer running Linux with gcc as the standard. Benchmarks from versions 3.4 and 3.5 of the program (respectively the Pascal and C versions) are included; their timings are shown in parentheses. They are compared only with each other and are scaled to the rest of the timings using the joint runs on the 386SX and the Pentium MMX 266. This use of separate standards is necessary not because of different languages but because different versions of the package are being compared. Thus, the "Time" is the ratio of the Total to that for the Pentium, adjusted by the scalings of machines using 3.4 and 3.5 when appropriate. The Relative Speed is the reciprocal of the Time.
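
For example, the Pentium 120 row in the table below can be reproduced from its Total of 1.848 seconds and the Pentium MMX 266 standard of 0.927 seconds:

    standard_total = 0.927     # Pentium MMX 266, total seconds over the three data sets
    machine_total = 1.848      # Pentium 120

    time = machine_total / standard_total
    print(round(time, 3), round(1 / time, 4))    # 1.994 and 0.5016, as in the table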

Machine                        Operating System    Compiler                        Total        Time    Relative Speed
Toshiba T1100+ MSDOS Turbo Pascal 3.01A (269) 1758.2 0.0005688
Apple Mac Plus MacOS Lightspeed Pascal 2 (175.84) 1149.3 0.0008701
Toshiba T1100+ MSDOS Turbo Pascal 5.0 (162) 1058.9 0.0009443
Macintosh Classic MacOS Think Pascal 3 (160) 1045.8 0.0009562
Macintosh Classic MacOS Think C (43.0) 795.6 0.0012569
IBM PS2/60 MSDOS Turbo Pascal 5.0 (58.76) 384.00 0.002604
80286 (12 Mhz) MSDOS Turbo Pascal 5.0 (47.09) 307.77 0.003249
Apple Mac IIcx MacOS Think Pascal 3 (42) 274.44 0.003644
Apple Mac SE/30 MacOS Think Pascal 3 (42) 274.44 0.003644
Apple Mac IIcx MacOS Lightspeed Pascal 2 (39.84) 260.44 0.003840
Apple Mac IIcx MacOS Lightspeed Pascal 2# (39.69) 259.33 0.003856
Zenith Z386 (16MHz) MSDOS Turbo Pascal 5.0 (38.27) 256.67 0.003896
Macintosh SE/30 MacOS Think C (13.6) 251.56 0.003975
386SX (16 MHz) MSDOS Turbo Pascal 6.0 (34) 222.41 0.004496
386SX (16 MHz) MSDOS Microsoft Quick C (12.01) 222.41 0.004496
Sequent-S81 DYNIX Silicon Valley Pascal (13.0) 84.89 0.011780
VAX 11/785 Unix Berkeley Pascal (11.9) 77.77 0.012857
80486-33 MSDOS Turbo Pascal 6.0 (11.46) 74.89 0.013353
Sun 3/60 SunOS Sun C (3.93) 72.67 0.013761
NeXT Cube (68030) Mach Gnu C (2.608) 48.256 0.02072
Sequent S-81 DYNIX Sequent Symmetry C (2.604) 48.182 0.02075
VAXstation 3500 Unix Berkeley Pascal (7.3) 47.777 0.02093
Sequent S-81 DYNIX Berkeley Pascal (5.6) 36.600 0.02732
Unisys 7000/40 Unix Berkeley Pascal (5.24) 34.244 0.02920
VAX 8600 VMS DEC VAX Pascal (3.96) 25.889 0.03863
Sun SPARC IPX SunOS Gnu C version 2.1 (1.28) 23.689 0.04221
VAX 6000-530 VMS DEC C (0.858) 15.867 0.06303
VAXstation 4000 VMS DEC C (0.809) 14.978 0.06677
IBM RS/6000 540 AIX XLP Pascal (2.276) 14.866 0.06726
NeXTstation(040/25) Mach Gnu C (0.75) 13.867 0.07212
Sun SPARC IPX SunOS Sun C (0.68) 12.580 0.07951
486DX (33 MHz) Linux Gnu C # (0.63) 11.666 0.08571
Sun SPARCstation-1 Unix Sun Pascal (1.7) 11.111 0.09000
DECstation 5000/200 Unix DEC Ultrix C (0.45) 8.333 0.12000
Sun SPARC 1+ SunOS Sun C (0.40) 7.400 0.13513
DECstation 3100 Unix DEC Ultrix Pascal (0.77) 5.022 0.1991
IBM 3090-300E AIX Metaware High C (0.27) 5.000 0.2000
DECstation 5000/125 Unix DEC Ultrix C (0.267) 4.933 0.2027
DECstation 5000/200 Unix DEC Ultrix C (0.256) 4.733 0.2113
Sun SPARC 4/50 SunOS Sun C (0.249) 4.607 0.2171
DEC 3000/400 AXP Unix DEC C (0.224) 4.144 0.2413
DECstation 5000/240 Unix DEC Ultrix C (0.1889) 3.496 0.2861
SGI Iris R4000 Unix SGI C (0.184) 3.404 0.2937
IBM 3090-300E VM Pascal VS (0.464) 3.022 0.3309
DECstation 5000/200 Unix DEC Ultrix Pascal (0.39) 2.533 0.3947
Pentium 120 Linux Gnu C 1.848 1.994 0.5016
Pentium Pro 180 Linux Gnu C 1.009 1.088 0.9353
Pentium 266 MMX Linux Gnu C (PHYLIP 3.5) (0.054) 1.0 1.0
Pentium 266 MMX Linux Gnu C 0.927 1.0 1.0
Pentium 200 Linux Gnu C 0.853 0.9202 1.2647
SGI PowerChallenge Irix Gnu C 0.844 0.9297 1.0756
DEC Alpha 400 4/233 DUNIX Digital C (cc -fast) 0.730 0.7875 1.2699
Pentium II 500 Linux Gnu C 0.368 0.4053 2.467
Compaq/Digital Alpha 500au DUNIX Digital C (cc -fast) 0.167 0.1805 5.541

This benchmark not only reflects integer performance of these machines (as DNAPARS has few floating-point operations) but also the efficiency of the compilers. Some of the machines (the DEC 3000/400 AXP and the IBM RS/6000, in particular) are much faster than this benchmark would indicate. The numerical programs benchmark below gives them a fairer test. The Compaq/Digital Alpha 500au times are exaggerated because, although their compiles are optimized for that processor, the Pentium compiles are not similarly optimized.

Note that parallel machines like the Sequent and the SGI PowerChallenge are not really as slow as indicated by the data here, as these runs did nothing to take advantage of their parallelism.

These benchmarks have now extended over 13 years, and in the DNAPARS benchmark they extend over a range of 8000-fold in speed! The experience of our laboratory, which seems typical, is that computer power grows by a factor of about 1.85 per year. This is roughly consistent with these benchmarks.

For a picture of speeds for a more numerically intensive program, here are benchmarks using DNAML, with the Pentium MMX 266 as the standard. Some of the timings, the ones in parentheses, are using PHYLIP version 3.5, and those are compared to that version run on the Pentium 266. Runs using the PHYLIP 3.4 Pascal version are adjusted using the 386SX timings where both were run. Numbers are total run times (total user time in the case of Unix) over all three data sets.

Machine                        Operating System    Compiler               Seconds        Time    Relative Speed
386SX 16 Mhz PCDOS Turbo Pascal 6 (7826) 181.18 0.005519
386SX 16 Mhz PCDOS Quick C (6549.79) 181.18 0.005519
Compudyne 486DX/33 Linux Gnu C (1599.9) 44.26 0.022595
SUN Sparcstation 1+ SunOS Sun C (1402.8) 38.805 0.025770
Everex STEP 386/20 PCDOS Turbo Pascal 5.5 (1440.8) 33.356 0.029980
486DX/33 PCDOS Turbo C++ (1107.2) 30.628 0.032650
Compudyne 486DX/33 PCDOS Waterloo C/386 (1045.78) 28.929 0.034567
Sun SPARCstation IPX SunOS Gnu C (960.2) 26.562 0.037648
NeXTstation(68040/25) Mach Gnu C (916.6) 25.355 0.039439
486DX/33 PCDOS Waterloo C/386 (861.0) 23.817 0.041986
Sun SPARCstation IPX SunOS Sun C (787.7) 21.790 0.045893
486DX/33 PCDOS Gnu C (650.9) 18.006 0.05554
VAX 6000-530 VMS DEC C (637.0) 17.621 0.05675
DECstation 5000/200 Unix DEC Ultrix RISC C (423.3) 11.710 0.08540
IBM 3090-300E AIX Metaware High C (201.8) 5.582 0.17914
Convex C240/1024 Unix C (101.6) 2.8105 0.35581
DEC 3000/400 AXP Unix DEC C (98.29) 2.7189 0.36779
Pentium 120 Linux Gnu C 25.26 3.3906 0.29493
Pentium Pro 180 Linux Gnu C 18.88 2.5342 0.3946
Pentium 200 Linux Gnu C 16.51 2.2161 0.4512
SGI PowerChallenge IRIX Gnu C 12.446 1.6706 0.5985
Pentium MMX 266 Linux Gnu C (PHYLIP 3.5) (36.15) 1.0 1.0
DEC Alpha 400 4/233 Linux Gnu C (cc -fast) 8.0418 1.0792 0.9266
Pentium MMX 266 Linux Gnu C 7.45 1.0 1.0
Pentium II 500 Linux Gnu C 6.02 0.8081 1.2375
Compaq/Digital Alpha 500au Linux Gnu C (cc -fast) 0.9383 0.1259 7.940

As before, the parallel machines such as the Convex and the SGI PowerChallenge were only run using one processor, which does not take into account the gain that could be obtained by parallelizing the programs. The speed of the Compaq/Digital Alpha 500au is exaggerated because it was compiled in a way optimized for its processor, while the Pentium compiles were not.

You are invited to send me figures for your machine for inclusion in future tables. Use the data sets above and compute the total times for DNAPARS and for DNAML for the three data sets (setting the frequencies of the four bases to 0.25 each for the DNAML runs). Be sure to tell me the name and version of your compiler, and the version of PHYLIP you tested. If the times are too small to be measured accurately, obtain the times for ten data sets (the Multiple data sets option) and divide by 10.


General Comments on Adapting
the Package to Different Computer Systems

In the sections following you will find instructions on how to adapt the programs to different computers and compilers. The programs should compile without alteration on most versions of C. They use the "malloc" (or "calloc") library function to allocate memory, so that the upper limits on how many species, sites, or characters they can handle are set by the system memory available to that memory-allocation function.

In the document file for each program, I have supplied a small input example, and the output it produces, to help you check whether the programs are running properly.


Compiling the programs

If you have not been able to get executables for PHYLIP, you should be able to make your own. This is easy under Unix and Linux, but more difficult if you have a Macintosh or a Windows system. If you have one of those, we strongly recommend that you download and use the PowerMac and Windows executables that we distribute. If you do that, you will not need to have any compiler or to do any compiling. I get a certain number of inquiries each year from confused users who are not sure what a compiler is but think they need one. After downloading the executables they contact me and complain that they did not find a compiler included in the package, and would I please e-mail them the compiler. What they really need to do is use the executables and forget about compiling them.

Some users may also need to compile the programs in order to modify them. The instructions below will help with this.

I will discuss how to compile PHYLIP using one of a number of widely-used compilers. After these I will comment on compiling PHYLIP on other, less widely-used systems.

Unix and Linux

In Unix and Linux (which is Unix in all important functional respects, if not in all legal respects) it is easy to compile PHYLIP yourself, which is why we have generally not bothered to distribute executables for Unix. Unix (and Linux) systems generally have a C compiler and have the make utility. We distribute with the PHYLIP source code a Unix-compatible Makefile.

After you have finished unpacking the Documentation and Source Code archive, you will find that you have created a directory phylip in which there are three subdirectories, called exe, src, and doc. There is also an HTML web page, phylip.html. The exe directory will be empty; src contains the source code files, including the Makefile; and doc contains the documentation files.

Enter the src directory. Before you compile, you will want to look at the makefile and see whether you want to alter the compilation command. There are careful instructions in the Makefile telling you how to do this. To compile all the programs just type:

make install

You will then see the compiling commands as they happen, with occasional warning messages. If these are warnings, rather than errors, they are not too serious. A typical warning would be like this:

dnaml.c:1204: warning: static declaration for re_move follows non-static

After a time the compiler will finish compiling. If you have done a make install the system will then move the executables into the exe subdirectory and also save space by erasing all the relocatable object files that were produced in the process. You should be left with useable executables in the exe directory, and the src directory should be as before. To run the executables, go into the exe directory and type the program name (say dnaml). The names of the executables will be the same as the names of the C programs, but without the .c suffix. Thus dnaml.c compiles to make an executable called dnaml.

A typical Unix or Linux installation would put the directory phylip in /usr/local. The name of the executables directory EXEDIR could be changed to be /usr/local/bin, so that the make install command puts the executables there. If the users have /usr/local/bin in their paths, the programs would be found when their names are typed. The font files font1 through font6 could also be placed there. A batch script containing the lines

      ln -s /usr/local/bin/font1 font1
      ln -s /usr/local/bin/font2 font2
      ln -s /usr/local/bin/font3 font3
      ln -s /usr/local/bin/font4 font4
      ln -s /usr/local/bin/font5 font5
      ln -s /usr/local/bin/font6 font6

could be used to establish links in the user's working directory so that Drawtree and Drawgram would find these font files when users type a name such as font1 when the program asks them for a font file name. The documentation web pages are in subdirectory doc of the main PHYLIP directory, except for one, phylip.html which is in the main PHYLIP directory. It has a table of all of the documentation pages, including this one. If users create a bookmark to that page it can be used to access all of the other documentation pages.

To compile just one program, such as DNAML, type:

make dnaml

After this compilation, dnaml will be in the src subdirectory. So will some relocatable object code files that were used to create the executable. These have names ending in .o - they can safely be deleted.

If you have problems with the compilation command, you can edit the Makefile. It has careful explanations at its front of how you might want to do so. For example, you might want to change the C compiler name cc to the name of the Gnu C compiler, gcc. This can be done by removing the comment character # from the front of one line, and placing it at the front of a nearby line. How to do so should be clear from the material at the beginning of the Makefile. We have included sample lines for using the gcc compiler and for using the Cygwin Gnu C++ environment on Windows, as well as the default of cc.

Some older C compilers (notably the Berkeley C compiler which is included free with some Sun systems) do not adhere to the ANSI C standard (because they were written before it was set down). They have trouble with the function prototypes which are in our programs. We have included an #ifndef preprocessor command to eliminate the problem, if you use the switch -DOLDC when compiling. Thus with these compilers you need only use this in your C flags (in the Makefile) and compilers such as Berkeley C will cause no trouble.

Macintosh PowerMacs

Compiling with Metrowerks Codewarrior on Macintosh PowerMacs...

We shall assume that you have a recent version of the Metrowerks Codewarrior C++ compiler. This description, and the project files that we provide, assume Codewarrior 5.3. We also assume some familiarity with the use of the Codewarrior compiler and its Integrated Development Environment (IDE).

Start with our src directory (folder) that contains the C source code files such as dnaml.c and also the Codewarrior resource files such as dnaml.rsrc, which are provided by us.

Creating the project file. We will use DnaML as our example. We have provided a full set of project files in the self-extracting Macintosh archive. If you have them then you do not need to do the items on the following list:

  1. Start up the Codewarrior IDE integrated development environment.
  2. Create a new project file by choosing New... on the File menu.
  3. Type in the project name dnaml.proj
  4. On the Project menu on the left side of the New window, double-click on MacOS C/C++ Stationery
  5. In the New project window that opens, click on the triangle to the left of Standard Console.
  6. Move the slider at the right of the window down until you reach SIOUX-WASTE
  7. Click on the triangle to the left of SIOUX-WASTE. This opens another list of choices below.
  8. Click on the menu item SIOUX-WASTE C PPC. Press the OK button. After a bit a window dnaml.proj will open.
  9. Click on the triangle to the left of the Sources menu item. A template item called HelloWorld.c will open.
  10. Select HelloWorld.c.
  11. Open the Edit menu at the top of the Mac screen and select Clear. A box will open asking if you want to remove HelloWorld.c from the project.
  12. Select OK.
  13. If the dnaml.c file came from the self-extracting Macintosh archive that we distribute, it should show a yellow-and-black-striped Metrowerks icon (if not, as when you get it from some other form of our distribution, you may have to pass it through a program like Microsoft Word, making sure to save it as a Text Only file, to get Metrowerks to be able to see it as a potential source code file).
  14. Drag the dnaml.c file onto the Sources item in your dnaml.proj window.
  15. Drop it onto Sources so that it appears under the Sources choice. This may take a few tries -- if it appears above Sources grab it and move it again.
  16. Now add the other files that must be compiled with dnaml.c. These can be identified by looking at our Makefile -- for DnaML they are seq.c, phylip.c, seq.h, and phylip.h. Each of them needs to be added to the project file in the same way that dnaml.c was.
  17. Drag dnaml.rsrc into Sources in the same way. It doesn't matter whether it appears before or after dnaml.c.
  18. Go to the Edit menu and select the PPC Std C SIOUX-WASTE Settings item. A window of that name will then open.
  19. Under the Target item you will see a PPC Target item. Select it. A PPC Target window will open to the right.
  20. Change the name in the File Name box to be PHYLIP
  21. Change the ???? in the Creator box to (say) PHYD
  22. Change the Preferred Heap Size to 1024.
  23. Under Language Settings in the left-hand menu of the window, select C/C++ Language. A window called C/C++ Language will open to the immediate right.
  24. Click on Require Function Prototypes to deselect that setting.
  25. Click on the Save button at the lower-right of the project settings window.
  26. Close the PPC Std C SIOUX-WASTE Settings window using the usual box in the upper-left corner.
  27. On your Desktop you should now find a folder PHYLIP. If it has a file called HelloWorld.c you may want to discard that file.
  28. In that PHYLIP folder you will find a file dnaml.proj.
  29. Double-click on that project file. If the Metrowerks is not already open, it should open now.
  30. If a window called Project Messages opens and there is a complaint in it about access paths being wrong, you should fix these by selecting the Reset project entry paths item in the Project menu.
  31. Select the Make item in the Project menu.
Compiling a program once its resource file is available. If the resource files are all available (as they should be), you do not need to do any of the above. Usually users will have no need to compile the programs, but occasionally they may want to change a setting or add a feature. In that case the Metrowerks Codewarrior compiler can be used. We have provided support for compiling the programs in its most recent version, version 5.3. The following discussion will assume that you have obtained and installed the compiler.

You should find in the source code directory src a subdirectory called mac which contains the Metrowerks Codewarrior compiler "project files" (with names ending in .proj), as well as the resource files (which end in .rsrc) for each program. You can get into this subdirectory, activate the Metrowerks compiler, and open the appropriate project file. To compile the program, simply make sure that the project file is an active window, and type Command-M (which is to say, hold down the Command key while typing M). Alternatively, pull down the Project menu and select Make. The program should then compile, possibly with ignorable warning messages.

Windows systems

Compiling with Microsoft Visual C++

Microsoft Visual C++ is used to compile the executables we distribute for Windows. It can compile using a Makefile. We have supplied this in the source code distribution as Makefile.msvc. You will need to preserve the Unix Makefile by renaming it to, say, Makefile.unix, then make a copy of Makefile.msvc and call it Makefile.

Setting the path. Before using nmake you will need to have the paths set properly. For this, first use the Start menu to open a Command or DOS Prompt window. To set the path, type

set MSVC=Path
where Path is where Microsoft Visual Studio is installed (e.g. it might be in c:\Microsoft Visual Studio). However, the path you type should not have any spaces in it. This means that you may have to use the directory's DOS filename. In general, to get a DOS name you take the first six letters of the directory name and follow them by ~1. For example, Microsoft Visual Studio will have the DOS name Micros~1, and Program Files will be Progra~1. Depending on what other files are in the directory, the DOS name may be the first six letters followed by ~2, ~3, ~4, etc. (e.g. Micros~3 or Progra~5). It may take some experimentation to figure it out. With older versions of Windows (pre-Windows 2000) it may be possible to just right-click on the directory icon and select Properties to get the DOS name.

Once you have set MSVC, type

PATH=%PATH%;%MSVC%\VC98\bin
Then the Makefile will need to be edited. The line
MSVCPATH=c:\Micros~1\VC98
will need to be changed so that it points to wherever Microsoft Visual Studio is installed, followed by \VC98.

Using the Makefile. The Makefile is invoked using the nmake command. If you simply type nmake you will get a list of possible make commands. For example, to compile a single program such as Dnaml but not install it, type nmake dnaml. To compile and install all programs type nmake install. We have supplied all the support files and icons needed for the compilations. They are in subdirectory msvc of the main source code directory.

Compiling with Borland C++

Borland C++ can be downloaded for free from Inprise (Borland) (see their site http://www.borland.com). It can compile using a Makefile. We have supplied this in the source code distribution as Makefile.bcc. You will need to preserve the Unix Makefile by renaming it to, say, Makefile.unix, then make a copy of Makefile.bcc and call it Makefile. The Makefile is invoked using the make command. If you simply type make you will get a list of possible make commands. For example, to compile a single program such as Dnaml but not install it, type make dnaml. To compile and install all programs type make install. We have supplied all the support files and icons needed for the compilations. They are in subdirectory bcc of the main source code directory. We have had to supply a complete second set of the resource files with names *.brc because Borland resource files have a minor incompatibility with Microsoft Visual C++ resource files.

If this does not work the PATH may need to be set manually. This can be done by opening a Command or DOS window using the Start menu. To set the path, type

set BORLAND=Path
Where Path is where Borland is installed, such as C:\Progra~1\Borland. Then type
PATH=%PATH%;%BORLAND%\CBUILD~1\Bin

Compiling with Metrowerks Codewarrior for Windows

As with Macintosh systems, Metrowerks Codewarrior requires you to have project files for each program you compile. For Metrowerks Codewarrior for Windows we are not providing the projects themselves, but rather projects which have been exported as XML files. To open one of these you cannot just use File/Open; instead use the menu option File/Import Project. Metrowerks will then ask you for the project name. Type in the name of the program (e.g. dnaml). Once this is done Metrowerks will treat it like a regular project file.

We have supplied a complete set of these XML project files in the source code distribution. They are in subdirectory metro of the main source code directory. This is supplied with the source code distribution for Windows (it is not in the source code distributions for other platforms).

To compile the program pull down the Project menu and select Make. The program should then compile, possibly with ignorable warning messages.

For the moment we are not giving here the details of how to create these projects yourself -- you usually will not need to, as you have the project files we have supplied.

Compiling with Cygnus Gnu C++

Cygnus Solutions (now a part of Red Hat, Inc.) has adapted the Gnu C compiler to Windows systems and provided an environment, CygWin, which mimics Unix for compiling. This is available for purchase from them, and they also make it available to be downloaded for free. The download is large. To get it, go to their download site at http://sources.redhat.com/cygwin/download.html and follow the instructions there. It is a bit difficult to figure out how to download it -- you need to download their setup.exe program and then it will download the rest when it is run. You will need a lot of disk space for it.

Once you have installed the free Cygnus environment and the associated Gnu C compiler on your Windows system, compiling PHYLIP is essentially identical to what one does for Unix or Linux. In PHYLIP's src directory, change the name of our Unix Makefile to something like Makefile.unx (so as to keep it around). There is a special Makefile for the Cygwin compiler called Makefile.cyg. Make a copy of it called Makefile.

This Makefile should contain a compiling command:

CC = gcc

Now enter the Cygwin environment (which you can do using the Windows Start menu and its Programs menu item). There should be a Cygnus menu choice within that submenu, which you can use to start the Cygnus environment. This puts you in an imitation of a Unix shell.

On entering the CygWin environment you will find yourself in one of the subdirectories of the CygWin directory. Change to the directory where the PHYLIP programs have been put, for example by issuing the command

cd c:/phylip

You should then be able to compile PHYLIP by issuing the appropriate make command, such as make install. If you have modified one of our source code files such as dnaml.c, it would be wise to have saved the original version of it first as, say, dnaml.c0. To associate an icon with a program (say DnaML), you need an icon file (say dna.ico) which contains the icon in standard format. There should also be a file called dnaml.rc which contains the single line:

dnaml ICON "dna.ico"

We have provided a subdirectory icons in the src subdirectory, containing a full set of icons and a full set of resource files (*.rc). Our Cygwin Makefile will automatically invoke them.

VMS VAX systems

We have not tried to compile version 3.6 on an OpenVMS system but the following instructions should work. On the OpenVMS operating system with DEC VAX VMS C the programs will compile without alteration. The commands for compiling a typical program (DNAPARS, which depends on the separately compiled files phylip.c and seq.c) are:

$ DEFINE LNK$LIBRARY SYS$LIBRARY:VAXCRTL
$ CC DNAPARS.C
$ CC PHYLIP.C
$ CC SEQ.C
$ LINK DNAPARS,PHYLIP,SEQ

Once you use this $ DEFINE statement during a given interactive session, you need not repeat it again as the symbol LNK$LIBRARY is thereafter properly defined. The compilation process leaves a file DNAPARS.OBJ in your directory: this can be discarded. The executable program is named DNAPARS.EXE. To run the program one then uses the command:

$ R DNAPARS

The compiler defaults to the filenames INFILE., OUTFILE., and TREEFILE.. If the input file INFILE. does not exist the program will prompt you to type in its name. Note that some commands on VMS such as TYPE OUTFILE will fail because the name of the file that it will attempt to type out will be not OUTFILE. but OUTFILE.LIS. To get it to type the right file you would have to instead issue the command TYPE OUTFILE..

When you are using the interactive previewing feature of DRAWGRAM (or DRAWTREE) on a Tektronix or DEC ReGIS compatible terminal, you will want before running the program to have issued the command:

$ SET TERM/NOWRAP/ESCAPE

so that you do not run into trouble from the VMS line length limit of 255 characters or the filtering of escape characters.

To know which files to compile together, look at the entries in the Makefile.

VMS systems are rapidly disappearing, so we will not devote much effort to get PHYLIP working on them.

Parallel computers

As parallel computers become more common, the issue of how to compile PHYLIP for them has become more pressing. People have been compiling PHYLIP for vector machines and parallel machines for many years. We have not made a version for parallel machines because there is still no standard parallel programming environment on such machines (or rather, there are many standards, so that one cannot find one that makes a parallel execution version of PHYLIP practical). However the MPI Message Passing Interface is spreading rapidly, and we will probably support it in future versions of PHYLIP.

Although the underlying algorithms of most programs, which treat sites independently, should be amenable to vector and parallel processors, there are details of the code which might best be changed. In certain of the programs (Dnaml, Dnamlk, Proml, Promlk) I have put a special comment statement next to the loops in the program where the program will spend most of its time, and which are the places most likely to benefit from parallelization. This comment statement is:

           /* parallelize here */
In particular within these innermost loops of the programs there are often scalar quantities that are used for temporary bookkeeping. These quantities, such as sum1, sum2, zz, z1, yy, y1, aa, bb, cc, sum, and denom in procedure makenewv of DNAML (and similar quantities in procedure nuview) are there to minimize the number of array references. For vectorizing and parallelizing compilers it will be better to replace them by arrays so that processing can occur simultaneously.

If you succeed in making a parallel version of PHYLIP we would like to know how you did it. In particular, if you can prepare a web page which describes how to do it for your computer system, we would like to have it for inclusion in our PHYLIP web pages. Please e-mail it to me. We hope to have a set of pages that give detailed instructions on how to make a parallel version of PHYLIP on various kinds of machines. Alternatively, if you give us your modified version of the program, we may be able to figure out how to modify our source code so that users can compile the program with those modifications.

Other computer systems

As you can see from the variety of different systems on which these programs have been successfully run, there are no serious incompatibility problems with most computer systems. PHYLIP in various past Pascal versions has also been compiled on 8080 and Z80 CP/M Systems, Apple II systems running UCSD Pascal, a variety of minicomputer systems such as DEC PDP-11's and HP 1000's, on 1970's era mainframes such as CDC Cyber systems, and so on. In a later era it was also compiled on IBM 370 mainframes, and of course on DOS and Windows systems and on Macintosh and PowerMacintosh systems. We have gradually accumulated experience on a wider variety of C compilers. If you succeed in compiling the C version of PHYLIP on a different machine or a different compiler, I would like to hear the details so that I can consider including the instructions in a future version of this manual.


Frequently Asked Questions

This set of Frequently Asked Questions, and their answers, is from the PHYLIP web site. A more up-to-date version can be found there.

"It doesn't work! It doesn't work!! It says can't find infile.
Actually, it's working just fine. Many of the programs look for an input file called infile, and if one of that name is not present in the current directory, they then ask you to type in the name of the input file. That's all that it's doing. This is done so that you can get the program to read the file without you having to type in its name, by making a copy of your input file and calling it infile. If you don't do that, then the program issues this message. It looks alarming, but really all that it is trying to do is to get you to type in the name of the input file. Try giving it the name of the input file.
"The program reads my data file and then says it's has a memory allocation error!"
This is what tends to happen if there is a problem with the format of the data file, so that the programs get confused and think they need to set aside memory for 1,000,000 species or so. The result is a "memory allocation error". Check the data file format against the documentation: make sure that the data files have not been saved in the format of your word processor (such as Microsoft Word) but in a "flat ASCII" or "text only" mode. Note that adding memory to your computer is not the way to solve this problem -- you probably have plenty of memory to run the program once the data file is in the correct format.
"On our Macintosh, larger data files fail to run."
We have set the memory allowances on the Macintosh executables to be generous, but not too big. You therefore may need to increase them. Use the Get Info item on the Finder File menu while the program is selected.
"I opened the program but I don't see where to create a data file!"
The programs (there are more than one) use data files that have been created outside of the programs. They do not have any data editor within them. You can create a data file by using an editor, such as Microsoft Word, EMACS, vi, SimpleText, Notepad, etc. But be sure not to save the file in Microsoft Word's own format. It should be saved in Text Only format. You can use the documentation files, including the examples at the end of those files, to figure out the format of the input file. Documentation files such as main.html, sequence.html, distance.html and many others should be consulted. Many users create their data files by having their alignment program (such as ClustalW) output its alignments in PHYLIP format. Many alignment programs have options to do that.
"I ran PHYLIP, and all it did was say it was extracting a bunch of files!"
There is no executable program named PHYLIP in the PHYLIP package! But in some cases (especially the Windows distribution) there is a file called phylip.exe. That file is an archive of documentation and source code. Once you have run it and extracted the files in it, so that they are in the directory, running it again will just do the extraction again, which is unnecessary. Similarly for the archive files for the Windows executables, which have names like phylipwx.exe and phylipwy.exe. They are run only once to extract their contents.
"One program makes an output file and then the next program crashes while reading it!"
Did you rename the file? If a program makes a file called outfile, and then the next program is told to use outfile as its input file, terrible things will happen. The second program first opens outfile as an output file, thus erasing it. When it then tries to read from this empty outfile a psychological crisis ensues. The solution is simply to rename outfile before trying to use it as an input file.
"I make a file called infile and then the program can't find it!"
Let me guess. You are using Windows, right? You made your file in Word or in Notepad or WordPad, right? If you made a file in one of these editors, and saved it, not in Word format, but in Text Only format, then you were doing the right thing. But when you told the operating system to save the file as infile, it actually didn't. It saved it as infile.txt. Then just to make life harder for you, the operating system is set up by default to not show that three-letter extension to the file name. Next to its icon it will show the name infile. So you think, quite reasonably, that there is a file called infile. But there isn't a file of that name, so the program, quite reasonably, can't find a file called infile. If you want to check what the actual file name is, use the Properties menu item of the File item on your folder (in Windows versions, anyway). You should be able to get the program to work by telling it that the file name is INFILE.TXT.
"Consense gives wierd branch lengths! How do I get more reasonable ones?"
Consense gives branch lengths which are simply the numbers of replicates that support the branch. This is not a good reflection of how long those branches are estimated to be. The best way to put better branch lengths on a consensus tree is to use it as a User Tree in a program that will estimate branch lengths for it. You may need to convert it to being an unrooted tree, using Retree, first. If the original program you were using was a parsimony program, which does not estimate branch lengths, you may instead have to make some distances between your species (using, for example, DnaDist), and use Fitch to put branch lengths on the user tree. Here is the sequence of steps you should go through:
  1. Take the tree and use Retree to make sure it is Unrooted (just read it into Retree and then save it, specifying Unrooted)
  2. Use the unrooted tree as a User Tree (option U) in one of our programs (such as Fitch or DnaML). If you use Fitch, you also need to use one of the distance programs such as DnaDist to compute a set of distances to serve as its input.
  3. Specify that the branch lengths of the tree are not to be used but should be re-estimated. This is actually the default.
"DrawTree (or DrawGram) doesn't work: it can't find the font file!"
Six font files, called font1 through font6, are distributed with the executables (and with the source code too). The program looks for a copy of one of them called fontfile. If you haven't made such a copy called fontfile it then asks you for the name of the font file. If they are in the current directory, just type one of font1 through font6. The reason for having the program look for fontfile is so that you can copy your favorite font file, call the copy fontfile, and then it will be found automatically without you having to type the name of the font file each time.
"Can DrawGram draw a scale beside the tree? Print the branch lengths as numbers?"
It can't do either of these. Doing so would make the program more complex, and it is not obvious how to fit the branch length numbers into a tree that has many very short internal branches. If you want these scales or numbers, choose an output plot file format (such as Postscript, PICT or PCX) that can be read by a drawing program such as Adobe Illustrator, Freehand, Canvas, CorelDraw, or MacDraw. Then you can add the scales and branch length numbers yourself by hand. Note the menu option in DrawTree and DrawGram that specifies the tree size to be a given number of centimeters per unit branch length.
"How can I get DrawGram or DrawTree to print the bootstrap values next to the branches?"
When you do bootstrapping and use Consense, it prints the bootstrap values in its output file (both in a table of sets, and on the diagram of the tree which it makes). These are also in the output tree file of Consense. There they are in place of branch lengths. So to get them to be on the output of DrawGram or DrawTree, you must write the tree in the format of a drawing program and use it to put the values in by hand, as mentioned in the answer to the previous question.
"I have an HP Laserjet and can't get DrawGram to print on it"
DRAWGRAM and DRAWTREE produce a plot file (called plotfile): they do not send it to the printer. It is up to you to get the plot file to the printer. If you are running Windows or DOS this can probably be done with the MSDOS command COPY/B PLOTFILE PRN:, unless your printer is a networked printer. The /B is important. If it is omitted the copy command will strip off the highest bit of each byte, which can cause the printing to fail or produce garbage.
"DNAML won't read the treefile that is produced by DNAPARS!"
That's because the DnaPars tree file is a rooted tree, and DnaML wants an unrooted tree. Try using Retree to change the file to be an unrooted tree file.
"In bootstrapping, SEQBOOT makes too large a file"
If there are 1000 bootstrap replicates, it will make a file 1000 times as long as your original data set. But for many methods there is another way that uses much less file space. You can use SEQBOOT to make a file of multiple sets of weights, and use those together with the original data set to do bootstrapping.
"In bootstrapping, the output file gets too big."
When running a program such as NEIGHBOR or DNAPARS with multiple data sets (or multiple weights) for purposes of bootstrapping, the output file is usually not needed, as it is the output tree file that is used next. You can use the menu of the program to turn off the writing of trees into the output file. The trees will still be written into the tree file.
"Why doesn't NEIGHBOR read my DNA sequences correctly?"
Because it wants to have as input a distance matrix, not sequences. You have to use DNADIST to make the distance matrix first.

How to make it do various things

"How do I bootstrap?"
The general method of bootstrapping involves running SEQBOOT to make multiple bootstrapped data sets out of your one data set, then running one of the tree-making programs with the Multiple data sets option to analyze them all, then running CONSENSE to make a majority rule consensus tree from the resulting tree file. Read the documentation of SEQBOOT to get further information. In earlier versions of PHYLIP only parsimony methods could be bootstrapped. With this new system almost any of the tree-making methods in the package can be bootstrapped. It is somewhat more tedious but you will find it much more rewarding.
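In outline, using the default file names (the menu details, and the renaming of files between steps, are described in seqboot.html and in the documentation files of the individual programs; the program and the numbers below are only an example):
  1. Run SEQBOOT on your data file (infile). If you ask for, say, 100 replicates it writes 100 bootstrapped data sets to its output file.
  2. Use that output file as the input file of a tree-making program such as DNAPARS (rename it infile, or give its name when the program asks). Turn on that program's M (multiple data sets) option and tell it there are 100 data sets; its output tree file will then contain 100 trees.
  3. Use that tree file as the input tree file of CONSENSE (rename it intree, or give its name when asked). CONSENSE then writes the majority rule consensus tree.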
"How do I specify a multi-species outgroup with your parsimony programs?"
It is not a built-in feature, but it is not too hard to do in many of the programs. In parsimony programs like MIX, for which the W (Weights) and A (Ancestral states) options are available and weights can be larger than 1, all you need to do is:
(a)
In MIX, make up an extra character with states 0 for all the outgroups and 1 for all the ingroups. If using DNAPARS the ingroup can have (say) G and the outgroup A.
(b)
Assign this character an enormous weight (such as Z for 35) using the W option, all other characters getting weight 1, or whatever weight they had before.
(c)
If it is available, use the A (Ancestral states) option to designate that for that new character the state found in the outgroup is the ancestral state.
(d)
In MIX do not use the O (Outgroup) option.
(e)
After the tree is found, the designated ingroup should have been held together by the fake character. The tree will be rooted somewhere in the outgroup (the program may or may not have a preference for one place in the outgroup over another). Make sure that you subtract from the total number of steps on the tree all steps in the new character.

In programs like DNAPARS, you cannot use this method as weights of sites cannot be greater than 1. But you can do an analogous trick, by adding a largish number of extra sites to the data, with one nucleotide state ("A") for the ingroup and another ("G") for the outgroup. You will then have to use RETREE to manually reroot the tree in the desired place.
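As a purely schematic illustration (invented species names; the real data are represented here by the words in brackets), the extra columns appended to a DNAPARS data set might look like this:

  Outgrp1   [original sites]GGGGGGGGGG
  Outgrp2   [original sites]GGGGGGGGGG
  Ingrp1    [original sites]AAAAAAAAAA
  Ingrp2    [original sites]AAAAAAAAAA
  Ingrp3    [original sites]AAAAAAAAAA

Remember that these added columns contribute steps to the tree, so, just as with the fake character in MIX, you will probably want to subtract their contribution from the reported number of steps.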

"How do I force certain groups to remain monophyletic in your parsimony programs?"
By the same method as in the previous question, using multiple fake characters, any number of groups of species can be forced to be monophyletic. In MOVE, DOLMOVE, and DNAMOVE you can specify whatever outgroups you want without going to this trouble.
"How can I reroot one of the trees written out by PHYLIP?"
Use the program RETREE. But keep in mind whether the tree inferred by the original program was already rooted, or whether you are free to reroot it.
"What do I do about deletions and insertions in my sequences?"
The molecular sequence programs will accept sequences that have gaps (the "-" character). They do various things with them, mostly not optimal. DNAPARS counts "gap" as if it were a fifth nucleotide state (in addition to A, C, G, and T). Each site counts one change when a gap arises or disappears. The disadvantage of this treatment is that a long gap will be overweighted, with one event per gapped site. So a gap of 10 nucleotides will count as being as much evidence as 10 single site nucleotide substitutions. If there are not overlapping gaps, one way to correct this is to recode the first site in the gap as "-" but make all the others be "?" so the gap only counts as one event. Other programs such as DNAML and DNADIST count gaps as equivalent to unknown nucleotides (or unknown amino acids) on the grounds that we don't know what would be there if something were there. This completely leaves out the information from the presence or absence of the gap itself, but does not bias the gapped sequence to be close to or far from other gapped or ungapped sequences. So it is not necessary to remove gapped regions from your sequences, unless the presence of gaps indicates that the region is badly aligned.
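For example, a schematic fragment containing a single ten-base gap would be recoded from

  ACCGT----------TTAGC

to

  ACCGT-?????????TTAGC

so that the gap contributes only one inferred event rather than ten.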
"How can I produce distances for my data set which has 0's and 1's?"
You can't do it in a simple and general way, for a straightforward reason. Distance methods must correct the distances for superimposed changes. Unless we know specifically how to do this for your particular characters, we cannot accomplish the correction. There are many formulas we could use, but we can't choose among them without much more information. There are issues of superimposed changes, as well as heterogeneity of rates of change in different characters. Thus we have not provided a distance program for 0/1 data. It is up to you to figure out what is an appropriate stochastic model for your data and to find the right distance formulas.
"I have RFLP fragment data: which programs should I use?"
This is a more difficult question than you may imagine. Here is a quick tour of the issues:
  • You can code fragments as 0 and 1 and use a parsimony program. It is not obvious in advance whether 0 or 1 is ancestral, though it is likely that change in one direction is more likely than change in the other for each fragment. One can use either Wagner parsimony (programs MIX, PENNY or MOVE) or use Dollo parsimony (DOLLOP, DOLPENNY or DOLMOVE) with the ancestral states all set as unknown ("?").
  • You can use a distance matrix method using the RFLP distance of Nei and Li (1979). Their restriction fragment distance is available in our program RestDist.
  • You should be very hesitant to bootstrap RFLP's. The individual fragments do not evolve independently: a single nucleotide substitution can eliminate one fragment and create two (or vice versa).
For restriction sites (rather than fragments) life is a bit easier: they evolve nearly independently so bootstrapping is possible and RESTML can be used. Also directionality of change is less ambiguous when parsimony is used.
"Why don't your parsimony programs print out branch lengths?"
Well, DNAPARS and PARS can. The others have not yet been upgraded to the same level. The longer answer is that it is because there are problems defining the branch lengths. If you look closely at the reconstructions of the states of the hypothetical ancestral nodes for almost any data set and almost any parsimony method you will find some ambiguous states on those nodes. There is then usually an ambiguity as to which branch the change is actually on. Other parsimony programs resolve this in one or another arbitrary fashion, sometimes with the user specifying how (for example, methods that push the changes up the tree as far as possible or down it as far as possible). Our older programs leave it to the user to do this. In DNAPARS and PARS we use an algorithm discovered by Hochbaum and Pathria (1997) (and independently by Wayne Maddison) to compute branch lengths that average over all possible placements of the changes. But these branch lengths, as nice as they are, do not correct for multiple superimposed changes. Few programs available from others currently correct the branch lengths for multiple changes of state that may have overlain each other. One possible way to get branch lengths with nucleotide sequence data is to take the tree topology that you got, use RETREE to convert it to be unrooted, prepare a distance matrix from your data using DNADIST, and then use FITCH with that tree as User Tree and see what branch lengths it estimates.
"Why can't your programs handle unordered multistate characters?"
In this 3.6 release there is a program PARS which does parsimony for unordered multistate characters with up to 8 states, plus ?. The other discrete characters parsimony programs can only handle two states, 0 and 1. This is mostly because I have not yet had time to modify them to do so - the modifications would have to be extensive. Ultimately I hope to get these done. If you have four or fewer states and need a feature that is not in PARS, you could recode your states to look like nucleotides and use the parsimony programs in the molecular sequence section of PHYLIP, or you could use one of the excellent parsimony programs produced by others.

Background information needed:

"What file format do I use for the sequences?"
"How do I use the programs? I can't find any documentation!"
These are discussed in the documentation files. Do you have them? If you have a copy of this page you probably do. They are in a separate archive from the executables (they are in the Documentation and Sources archives, which you should definitely fetch). Input file formats are discussed in main.html, in sequence.html, distance.html, contchar.html, discrete.html, and the documentation files for the individual programs.
"Where can I find out how to infer phylogenies?
There are few books yet. For molecular data you could use one of these:
  • Graur, D. and W.-H. Li. 2000. Fundamentals of Molecular Evolution. Sinauer Associates, Sunderland, Massachusetts. (or the earlier edition by Li and Graur).
  • Page, R. D. M. and E. C. Holmes. 1998. Molecular Evolution: A Phylogenetic Approach. Blackwell, Oxford.
  • Nei, M. and S. Kumar. 2000. Molecular Evolution and Phylogenetics. Oxford University Press, Oxford.
  • Li, W.-H. 1999. Molecular Evolution. Sinauer Associates, Sunderland, Massachusetts.
In addition, one of these three review articles may help:
  • Swofford, D. L., G. J. Olsen, P. J. Waddell, and D. M. Hillis. 1996. Phylogenetic inference. pp. 407-514 in Molecular Systematics, 2nd ed., ed. D. M. Hillis, C. Moritz, and B. K. Mable. Sinauer Associates, Sunderland, Massachusetts.
  • Felsenstein, J. 1988. Phylogenies from molecular sequences: inference and reliability. Annual Review of Genetics 22: 521-565.
  • Felsenstein, J. 1988. Phylogenies and quantitative characters. Annual Review of Ecology and Systematics 19: 445-471.
My own book on phylogenies is due to be published in late 2002. It will be called "Inferring Phylogenies". For information on whether it has been published you should check the Sinauer Associates web site.

Questions about distribution and citation:

"If I copied PHYLIP from a friend without you knowing, should I try to keep you from finding out?"
No. It is to your advantage and mine for you to let me know. If you did not get PHYLIP "officially" from me or from someone authorized by me, but copied a friend's version, you are not in my database of users. You may also have an old version which has since been substantially improved. I don't mind you "bootlegging" PHYLIP (it's free anyway), but you should realize that you may have copied an outdated version. If you are reading this Web page, you can get the latest version just as quickly over Internet. It will help both of us if you get onto my mailing list. If you are on it, then I will give your name to other nearby users when they ask for the names of nearby users, and they are urged to contact you and update your copy. (I benefit by getting a better feel for how many distributions there have been, and having a better mailing list to use to give other users local people to contact). Use the registration form which can be accessed through our web site's registration page.
"How do I make a citation to the PHYLIP package in the paper I am writing?"
One way is like this:

Felsenstein, J. 2002. PHYLIP (Phylogeny Inference Package) version 3.6a3. Distributed by the author. Department of Genome Sciences, University of Washington, Seattle.

or if the editor for whom you are writing insists that the citation must be to a printed publication, you could cite a notice for version 3.2 published in Cladistics:

Felsenstein, J. 1989. PHYLIP - Phylogeny Inference Package (Version 3.2). Cladistics 5: 164-166.

For a while a printed version of the PHYLIP documentation was available and one could cite that. This is no longer true. Other than that, this is difficult, because I have never written a paper announcing PHYLIP! My 1985b paper in Evolution on the bootstrap method contains a one-paragraph Appendix describing the availability of this package, and that can also be cited as a reference for the package, although the package has been distributed since 1980 while the bootstrap paper is from 1985. A paper on PHYLIP is needed mostly to give people something to cite, as word-of-mouth, references in other people's papers, and electronic newsgroup postings have spread the word about PHYLIP's existence quite effectively.

"Can I make copies of PHYLIP available to the students in my class?"
Generally, yes. Read the Copyright notice near the front of this main documentation page. If you charge money for PHYLIP, or use it in a service for which you charge money, you will need to negotiate a royalty. But you can make it freely available and you do not need to get any special permission from us to do so.
"How many copies of PHYLIP have been distributed?"
On 27 September, 1996 we reached 5,000 registered installations worldwide. (By now we are well over 15,000 but have lost count for the moment). Of course there are many more people who have got copies from friends. PHYLIP is the most widely distributed phylogeny package. (This situation may reverse itself rapidly once PAUP* is fully released. During the years it was in full distribution, PAUP was ahead in phylogenies published, and the availability of distance and likelihood methods in PAUP* are making it very popular.) In recent years magnetic tape distribution and e-mail distribution of PHYLIP have disappeared, and there has been a big decrease of diskette distributions (down to only one or two per year). But all this has been more than offset by, first, an explosion of distributions by anonymous ftp over Internet, and then a bigger explosion of World Wide Web distributions and registrations (about 6 registrations per day at the moment).

Questions about documentation

"Where can I get a printed version of the PHYLIP documents?"
For the moment, you can only get a printed version by printing it yourself. For versions 3.1 to 3.3 a printed version was sold by Christopher Meacham and Tom Duncan, then at the University Herbarium of the University of California at Berkeley. But they have had to discontinue this as it was too much work. You should be able to print out the documentation files on almost any printer and make yourself a printed version of whichever of them you need.
"Why have I been dropped from your newsletter mailing list?"
You haven't. The newsletter was dropped. It simply was too hard to mail it out to such a large mailing list. The last issue of the newsletter was Number 9 in May, 1987. The Listserver News Bulletins that we tried for a while have also been dropped as too hard to keep up to date. I am hoping that our World Wide Web site will take their place.

Additional Frequently Asked Questions, or: "Why didn't it occur to you to ...

... allow the options to be set on the command line?
We could in Unix and Linux, or somewhat differently in Windows. But there are so many options that this would be difficult, especially when the options require additional information to be supplied such as rates of evolution for many categories of sites. You may be asking this question because you want to automate the operation of PHYLIP programs using batch files (command files) to run in background. If that is the issue, see the section of this main documentation page on "Running the programs in background or under control of a command file". It explains how to set the options using input redirection and a file that has the menu responses as keystrokes.
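As a minimal illustration (the file names here are hypothetical, and the exact keystrokes depend on which menu options you change and on whether the program finds your data in a file called infile), on a Unix or Linux system you can put the menu responses, one keystroke per line, into a small text file and supply it with input redirection, for example:

  dnapars < myresponses > screenout &

where myresponses might contain nothing more than a single line with the letter Y, accepting the default menu settings.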
... write these programs in Pascal?"
These programs started out in Pascal in 1980. In 1993 we released both Pascal and C versions. The present version (3.6) and future versions will be C-only. I make fewer mistakes in Pascal and do like the language better than C, but C has overtaken Pascal and Pascal compilers are starting to be hard to find on some machines. Also, C is a bit better standardized, which greatly reduces the number of modifications a user has to make to adapt the programs to their system.
... write these programs in Java?"
Well, we might. It is not completely clear which of two contenders, C++ and Java, will become more widespread, and which one will gradually fade away. Whichever one is more successful, we will probably want to use for future versions of PHYLIP. As the C compilers that are used to compile PHYLIP are usually also able to compile C++, we will be moving in that direction, but with constant worrying about whether to convert PHYLIP to Java instead.
... forget about all those inferior systems and just develop PHYLIP for Unix?"
This is self-answering, since the same people first said I should just develop it for Apple II's, then for CP/M Z-80's, then for IBM PCDOS, then for Macintoshes or for Sun workstations, and then for Windows. If I had listened to them and done any one of these, I would have had a very hard time adapting the package to any of the other ones once these folks changed their mind (and most of them did)!
... write these programs in PROLOG (or Ada, or Modula-2, or SIMULA, or BCPL, or PL/I, or APL, or LISP)?"
These are all languages I have considered. All have advantages, but they are not really widespread (as are C and C++).
... include in the package a program to do the Distance Wagner method, (or successive approximations character weighting, or transformation series analysis)?"
In most cases where I have not included other methods, it is because I decided that they had no substantial advantages over methods that were included (such as the programs FITCH, KITSCH, NEIGHBOR, the T option of MIX and DOLLOP, and the "?" ancestral states option of the discrete characters parsimony programs).
... include in the package ordination methods and more clustering algorithms?"
Because this is not a clustering package, it's a package for phylogeny estimation. Those are different tasks with different objectives and mostly different methods. Mary Kuhner and Jon Yamato have, however, included in NEIGHBOR an option for UPGMA clustering, which will be very similar to KITSCH in results.
... include in the package a program to do nucleotide sequence alignment?"
Well, yes, I should have, and this is scheduled to be in future releases. But multiple sequence alignment programs, in the era after Sankoff, Morel, and Cedergren's 1973 classic paper, need to use substantial computer horsepower to estimate the alignment and the tree together (but see Karl Nicholas's program GeneDoc or Ward Wheeler and David Gladstein's MALIGN, as well as more approximate methods of tree-based alignment used in ClustalW or TreeAlign).

(Fortunately) obsolete questions

(The following four questions, once common, have finally disappeared, I am pleased to report).

"Why didn't it occur to you to ...

... let me log in to your computer in Seattle and copy the files out over a phone line?"
No thanks. It would cost you for a lot of long-distance telephone time, plus a half hour of my time and yours in which I had to explain to you how to log in and do the copying.
... send me a listing of your program?"
Damn it, it's not "a program", it's 35 programs, in a great many files. What were you thinking of doing, having 1800-line programs typed in by slaves at your end? If you were going to go to all that trouble why not try network transfer? If you have these then you can print out all the listings you want to and add them to the huge stack of printed output in the corner of your office.
... write a magnetic tape in our computer center's favorite format (inverted Lithuanian EBCDIC at 998 bpi)?"
Because the ANSI standard format is the most widely used one, and even though your computer center may pretend it can't read a tape written this way, if you sniff around you will find a utility to read it. It's just a lot easier for me to let you do that work. If I tried to put the tape into your format, I would probably get it wrong anyway.
... give us a version of these in FORTRAN?"
Because the programs are far easier to write and debug in C or Pascal, and cannot easily be rewritten into FORTRAN (they make extensive use of recursive calls and of records and pointers). In any case, C is widely available. If you don't have a C compiler or don't know how to use it, you are going to have to learn a language like C or Pascal sooner or later, and the sooner the better.


New Features in This Version

Version 3.6 has many new features:

  • Faster (well, less slow) likelihood programs.
  • The DNA and protein likelihood and distance programs allow for rate variation between sites using a gamma distribution of rates among sites, or using a gamma distribution plus a given fraction of sites which are assumed invariant.
  • A new multistate discrete characters parsimony program, PARS, that handles unordered multistate characters.
  • The DNAPARS and PARS parsimony programs can infer multifurcating trees, which sensibly reduces the number of tied trees they find.
  • A new protein sequence likelihood program, PROML, and also a version, PROMLK, which assumes a molecular clock.
  • A new restriction sites and restriction fragments distance program, RESTDIST, that can also be used to compute distances for RAPD and AFLP data. It also allows for gamma-distributed rate variation among DNA sites.
  • In the DNA likelihood programs, you can now specify different categories of rates of change (such as rates for first, second, and third positions of a coding sequence) and assign them to specific sites. This is in addition to the ability of the program to use the Hidden Markov Model mechanism to allow rates of change to vary across sites in a way that does not ask you to assign which rate goes with which site.
  • The input files for many of the programs are now simpler, in that they do not contain options information such as specification of weights and categories. That information is now provided in separate files with default names such as weights and categories.
  • The DNA likelihood programs can now evaluate multifurcating user trees (option U).
  • All programs that read in user-defined trees now do so from a separate file, whose default name is intree, rather than requiring them to be in the input file as before.
  • The DNA likelihood programs can infer the sequence at ancestral nodes in the interior of the tree.
  • DNAPARS can now do transversion parsimony.
  • The bootstrapping program SEQBOOT now can, instead of producing a large file containing multiple data sets, be asked instead to produce a weights file with multiple sets of weights. Many programs in this release can analyze those multiple weights together with the original data set, which saves disk space.
  • The bootstrapping program SEQBOOT can pass weights and categories information through to a multiple weights file or a multiple categories file.
  • SEQBOOT can also convert sequence files from Interleaved to Sequential form, or back.
  • SEQBOOT can also write a sequence data file into a preliminary version of a new XML format which is being defined for sequence alignments, for use by programs that need XML input (none of the current PHYLIP programs yet need this format, but it will be useful in the future).
  • RETREE can now write trees out into a preliminary version of a new XML tree file format which is in the process of being defined.
  • The Kishino-Hasegawa-Templeton (KHT) test which compares user-defined trees (option U) is now joined by the Shimodaira-Hasegawa (SH) test (Shimodaira and Hasegawa, 1999), which corrects for the multiple comparisons being made. This avoids a statistical problem with multiple user trees.
  • CONTRAST can now carry out an analysis that takes into account within-species variation, according to a model similar (but not identical) to that introduced by Michael Lynch (1990).
  • A new program, TREEDIST, computes the Robinson-Foulds symmetric difference distance among trees. This measures the number of branches in the trees that are present in one but not the other.
  • FITCH and KITSCH now have an option to make trees by the minimum evolution distance matrix method.
  • The protein parsimony program PROTPARS now allows you to choose among a number of different genetic codes such as mitochondrial codes.
  • The consensus tree program CONSENSE can compute the Ml family of consensus tree methods, which generalize the Majority Rule consensus tree method. It can also compute our extended Majority Rule consensus (which is Majority Rule with some additional groups added to resolve the tree more completely), and it can also compute the original Majority Rule consensus tree method which does not add these extra groups. It can also compute the Strict consensus.
  • The tree-drawing programs DRAWGRAM and DRAWTREE have a number of new kinds of file they can produce, including Windows Bitmap files, files for the Idraw and FIG X windows drawing programs, the POV ray-tracer, and even VRML (Virtual Reality Modeling Language) files that will enable you to wander around the tree using a VRML plugin for your browser, such as Cosmo Player.
  • DRAWTREE now uses my new Equal Daylight Algorithm to draw unrooted trees. This gives a much better-looking tree. Of course, competing programs such as TREEVIEW and PAUP draw trees that look just as good - because they too have started to use my method (with my encouragement). DRAWTREE also can use another algorithm, the n-body method.
  • The tree-drawing programs can now produce trees across multiple pages, which is handy for looking at trees with very large numbers of tips, and for producing giant diagrams by pasting together multiple sheets of paper.

There are many more, lesser features added as well.


Coming Attractions, Future Plans

There are some obvious deficiencies in this version. Some of these holes will be filled in the next few releases (leading to version 4.0). They include:

  1. A program to align molecular sequences on a predefined User Tree may ultimately be included. This will allow alignment and phylogeny reconstruction to proceed iteratively by successive runs of two programs, one aligning on a tree and the other finding a better tree based on that alignment. In the shorter run a simple two-sequence alignment program may be included.
  2. An interactive "likelihood explorer" for DNA sequences will be written. This will allow, either with or without the assumption of a molecular clock, trees to be varied interactively so that the user can get a much better feel for the shape of the likelihood surface. Likelihood will be able to be plotted against branch lengths for any branch.
  3. If possible we will find some way of correcting for purine/pyrimidine richness variations among species, within the framework of the maximum likelihood programs. That the maximum likelihood programs do not allow for base composition variation is their major limitation at the moment.
  4. The Hidden Markov Model (regional rates) option of DNAML and DNAMLK will be generalized to allow for rates at sites to gradually change as one moves along the tree, in an attempt to implement Fitch and Markowitz's (1970) notion of "covarions".
  5. Obviously we need to start thinking about a more visual mouse/windows interface, but only if that can be used on X windows, Macintoshes, and Windows.
  6. Program PENNY and its relatives will be improved so as to run faster and find all most parsimonious trees more quickly.
  7. A more sophisticated compatibility program should be included, if I can find one.
  8. An "evolutionary clock" version of CONTML will be done, and the same may also be done for RESTML.
  9. We are gradually generalizing the tree structures in the programs to infer multifurcating trees as well as bifurcating ones. We should be able to have any program read any tree and know what to do with it, without the user having to fret about whether an unrooted tree was fed to a program that needs a rooted tree.
  10. We are economizing on the size of the source code, and enforcing some standardization of it, by putting frequently used routines in separate files which can be linked into various programs. This will enforce a rather complete standardization of our code.
  11. We will move our code to an object-oriented language, most likely C++. One could describe the language that version 3.4 was written in as "Pascal", version 3.5 as "Pascal written in C", version 3.6 as "C written in C", and maybe version 4.0 as "C++ written in C" and then 4.1 as "C++ written in C++". At least that scenario is one possibility.

Much of the future development of the package will be in the DNA and protein likelihood programs and the distance matrix programs. This is for two reasons. First, I am more interested in those problems. Second, collection of molecular data is increasing rapidly, and those programs have the most promise for future development for those data.


Endorsements

Here are some comments people have made in print about PHYLIP. Explanatory material in square brackets is my own. They fall naturally into two groups:

From the pages of Cladistics:

"Under no circumstances can we recommend PHYLIP/WAG [their name for the Wagner parsimony option of MIX]."
Luckow, M. and R. A. Pimentel (1985)

"PHYLIP has not proven very effective in implementing parsimony (Luckow and Pimentel, 1985)."
J. Carpenter (1987a)

"... PHYLIP. This is the computer program where every newsletter concerning it is mostly bug-catching, some of which have been put there by previous corrections. As Platnick (1987) documents, through dint of much labor useful results may be attained with this program, but I would suggest an easier way: FORMAT b:"
J. Carpenter (1987b)

"PHYLIP is bug-infested and both less effective and orders of magnitude slower than other programs ...."
"T. N. Nayenizgani" [J. S. Farris] (1990)

"Hennig86 [by J. S. Farris] provides such substantial improvements over previously available programs (for both mainframes and microcomputers) that it should now become the tool of choice for practising systematists."
N. Platnick (1989)

... and in the pages of other journals:

"The availability, within PHYLIP of distance, compatibility, maximum likelihood, and generalized `invariants' algorithms (Cavender and Felsenstein, 1987) sets it apart from other packages .... One of the strengths of PHYLIP is its documentation ...."
Michael J. Sanderson (1990)
(Sanderson also criticizes PHYLIP for slowness and inflexibility of its parsimony algorithms, and compliments other packages on their strengths).

"This package of programs has gradually become a basic necessity to anyone working seriously on various aspects of phylogenetic inference .... The package includes more programs than any other known phylogeny package. But it is not just a collection of cladistic and related programs. The package has great value added to the whole, and for this it is unique and of extreme importance .... its various strengths are in the great array of methods provided ...."
Bernard R. Baum (1989)

(note also W. Fink's critical remarks (1986) on version 2.8 of PHYLIP).


References for the Documentation Files

In the documentation files that follow I frequently refer to papers in the literature. In order to centralize the references they are given in this section. The chapter by David Swofford, Gary Olsen, Peter Waddell, and David Hillis (1996) is also an excellent review of the issues in phylogeny reconstruction. If you want to find further papers beyond these, my Quarterly Review of Biology review of 1982 and my Annual Review of Genetics review of 1988 list many further references.

Adams, E. N. 1972. Consensus techniques and the comparison of taxonomic trees. Systematic Zoology 21: 390-397.

Adams, E. N. 1986. N-trees as nestings: complexity, similarity, and consensus. Journal of Classification 3: 299-317.

Archie, J. W. 1989. A randomization test for phylogenetic information in systematic data. Systematic Zoology 38: 219-252.

Barry, D., and J. A. Hartigan. 1987. Statistical analysis of hominoid molecular evolution. Statistical Science 2: 191-210.

Baum, B. R. 1989. PHYLIP: Phylogeny Inference Package. Version 3.2. (Software review). Quarterly Review of Biology 64: 539-541.

Bron, C., and J. Kerbosch. 1973. Algorithm 457: Finding all cliques of an undirected graph. Communications of the Association for Computing Machinery 16: 575-577.

Camin, J. H., and R. R. Sokal. 1965. A method for deducing branching sequences in phylogeny. Evolution 19: 311-326.

Carpenter, J. 1987a. A report on the Society for the Study of Evolution workshop "Computer Programs for Inferring Phylogenies". Cladistics 3: 363-375.

Carpenter, J. 1987b. Cladistics of cladists. Cladistics 3: 363-375.

Cavalli-Sforza, L. L., and A. W. F. Edwards. 1967. Phylogenetic analysis: models and estimation procedures. Evolution 21: 550-570 (also American Journal of Human Genetics 19: 233-257).

Cavender, J. A. and J. Felsenstein. 1987. Invariants of phylogenies in a simple case with discrete states. Journal of Classification 4: 57-71.

Churchill, G.A. 1989. Stochastic models for heterogeneous DNA sequences. Bulletin of Mathematical Biology 51: 79-94.

Conn, E. E. and P. K. Stumpf. 1963. Outlines of Biochemistry. John Wiley and Sons, New York.

Day, W. H. E. 1983. Computationally difficult parsimony problems in phylogenetic systematics. Journal of Theoretical Biology 103: 429-438.

Dayhoff, M. O. and R. V. Eck. 1968. Atlas of Protein Sequence and Structure 1967-1968. National Biomedical Research Foundation, Silver Spring, Maryland.

Dayhoff, M. O., R. M. Schwartz, and B. C. Orcutt. 1979. A model of evolutionary change in proteins. pp. 345-352 in Atlas of Protein Sequence and Structure, volume 5, supplement 3, 1978, ed. M. O. Dayhoff. National Biomedical Research Foundation, Silver Spring, Maryland.

Dayhoff, M. O. 1979. Atlas of Protein Sequence and Structure, Volume 5, Supplement 3, 1978. National Biomedical Research Foundation, Washington, D.C.

DeBry, R. W. and N. A. Slade. 1985. Cladistic analysis of restriction endonuclease cleavage maps within a maximum-likelihood framework. Systematic Zoology 34: 21-34.

Dempster, A. P., N. M. Laird, and D. B. Rubin. 1977. Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society B 39: 1-38.

Eck, R. V., and M. O. Dayhoff. 1966. Atlas of Protein Sequence and Structure 1966. National Biomedical Research Foundation, Silver Spring, Maryland.

Edwards, A. W. F., and L. L. Cavalli-Sforza. 1964. Reconstruction of evolutionary trees. pp. 67-76 in Phenetic and Phylogenetic Classification, ed. V. H. Heywood and J. McNeill. Systematics Association Volume No. 6. Systematics Association, London.

Estabrook, G. F., C. S. Johnson, Jr., and F. R. McMorris. 1976a. A mathematical foundation for the analysis of character compatibility. Mathematical Biosciences 23: 181-187.

Estabrook, G. F., C. S. Johnson, Jr., and F. R. McMorris. 1976b. An algebraic analysis of cladistic characters. Discrete Mathematics 16: 141-147.

Estabrook, G. F., F. R. McMorris, and C. A. Meacham. 1985. Comparison of undirected phylogenetic trees based on subtrees of four evolutionary units. Systematic Zoology 34: 193-200.

Faith, D. P. 1990. Chance marsupial relationships. Nature 345: 393-394.

Faith, D. P. and P. S. Cranston. 1991. Could a cladogram this short have arisen by chance alone?: On permutation tests for cladistic structure. Cladistics 7: 1-28.

Farris, J. S. 1977. Phylogenetic analysis under Dollo's Law. Systematic Zoology 26: 77-88.

Farris, J. S. 1978a. Inferring phylogenetic trees from chromosome inversion data. Systematic Zoology 27: 275-284.

Farris, J. S. 1981. Distance data in phylogenetic analysis. pp. 3-23 in Advances in Cladistics: Proceedings of the first meeting of the Willi Hennig Society, ed. V. A. Funk and D. R. Brooks. New York Botanical Garden, Bronx, New York.

Farris, J. S. 1983. The logical basis of phylogenetic analysis. pp. 1-47 in Advances in Cladistics, Volume 2, Proceedings of the Second Meeting of the Willi Hennig Society. ed. Norman I. Platnick and V. A. Funk. Columbia University Press, New York.

Farris, J. S. 1985. Distance data revisited. Cladistics 1: 67-85.

Farris, J. S. 1986. Distances and statistics. Cladistics 2: 144-157.

Farris, J. S. ["T. N. Nayenizgani"]. 1990. The systematics association enters its golden years (review of Prospects in Systematics, ed. D. Hawksworth). Cladistics 6: 307-314.

Felsenstein, J. 1973a. Maximum likelihood and minimum-steps methods for estimating evolutionary trees from data on discrete characters. Systematic Zoology 22: 240-249.

Felsenstein, J. 1973b. Maximum-likelihood estimation of evolutionary trees from continuous characters. American Journal of Human Genetics 25: 471-492.

Felsenstein, J. 1978a. The number of evolutionary trees. Systematic Zoology 27: 27-33.

Felsenstein, J. 1978b. Cases in which parsimony and compatibility methods will be positively misleading. Systematic Zoology 27: 401-410.

Felsenstein, J. 1979. Alternative methods of phylogenetic inference and their interrelationship. Systematic Zoology 28: 49-62.

Felsenstein, J. 1981a. Evolutionary trees from DNA sequences: a maximum likelihood approach. Journal of Molecular Evolution 17: 368-376.

Felsenstein, J. 1981b. A likelihood approach to character weighting and what it tells us about parsimony and compatibility. Biological Journal of the Linnean Society 16: 183-196.

Felsenstein, J. 1981c. Evolutionary trees from gene frequencies and quantitative characters: finding maximum likelihood estimates. Evolution 35: 1229-1242.

Felsenstein, J. 1982. Numerical methods for inferring evolutionary trees. Quarterly Review of Biology 57: 379-404.

Felsenstein, J. 1983b. Parsimony in systematics: biological and statistical issues. Annual Review of Ecology and Systematics 14: 313-333.

Felsenstein, J. 1984a. Distance methods for inferring phylogenies: a justification. Evolution 38: 16-24.

Felsenstein, J. 1984b. The statistical approach to inferring evolutionary trees and what it tells us about parsimony and compatibility. pp. 169-191 in: Cladistics: Perspectives in the Reconstruction of Evolutionary History, edited by T. Duncan and T. F. Stuessy. Columbia University Press, New York.

Felsenstein, J. 1985a. Confidence limits on phylogenies with a molecular clock. Systematic Zoology 34: 152-161.

Felsenstein, J. 1985b. Confidence limits on phylogenies: an approach using the bootstrap. Evolution 39: 783-791.

Felsenstein, J. 1985c. Phylogenies from gene frequencies: a statistical problem. Systematic Zoology 34: 300-311.

Felsenstein, J. 1985d. Phylogenies and the comparative method. American Naturalist 125: 1-12.

Felsenstein, J. 1986. Distance methods: a reply to Farris. Cladistics 2: 130-144.

Felsenstein, J. and E. Sober. 1986. Parsimony and likelihood: an exchange. Systematic Zoology 35: 617-626.

Felsenstein, J. 1988a. Phylogenies and quantitative characters. Annual Review of Ecology and Systematics 19: 445-471.

Felsenstein, J. 1988b. Phylogenies from molecular sequences: inference and reliability. Annual Review of Genetics 22: 521-565.

Felsenstein, J. 1992. Phylogenies from restriction sites, a maximum likelihood approach. Evolution 46: 159-173.

Felsenstein, J. and G. A. Churchill. 1996. A hidden Markov model approach to variation among sites in rate of evolution. Molecular Biology and Evolution 13: 93-104.

Fink, W. L. 1986. Microcomputers and phylogenetic analysis. Science 234: 1135-1139.

Fitch, W. M., and E. Markowitz. 1970. An improved method for determining codon variability in a gene and its application to the rate of fixation of mutations in evolution. Biochemical Genetics 4: 579-593.

Fitch, W. M., and E. Margoliash. 1967. Construction of phylogenetic trees. Science 155: 279-284.

Fitch, W. M. 1971. Toward defining the course of evolution: minimum change for a specified tree topology. Systematic Zoology 20: 406-416.

Fitch, W. M. 1975. Toward finding the tree of maximum parsimony. pp. 189-230 in Proceedings of the Eighth International Conference on Numerical Taxonomy, ed. G. F. Estabrook. W. H. Freeman, San Francisco.

George, D. G., L. T. Hunt, and W. C. Barker. 1988. Current methods in sequence comparison and analysis. pp. 127-149 in Macromolecular Sequencing and Synthesis, ed. D. H. Schlesinger. Alan R. Liss, New York.

Gomberg, D. 1966. "Bayesian" post-diction in an evolution process. unpublished manuscript: University of Pavia, Italy.

Graham, R. L., and L. R. Foulds. 1982. Unlikelihood that minimal phylogenies for a realistic biological study can be constructed in reasonable computational time. Mathematical Biosciences 60: 133-142.

Hasegawa, M. and T. Yano. 1984a. Maximum likelihood method of phylogenetic inference from DNA sequence data. Bulletin of the Biometric Society of Japan No. 5: 1-7.

Hasegawa, M. and T. Yano. 1984b. Phylogeny and classification of Hominoidea as inferred from DNA sequence data. Proceedings of the Japan Academy 60 B: 389-392.

Hasegawa, M., Y. Iida, T. Yano, F. Takaiwa, and M. Iwabuchi. 1985a. Phylogenetic relationships among eukaryotic kingdoms as inferred from ribosomal RNA sequences. Journal of Molecular Evolution 22: 32-38.

Hasegawa, M., H. Kishino, and T. Yano. 1985b. Dating of the human-ape splitting by a molecular clock of mitochondrial DNA. Journal of Molecular Evolution 22: 160-174.

Hendy, M. D., and D. Penny. 1982. Branch and bound algorithms to determine minimal evolutionary trees. Mathematical Biosciences 59: 277-290.

Higgins, D. G. and P. M. Sharp. 1989. Fast and sensitive multiple sequence alignments on a microcomputer. Computer Applications in the Biosciences (CABIOS) 5: 151-153.

Hochbaum, D. S. and A. Pathria. 1997. Path costs in evolutionary tree reconstruction. Journal of Computational Biology 4: 163-175.

Holmquist, R., M. M. Miyamoto, and M. Goodman. 1988. Higher-primate phylogeny - why can't we decide? Molecular Biology and Evolution 5: 201-216.

Inger, R. F. 1967. The development of a phylogeny of frogs. Evolution 21: 369-384.

Jin, L. and M. Nei. 1990. Limitations of the evolutionary parsimony method of phylogenetic analysis. Molecular Biology and Evolution 7: 82-102.

Jones, D. T., W. R. Taylor and J. M. Thornton. 1992. The rapid generation of mutation data matrices from protein sequences. Computer Applications in the Biosciences (CABIOS) 8: 275-282.

Jukes, T. H. and C. R. Cantor. 1969. Evolution of protein molecules. pp. 21-132 in Mammalian Protein Metabolism, ed. H. N. Munro. Academic Press, New York.

Kidd, K. K. and L. A. Sgaramella-Zonta. 1971. Phylogenetic analysis: concepts and methods. American Journal of Human Genetics 23: 235-252.

Kim, J. and M. A. Burgman. 1988. Accuracy of phylogenetic-estimation methods using simulated allele-frequency data. Evolution 42: 596-602.

Kimura, M. 1980. A simple model for estimating evolutionary rates of base substitutions through comparative studies of nucleotide sequences. Journal of Molecular Evolution 16: 111-120.

Kimura, M. 1983. The Neutral Theory of Molecular Evolution. Cambridge University Press, Cambridge.

Kingman, J. F. C. 1982a. The coalescent. Stochastic Processes and Their Applications 13: 235-248.

Kingman, J. F. C. 1982b. On the genealogy of large populations. Journal of Applied Probability 19A: 27-43.

Kishino, H. and M. Hasegawa. 1989. Evaluation of the maximum likelihood estimate of the evolutionary tree topologies from DNA sequence data, and the branching order in Hominoidea. Journal of Molecular Evolution 29: 170-179.

Kluge, A. G., and J. S. Farris. 1969. Quantitative phyletics and the evolution of anurans. Systematic Zoology 18: 1-32.

Kuhner, M. K. and J. Felsenstein. 1994. A simulation comparison of phylogeny algorithms under equal and unequal evolutionary rates. Molecular Biology and Evolution 11: 459-468 (Erratum 12: 525, 1995).

Künsch, H. R. 1989. The jackknife and the bootstrap for general stationary observations. Annals of Statistics 17: 1217-1241.

Lake, J. A. 1987. A rate-independent technique for analysis of nucleic acid sequences: evolutionary parsimony. Molecular Biology and Evolution 4: 167-191.

Lake, J. A. 1994. Reconstructing evolutionary trees from DNA and protein sequences: paralinear distances. Proceedings of the National Academy of Sciences, USA 91: 1455-1459.

Le Quesne, W. J. 1969. A method of selection of characters in numerical taxonomy. Systematic Zoology 18: 201-205.

Le Quesne, W. J. 1974. The uniquely evolved character concept and its cladistic application. Systematic Zoology 23: 513-517.

Lewis, H. R., and C. H. Papadimitriou. 1978. The efficiency of algorithms. Scientific American 238: 96-109 (January issue).

Lockhart, P. J., M. A. Steel, M. D. Hendy, and D. Penny. 1994. Recovering evolutionary trees under a more realistic model of sequence evolution. Molecular Biology and Evolution 11: 605-612.

López-Martínez, N., M. A. Álvarez-Sierra, and E. García Moreno. 1986. Paleontología y Bioestratigrafía (Micromamíferos) del Mioceno medio-superior del Sector Central de la Cuenca del Duero. Stvdia Geologica Salmanticensia 22: 146-191.

Luckow, M. and R. A. Pimentel. 1985. An empirical comparison of numerical Wagner computer programs. Cladistics 1: 47-66.

Lynch, M. 1990. Methods for the analysis of comparative data in evolutionary biology. Evolution 45: 1065-1080.

Maddison, D. R. 1991. The discovery and importance of multiple islands of most-parsimonious trees. Systematic Zoology 40: 315-328.

Margush, T. and F. R. McMorris. 1981. Consensus n-trees. Bulletin of Mathematical Biology 43: 239-244.

Nelson, G. 1979. Cladistic analysis and synthesis: principles and definitions, with a historical note on Adanson's Familles des Plantes (1763-1764). Systematic Zoology 28: 1-21.

Nei, M. 1972. Genetic distance between populations. American Naturalist 106: 283-292.

Nei, M. and W.-H. Li. 1979. Mathematical model for studying genetic variation in terms of restriction endonucleases. Proceedings of the National Academy of Sciences, USA 76: 5269-5273.

Page, R. D. M. 1989. Comments on component-compatibility in historical biogeography. Cladistics 5: 167-182.

Penny, D. and M. D. Hendy. 1985. Testing methods of evolutionary tree construction. Cladistics 1: 266-278.

Platnick, N. 1987. An empirical comparison of microcomputer parsimony programs. Cladistics 3: 121-144.

Platnick, N. 1989. An empirical comparison of microcomputer parsimony programs. II. Cladistics 5: 145-161.

Reynolds, J. B., B. S. Weir, and C. C. Cockerham. 1983. Estimation of the coancestry coefficient: basis for a short-term genetic distance. Genetics 105: 767-779.

Robinson, D. F. and L. R. Foulds. 1981. Comparison of phylogenetic trees. Mathematical Biosciences 53: 131-147.

Rohlf, F. J. and M. C. Wooten. 1988. Evaluation of the restricted maximum likelihood method for estimating phylogenetic trees using simulated allele-frequency data. Evolution 42: 581-595.

Rzhetsky, A., and M. Nei. 1992. Statistical properties of the ordinary least-squares, generalized least-squares, and minimum-evolution methods of phylogenetic inference. Journal of Molecular Evolution 35: 367-375.

Saitou, N. and M. Nei. 1987. The neighbor-joining method: a new method for reconstructing phylogenetic trees. Molecular Biology and Evolution 4: 406-425.

Sanderson, M. J. 1990. Flexible phylogeny reconstruction: a review of phylogenetic inference packages using parsimony. Systematic Zoology 39: 414-420.

Sankoff, D. D., C. Morel, R. J. Cedergren. 1973. Evolution of 5S RNA and the nonrandomness of base replacement. Nature New Biology 245: 232-234.

Shimodaira, H. and M. Hasegawa. 1999. Multiple comparisons of log-likelihoods with applications to phylogenetic inference. Molecular Biology and Evolution 16: 1114-1116.

Smouse, P. E. and W.-H. Li. 1987. Likelihood analysis of mitochondrial restriction-cleavage patterns for the human-chimpanzee-gorilla trichotomy. Evolution 41: 1162-1176.

Sober, E. 1983a. Parsimony in systematics: philosophical issues. Annual Review of Ecology and Systematics 14: 335-357.

Sober, E. 1983b. A likelihood justification of parsimony. Cladistics 1: 209-233.

Sober, E. 1988. Reconstructing the Past: Parsimony, Evolution, and Inference. MIT Press, Cambridge, Massachusetts.

Sokal, R. R., and P. H. A. Sneath. 1963. Principles of Numerical Taxonomy. W. H. Freeman, San Francisco.

Steel, M. A. 1994. Recovering a tree from the Markov leaf colourations it generates under a Markov model. Applied Mathematics Letters 7: 19-23.

Studier, J. A. and K. J. Keppler. 1988. A note on the neighbor-joining algorithm of Saitou and Nei. Molecular Biology and Evolution 5: 729-731.

Swofford, D. L. and G. J. Olsen. 1990. Phylogeny reconstruction. Chapter 11, pages 411-501 in Molecular Systematics, ed. D. M. Hillis and C. Moritz. Sinauer Associates, Sunderland, Massachusetts.

Swofford, D. L., G. J. Olsen, P. J. Waddell, and D. M. Hillis. 1996. Phylogenetic inference. pp. 407-514 in Molecular Systematics, 2nd ed., ed. D. M. Hillis, C. Moritz, and B. K. Mable. Sinauer Associates, Sunderland, Massachusetts.

Templeton, A. R. 1983. Phylogenetic inference from restriction endonuclease cleavage site maps with particular reference to the evolution of humans and the apes. Evolution 37: 221-244.

Thompson, E. A. 1975. Human Evolutionary Trees. Cambridge University Press, Cambridge.

Wu, C. F. J. 1986. Jackknife, bootstrap and other resampling plans in regression analysis. Annals of Statistics 14: 1261-1295.

Yang, Z. 1993. Maximum-likelihood estimation of phylogeny from DNA sequences when substitution rates differ over sites. Molecular Biology and Evolution 10: 1396-1401.

Yang, Z. 1994. Maximum likelihood phylogenetic estimation from DNA sequences with variable rates over sites: approximate methods. Journal of Molecular Evolution 39: 306-314.

Yang, Z. 1995. A space-time process model for the evolution of DNA sequences. Genetics 139: 993-1005.

Credits

Over the years various granting agencies have contributed to the support of the PHYLIP project (at first without knowing it). They are:

Years Agency Grant or Contract Number
1999-2002 NSF BIR-9527687
1999-2002 NIH NIGMS R01 GM51929-04
1999-2001 NIH NIMH R01 HG01989-01
1995-1999 NIH NIGMS R01 GM51929-01
1992-1995 National Science Foundation DEB-9207558
1992-1994 NIH NIGMS Shannon Award 2 R55 GM41716-04
1989-1992 NIH NIGMS 1 R01-GM41716-01
1990-1992 National Science Foundation BSR-8918333
1987-1990 National Science Foundation BSR-8614807
1979-1987 U.S. Department of Energy DE-AM06-76RLO2225 TA DE-AT06-76EV71005

I am particularly grateful to program administrators William Moore, Irene Eckstrand, Peter Arzberger, and Conrad Istock, who have gone beyond the call of duty to make sure that PHYLIP continued.

Booby prizes for funding are awarded to:

  • The people at the U.S. Department of Energy who, in 1987, decided they were "not interested in phylogenies",
  • The members of the Systematics Panel of NSF who twice (in 1989 and 1992) positively recommended that my applications not be funded. I am very grateful to program director William Moore for courageously overruling their decision the first time. The 1992 NSF Systematics Panel could claim no credit for PHYLIP whatsoever.
  • The members of the 1992 Genetics Study Section of NIH who rated my proposal in the 53rd percentile (I don't know if that's 53rd from the top or the bottom, but does it matter?), thus denying it funding. I am, however, grateful to the NIGMS administrators, especially Irene Eckstrand, who supported giving me a "Shannon award" partially funding my work for a period in spite of this rating.

The original Camin-Sokal parsimony program and the polymorphism parsimony program were written by me in 1977 and 1978. They were Pascal versions of earlier FORTRAN programs I wrote in 1966 and 1967 using the same algorithm to infer phylogenies under the Camin-Sokal and polymorphism parsimony criteria. Harvey Motulsky worked for me as a programmer in 1971 and wrote FORTRAN programs to carry out the Camin-Sokal, Dollo, and polymorphism methods (he is known these days as the author of the scientific graphing package GraphPad). But most of the early work on PHYLIP other than my own was by Jerry Shurman and Mark Moehring. Jerry Shurman worked for me in the summers of 1979 and 1980, and Mark Moehring worked for me in the summers of 1980 and 1981. Both wrote original versions of many of the other programs, based on the original versions of my Camin-Sokal parsimony program and POLYM. These formed the basis of Version 1 of the Package, first distributed in October, 1980.

Version 2, released in the spring of 1982, involved a fairly complete rewrite by me of many of those programs. For version 3.3, Hisashi Horino reworked some parts of the programs CLIQUE and CONSENSE to make their output more comprehensible, and added some code to the tree-drawing programs DRAWGRAM and DRAWTREE as well. He also worked on some of the Drawtree and Drawgram driver code.

My more recent part-time programmers Akiko Fuseki, Sean Lamont, Andrew Keeffe, Daniel Yek, Dan Fineman, Patrick Colacurcio, Mike Palczewski, and Doug Buxton gave me substantial help with the current release, and their excellent work is greatly appreciated. Akiko in particular did much of the hard work of adding new features and changing old ones in the 3.4 and 3.5 releases, centralized many of the C routines in support files, and is responsible for the new versions of DNAPARS and PARS. Andrew prepared the Macintosh version, wrote RETREE, added the ray-tracing and PICT code to the DRAW programs and has since done much other work. Sean was central to the conversion to C, and tested it extensively. My postdoctoral fellow Mary Kuhner and her associate Jon Yamato created NEIGHBOR, the neighbor-joining and UPGMA program, for the current release, for which I am also grateful (Naruya Saitou and Li Jin kindly encouraged us to use some of the code from their own implementation of this method).

I am very grateful to over 200 users for algorithmic suggestions, complaints about features (or lack of features), and information about the behavior of their operating systems and compilers. A list of some of their names will be found at the credits page on the PHYLIP web site.

A major contribution to this package has been made by others writing programs or parts of programs. Chris Meacham contributed the important program FACTOR, long demanded by users, and the even more important ones PLOTREE and PLOTGRAM. Important parts of the code in DRAWGRAM and DRAWTREE were taken over from those two programs. Kent Fiala wrote function "reroot" to do outgroup-rooting, which was an essential part of many programs in earlier versions. Someone at the Western Australia Institute of Technology suggested the name PHYLIP (by writing it on the label on the outside of a magnetic tape), but they all seem to deny having done so (and I've lost the relevant letter).

The distribution of the package also owes much to Buz Wilson and Willem Ellis, who put a lot of effort into the early distributions of the PCDOS and Macintosh versions respectively. Christopher Meacham and Tom Duncan for three versions distributed a printed version of these documentation files (they are no longer able to do so), and I am very grateful to them for those efforts. William H.E. Day and F. James Rohlf have been very helpful in setting up the listserver news bulletin service which succeeded the PHYLIP newsletter for a time.

I also wish to thank the people who have made computer resources available to me, mostly in the loan of use of microcomputers. These include Jeremy Field, Clem Furlong, Rick Garber, Dan Jacobson, Rochelle Kochin, Monty Slatkin, Jim Archie, Jim Thomas, and George Gilchrist.

I should also note the computers used to develop this package: These include a CDC 6400, two DECSystem 1090s, my trusty old SOL-20, my old Osborne-1, a VAX 11/780, a VAX 8600, a MicroVAX I, a DECstation 3100, my old Toshiba 1100+, my DECstation 5000/200, a DECstation 5000/125, a Compudyne 486DX/33, a Trinity Genesis 386SX, a Zenith Z386, a Mac Classic, a DEC Alphastation 400 4/233, a Pentium 120, a Pentium 200, a PowerMac 6100, and a Macintosh G3. (One of the reasons we have been successful in achieving compatibility between different computer systems is that I have had to run them myself under so many different operating systems and compilers).


Other Phylogeny Programs Available Elsewhere

A comprehensive list of phylogeny programs is maintained at the PHYLIP web site, on its Phylogeny Programs pages.

Here we will simply mention some of the major general-purpose programs. For many more programs, and much more detail, see those web pages.

PAUP*   A comprehensive program with parsimony, likelihood, and distance matrix methods. It competes with PHYLIP to be responsible for the most trees published. Written by David Swofford and distributed by Sinauer Associates of Sunderland, Massachusetts. It is described in web pages for the Macintosh version, the Windows version, and the Unix/OpenVMS version. Current prices are $100 for the Macintosh version, $85 for the Windows version, and $150 for Unix versions for many kinds of workstations.

MacClade   An interactive Macintosh and PowerMac program to rearrange trees and watch the changes in the fit of the trees to data as judged by parsimony. MacClade has a great many features including a spreadsheet data editor and many different descriptive statistics for different kinds of data. It is particularly designed to export and import data to and from PAUP*. MacClade is available for $100 from Sinauer Associates, of Sunderland, Massachusetts. It is described in a web page at http://www.sinauer.com/detail.php?id=4707. MacClade is also described on its Web page, at http://phylogeny.arizona.edu/macclade/macclade.html.

MEGA   A Windows and DOS program by Sudhir Kumar of Arizona State University (written together with Koichiro Tamura and Masatoshi Nei while he was a student in Nei's lab at Pennsylvania State University). It can carry out parsimony and distance matrix methods for DNA sequence data. Version 2.1 for Windows can be downloaded from the MEGA web site at http://www.megasoftware.net.

PAML   Ziheng Yang of the Department of Genetics and Biometry at University College, London has written this package of programs to carry out likelihood analysis of DNA and protein sequence data. PAML is particularly strong in the options for coping with variability of rates of evolution from site to site, though it is less able than some other packages to search effectively for the best tree. It is available as C source code and as PowerMac and Windows executables from its web site at http://abacus.gene.ucl.ac.uk/software/paml.html.

TREE-PUZZLE   This package by Korbinian Strimmer and Arndt von Haeseler was begun when they were at the Universität München in Germany. TREE-PUZZLE can carry out likelihood methods for DNA and protein data, searching by the strategy of "quartet puzzling" which they invented. It can also compute distances. It superimposes trees estimated from many quartets of species. TREE-PUZZLE is available for Unix, Macintoshes, or Windows from their web site at http://www.tree-puzzle.de/.

DAMBE    A package written by Xuhua Xia, then of the Department of Ecology and Biodiversity of the University of Hong Kong. Its initials stand for Data Analysis in Molecular Biology and Evolution. DAMBE is a general-purpose package for DNA and protein sequence phylogenies. It can read and convert a number of file formats, has many features for descriptive statistics, and can compute a number of commonly-used distance matrix measures and infer phylogenies by parsimony, distance, or likelihood methods, including bootstrapping and jackknifing. A number of kinds of statistical tests of trees are available, and it can also display phylogenies. DAMBE includes a copy of ClustalW as well; DAMBE consists of Windows95 executables. It is available from its web site at http://web.hku.hk/~xxia/software/software.htm. Xia has now moved to the Department of Biology of the University of Ottawa, Canada, and I suspect the DAMBE web site will soon follow him there.

MOLPHY   A package of programs for carrying out likelihood analysis of DNA and protein data, written by Jun Adachi and Masami Hasegawa of the Institute of Statistical Mathematics in Tokyo, Japan. The source code is available from them at the MOLPHY web site at http://www.ism.ac.jp/software/ismlib/softother.e.html, and Windows executables are available from Russell Malmberg's web site at http://dogwood.botany.uga.edu/malmberg/software.html.

Hennig86   A fast parsimony program by J. S. Farris of the Naturhistoriska Riksmuseet in Stockholm, Sweden, for discrete characters data (it can handle DNA if its states are recoded to be digits). Reputed to be faster than PAUP*. The program is distributed as an executable and costs $50, plus $5 mailing costs ($10 outside of the U.S.). The user's name should be stated, as copies are personalized as a copy-protection measure. It is distributed by Arnold Kluge, Amphibians and Reptiles, Museum of Zoology, University of Michigan, Ann Arbor, Michigan 48109-1079, U.S.A. (akluge@umich.edu) and by Diana Lipscomb at George Washington University (BIODL@gwuvm.gwu.edu).

RnA   J. S. Farris's very fast program which uses parsimony to carry out jackknifing resampling of DNA sequence data. This would be nearly equivalent in properties to bootstrapping if the jackknifing were sampling random halves of the data, but Farris prefers to have each jackknife sample delete a fraction 1/e of the data, which will give most groups too much support (he would disagree with this statement). RnA is available from Arnold Kluge, Amphibians and Reptiles, Museum of Zoology, University of Michigan, Ann Arbor, Michigan 48109-1079, U.S.A. (akluge@umich.edu) and Diana Lipscomb at George Washington University (BIODL@gwuvm.gwu.edu) who may be contacted for details. The cost is about $30 US.

NONA   Pablo Goloboff, of the Instituto Miguel Lillo in Tucuman, Argentina, has written these very fast parsimony programs, capable of some relevant forms of weighted parsimony, which can handle either DNA sequence data or discrete characters. It is available as shareware from the web site http://www.cladistics.com/aboutNona.htm and comes with a 30-day free trial, after which NONA must be purchased by sending a check for $40.00 either directly to the author, or to: James M. Carpenter, Attn: NONA, Division of Invertebrate Zoology, American Museum of Natural History, Central Park West at 79th Street, New York, NY 10024.

TNT   This program, by Pablo Goloboff, J. S. Farris, and Kevin Nixon, is for searching large data sets for most parsimonious trees. The authors are respectively at the Instituto Miguel Lillo in Tucuman, Argentina, the Naturhistoriska Riksmuseet in Stockholm, Sweden, and the Hortorium, Cornell University, Ithaca, New York. TNT is described as faster than other methods, though not faster than NONA for small to medium data sets. Its distribution status is somewhat uncertain. The site http://www.cladistics.com/aboutTNT.html describes it as unavailable, while the web site http://www.cladistics.com/webtnt.html makes a beta version available for download. The program downloaded is free but needs a password to function, which the user should obtain from Pablo Goloboff (see the latter web page for details).

These are only a few of the more than 194 different phylogeny packages that are now available (as of January, 2001 - the number keeps increasing). The others are described (and web links and ftp addresses provided) at my Phylogeny Programs web pages at the address given above.


How You Can Help Me

Simply let me know of any problems you have had adapting the programs to your computer. I can often make "transparent" changes that, by making the code avoid the wilder, woolier, and less standard parts of C, not only help others who have your machine but even improve the chance of the programs functioning on new machines. I would like fairly detailed information on what gave trouble, on what operating system, machine, and (if relevant) compiler, and what had to be done to make the programs work. I am sometimes able to do some over-the-telephone trouble-shooting, particularly if I don't have to pay for the call, but electronic mail is the best way for me to be asked about problems, as you can include your input and output files so I can see what is going on (please do not send them as Attachments, but as part of the body of a message). I'd really like these programs to be able to run with only routine changes on absolutely everything, down to and possibly including the Amana Touchmatic Radarange Microwave Oven which was an Intel 8080 system (in fact, early versions of this package did run successfully on Intel 8080 systems running the CP/M operating system). A PalmPilot version is contemplated too.

I would also like to know timings of programs from the package, when run on the three test input files provided above, for various computer and compiler combinations, so that I can provide this information in the section on speeds of this document.

For the phylogeny plotting programs DRAWGRAM and DRAWTREE, I am particularly interested in knowing what has to be done to adapt them for other graphic file formats.

You can also be helpful to PHYLIP users in your part of the world by helping them get the latest version of PHYLIP from our web site and by helping them with any problems they may have in getting PHYLIP working on their data.

Your help is appreciated. I am always happy to hear suggestions for features and programs that ought to be incorporated in the package, but please do not be upset if I turn out to have already considered the particular possibility you suggest and decided against it.


In Case of Trouble

Read The (documentation) Files Meticulously ("RTFM"). If that doesn't solve the problem, please check the Frequently Asked Questions web page at the PHYLIP web site:

http://evolution.gs.washington.edu/phylip/faq.html

and the PHYLIP Bugs web page at that site:

http://evolution.gs.washington.edu/phylip/bugs.html

If none of these answers your question, get in touch with me. My electronic mail address is given below. If you do ask about a problem, please specify the program name, version of the package, computer operating system, and send me your data file so I can test the problem. Do not send your data file as an e-mail Attachment but instead as the body of a message. I read the e-mail on a Unix system, which makes it impossible to read some formats of attachments without running around to other machines and moving the files there. This is one of my least favorite activities, so please do not use attachments. Also it will help if you have the relevant output and documentation files so that you can refer to them in any correspondence. I can also be reached by telephone by calling me in my office: +1-(206)-543-0150, or at home: +1-(206)-526-9057 (how's that for user support!). If I cannot be reached at either place, a message can be left at the office of the Department of Genome Sciences, (206)-221-7377 but I prefer strongly that I not call you, as in any phone consultation the least you can do is pay the phone bill. Better yet, use electronic mail.

Particularly if you are in a part of the world distant from me, you may also want to try to get in touch with other users of PHYLIP nearby. I can also, if requested, provide a list of nearby users.

Joe Felsenstein
Department of Genome Sciences
University of Washington
Box 357730
Seattle, Washington 98195-7730, U.S.A.

Electronic mail addresses:      joe@gs.washington.edu


gendist
version 3.6

GENDIST - Compute genetic distances from gene frequencies

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program computes any one of three measures of genetic distance from a set of gene frequencies in different populations (or species). The three are Nei's genetic distance (Nei, 1972), Cavalli-Sforza's chord measure (Cavalli-Sforza and Edwards, 1967) and Reynolds, Weir, and Cockerham's (1983) genetic distance. These are written to an output file in a format that can be read by the distance matrix phylogeny programs FITCH and KITSCH.

The three measures have somewhat different assumptions. All assume that all differences between populations arise from genetic drift. Nei's distance is formulated for an infinite isoalleles model of mutation, in which there is a rate of neutral mutation and each mutation is to a completely new allele. It is assumed that all loci have the same rate of neutral mutation, and that the genetic variability initially in the population is at equilibrium between mutation and genetic drift, with the effective population size of each population remaining constant.

Nei's distance is:

\[
D = -\ln\left( \frac{\sum_m \sum_i p_{1mi}\, p_{2mi}}
{\left[\sum_m \sum_i p_{1mi}^{2}\right]^{1/2} \left[\sum_m \sum_i p_{2mi}^{2}\right]^{1/2}} \right)
\]

where m is summed over loci, i over alleles at the m-th locus, and where p_{1mi} is the frequency of the i-th allele at the m-th locus in population 1. Subject to the above assumptions, Nei's genetic distance is expected, for a sample of sufficiently many equivalent loci, to rise linearly with time.
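As an illustration of this computation (this is a minimal sketch, not the code used in GENDIST itself), a C fragment computing Nei's distance for one pair of populations might look like the following; the array layout, the bound MAXALLELES, and the function name are invented for the example.

#include <math.h>

#define MAXALLELES 20            /* arbitrary upper bound, for the sketch only */

/* p1[m][i] and p2[m][i] are the frequencies of allele i at locus m in the
   two populations; nalleles[m] is the number of alleles at locus m. */
double nei_distance(int nloci, const int *nalleles,
                    double p1[][MAXALLELES], double p2[][MAXALLELES])
{
  double jxy = 0.0, jx = 0.0, jy = 0.0;
  for (int m = 0; m < nloci; m++)
    for (int i = 0; i < nalleles[m]; i++) {
      jxy += p1[m][i] * p2[m][i];     /* numerator: sum of cross-products   */
      jx  += p1[m][i] * p1[m][i];     /* sums of squared frequencies ...    */
      jy  += p2[m][i] * p2[m][i];     /* ... in populations 1 and 2         */
    }
  return -log(jxy / (sqrt(jx) * sqrt(jy)));
}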

The other two genetic distances assume that there is no mutation, and that all gene frequency changes are by genetic drift alone. However they do not assume that population sizes have remained constant and equal in all populations. They cope with changing population size by having expectations that rise linearly not with time, but with the sum over time of 1/N, where N is the effective population size. Thus if population size doubles, genetic drift will be taking place more slowly, and the genetic distance will be expected to be rising only half as fast with respect to time. Both genetic distances are different estimators of the same quantity under the same model.

Cavalli-Sforza's chord distance is given by

\[
D^{2} = 4 \sum_m \left[\, 1 - \sum_i p_{1mi}^{1/2}\, p_{2mi}^{1/2} \right] \Big/ \sum_m \left( a_m - 1 \right)
\]

where m indexes the loci, where i is summed over the alleles at the m-th locus, and where a_m is the number of alleles at the m-th locus. It can be shown that this distance always satisfies the triangle inequality. Note that as given here it is divided by the number of degrees of freedom, the sum over loci of the number of alleles minus one. The quantity which is expected to rise linearly with the amount of genetic drift (the sum of 1/N over time) is D^2, the quantity computed above, and that is what is written out into the distance matrix.
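Again as a sketch only (not GENDIST's own code), the squared chord measure written out by the program can be computed with the same invented array layout as in the Nei example above:

#include <math.h>

#define MAXALLELES 20            /* arbitrary upper bound, for the sketch only */

double chord_distance2(int nloci, const int *nalleles,
                       double p1[][MAXALLELES], double p2[][MAXALLELES])
{
  double sum = 0.0;
  int df = 0;                          /* sum over loci of (a_m - 1)          */
  for (int m = 0; m < nloci; m++) {
    double s = 0.0;
    for (int i = 0; i < nalleles[m]; i++)
      s += sqrt(p1[m][i] * p2[m][i]);  /* sum of sqrt(p1mi) * sqrt(p2mi)      */
    sum += 1.0 - s;
    df  += nalleles[m] - 1;
  }
  return 4.0 * sum / df;               /* the D^2 written to the output file  */
}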

Reynolds, Weir, and Cockerham's (1983) genetic distance is


\[
D^{2} = \frac{\sum_m \sum_i \left( p_{1mi} - p_{2mi} \right)^{2}}
{2 \sum_m \left[\, 1 - \sum_i p_{1mi}\, p_{2mi} \right]}
\]

where the notation is as before and D^2 is the quantity that is expected to rise linearly with cumulated genetic drift.
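A corresponding sketch for the Reynolds, Weir, and Cockerham distance, with the same invented array layout (again, this is for illustration and is not taken from the GENDIST source):

#define MAXALLELES 20            /* arbitrary upper bound, for the sketch only */

double reynolds_distance2(int nloci, const int *nalleles,
                          double p1[][MAXALLELES], double p2[][MAXALLELES])
{
  double num = 0.0, denom = 0.0;
  for (int m = 0; m < nloci; m++) {
    double cross = 0.0;
    for (int i = 0; i < nalleles[m]; i++) {
      double d = p1[m][i] - p2[m][i];
      num   += d * d;                  /* squared frequency differences       */
      cross += p1[m][i] * p2[m][i];
    }
    denom += 1.0 - cross;
  }
  return num / (2.0 * denom);          /* D^2, expected to rise with drift    */
}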

Having computed one of these genetic distances, one which you feel is appropriate to the biology of the situation, you can use it as the input to the programs FITCH, KITSCH or NEIGHBOR. Keep in mind that the statistical model in those programs implicitly assumes that the distances in the input table have independent errors. For any measure of genetic distance this will not be true, as bursts of random genetic drift, or sampling events in drawing the sample of individuals from each population, cause fluctuations of gene frequency that affect many distances simultaneously. While this is not expected to bias the estimate of the phylogeny, it does mean that the weighing of evidence from all the different distances in the table will not be done with maximal efficiency. One issue is which value of the P (Power) parameter should be used. This depends on how the variance of a distance rises with its expectation. For Cavalli-Sforza's chord distance, and for the Reynolds et al. distance, it can be shown that the variance of the distance will be proportional to the square of its expectation; this suggests a value of 2 for P, which is the default value for FITCH and KITSCH (there is no P option in NEIGHBOR).

If you think that the pure genetic drift model is appropriate, and are thus tempted to use the Cavalli-Sforza or Reynolds et al. distances, you might consider using the maximum likelihood program CONTML instead. It will correctly weigh the evidence in that case. Like those genetic distances, it uses approximations that break down as loci start to drift all the way to fixation. Although Nei's distance will not break down in that case, it makes other assumptions about equality of substitution rates at all loci and constancy of population sizes.

The most important thing to remember is that genetic distance is not an abstract, idealized measure of "differentness". It is an estimate of a parameter (time or cumulated inverse effective population size) of the model which is thought to have generated the differences we see. As an estimate, it has statistical properties that can be assessed, and we should never have to choose between genetic distances based on their aesthetic properties, or on the personal prestige of their originators. Considering them as estimates focuses us on the questions which genetic distances are intended to answer, for if there are none there is no reason to compute them. For further perspective on genetic distances, I recommend my own paper evaluating Reynolds, Weir, and Cockerham (1983), and the material in Nei's book (Nei, 1987).

INPUT FORMAT

The input to this program is standard and is as described in the Gene Frequencies and Continuous Characters Programs documentation file above. It consists of the number of populations (or species), the number of loci, and after that a line containing the numbers of alleles at each of the loci. Then the gene frequencies follow in standard format.

The options are selected using a menu:


Genetic Distance Matrix program, version 3.6a3

Settings for this run:
  A   Input file contains all alleles at each locus?  One omitted at each locus
  N                        Use Nei genetic distance?  Yes
  C                Use Cavalli-Sforza chord measure?  No
  R                   Use Reynolds genetic distance?  No
  L                         Form of distance matrix?  Square
  M                      Analyze multiple data sets?  No
  0              Terminal type (IBM PC, ANSI, none)?  (none)
  1            Print indications of progress of run?  Yes

  Y to accept these or type the letter for one to change

The A (All alleles) option is described in the Gene Frequencies and Continuous Characters Programs documentation file. As with CONTML, it is the signal that all alleles are represented in the gene frequency input, without one being left out per locus. C, N, and R are the signals to use the Cavalli-Sforza, Nei, or Reynolds et al. genetic distances respectively. The Nei distance is the default, and it will be computed if none of these options is explicitly invoked. The L option is the signal that the distance matrix is to be written out in Lower triangular form. The M option is the usual Multiple Data Sets option, useful for doing bootstrap analyses with the distance matrix programs. It allows multiple data sets, but does not allow multiple sets of weights (since there is no provision for weighting in this program).

OUTPUT FORMAT

The output file simply contains on its first line the number of species (or populations). Each species (or population) starts a new line, with its name printed out first, and then up to nine genetic distances printed on each line, in the standard format used as input by the distance matrix programs. The output, in its default form, is ready to be used in the distance matrix programs.

CONSTANTS

The constants available to be changed by the user if the program is recompiled are "namelength", the length of a species name, set to 10 in the distribution, and "epsilon", which defines a small quantity that is used when checking whether allele frequencies at a locus sum to more than one: if all alleles are input (option A) and the sum differs from 1 by more than epsilon, or if not all alleles are input and the sum is greater than 1 by more than epsilon, the program will see this as an error and stop. You may find this causes difficulties if your gene frequencies have been rounded. I have tried to keep epsilon from being too small to prevent such problems.

RUN TIMES

The program is quite fast and the user should effectively never be limited by the amount of time it takes. All that the program has to do is read in the gene frequency data and then compute the genetic distance formula for each pair of species. This should require an amount of effort proportional to the total number of alleles over loci, and to the square of the number of populations.

FUTURE OF THIS PROGRAM

The main change that will be made to this program in the future is to add provisions for taking into account the sample size for each population. The genetic distance formulas have been modified by their inventors to correct for the inaccuracy of the estimate of the genetic distances, which on the whole should artificially increase the distance between populations by a small amount dependent on the sample sizes. The main difficulty with doing this is that I have not yet settled on a format for putting the sample size in the input data along with the gene frequency data for a species or population.

I may also include other distance measures, but only if I think their use is justified. There are many very arbitrary genetic distances, and I am reluctant to include most of them.


TEST DATA SET

    5    10
2 2 2 2 2 2 2 2 2 2
European   0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205
0.8055 0.5043
African    0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600
0.7582 0.6207
Chinese    0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726
0.7482 0.7334
American   0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000
0.8086 0.8636
Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396
0.9097 0.2976


TEST SET OUTPUT

    5
European    0.0000  0.0780  0.0807  0.0668  0.1030
African     0.0780  0.0000  0.2347  0.1050  0.2273
Chinese     0.0807  0.2347  0.0000  0.0539  0.0633
American    0.0668  0.1050  0.0539  0.0000  0.1348
Australian  0.1030  0.2273  0.0633  0.1348  0.0000

dnadist

version 3.6

DNADIST -- Program to compute distance matrix
from nucleotide sequences

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program uses nucleotide sequences to compute a distance matrix, under four different models of nucleotide substitution. It can also compute a table of similarity between the nucleotide sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the maximum likelihood program DNAML or the parsimony program DNAPARS.

The program reads in nucleotide sequences and writes an output file containing the distance matrix, or else a table of similarity between sequences. The four models of nucleotide substitution are those of Jukes and Cantor (1969), Kimura (1980), the F84 model (Kishino and Hasegawa, 1989; Felsenstein and Churchill, 1996), and the model underlying the LogDet distance (Barry and Hartigan, 1987; Lake, 1994; Steel, 1994; Lockhart et al., 1994). All except the LogDet distance can be made to allow for unequal rates of substitution at different sites, as Jin and Nei (1990) did for the Jukes-Cantor model. The program correctly takes into account a variety of sequence ambiguities, although in cases where they exist it can be slow.

Jukes and Cantor's (1969) model assumes that there is independent change at all sites, with equal probability. Whether a base changes is independent of its identity, and when it changes there is an equal probability of ending up with each of the other three bases. Thus the transition probability matrix (this is a technical term from probability theory and has nothing to do with transitions as opposed to transversions) for a short period of time dt is:

              To:    A        G        C        T
                   ---------------------------------
               A  | 1-3a      a         a       a
       From:   G  |  a       1-3a       a       a
               C  |  a        a        1-3a     a
               T  |  a        a         a      1-3a

where a is u dt, the product of the rate of substitution per unit time (u) and the length dt of the time interval. For longer periods of time this implies that the probability that two sequences will differ at a given site is:

\[
p = \tfrac{3}{4}\left( 1 - e^{-\frac{4}{3} u t} \right)
\]

and hence that if we observe p, we can compute an estimate of the branch length ut by inverting this to get

\[
u t = -\tfrac{3}{4} \ln\left( 1 - \tfrac{4}{3}\, p \right)
\]
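As a minimal illustration of this estimate (not the code used in DNADIST, which also handles ambiguity codes, weights, and site categories), a C sketch for two fully resolved, aligned sequences of equal length might look like this; the function name and the convention of returning a negative value on failure are invented:

#include <math.h>
#include <string.h>

double jukes_cantor(const char *seq1, const char *seq2)
{
  size_t len = strlen(seq1), diff = 0;     /* assumes equal-length sequences */
  for (size_t k = 0; k < len; k++)
    if (seq1[k] != seq2[k]) diff++;
  double p = (double) diff / len;          /* observed fraction of differing sites */
  if (p >= 0.75)
    return -1.0;                           /* estimate would be infinite (see below) */
  return -0.75 * log(1.0 - (4.0 / 3.0) * p);
}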

The Kimura "2-parameter" model is almost as symmetric as this, but allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:

              To:     A        G        C        T
                   ---------------------------------
               A  | 1-a-2b     a         b       b
       From:   G  |   a      1-a-2b      b       b
               C  |   b        b       1-a-2b    a
               T  |   b        b         a     1-a-2b

where a is u dt, the product of the rate of transitions per unit time (u) and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length of the time interval.
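For reference, Kimura's (1980) original pairwise formula for this model can be written as a small C function, given the observed proportions P of transitional and Q of transversional differences. As explained further below, DNADIST itself does not use this formula; it computes a maximum likelihood estimate with one preset transition/transversion ratio, so its values will differ somewhat. The function name and error convention here are invented for the illustration.

#include <math.h>

double kimura_original(double P, double Q)
{
  double w1 = 1.0 - 2.0 * P - Q;
  double w2 = 1.0 - 2.0 * Q;
  if (w1 <= 0.0 || w2 <= 0.0)
    return -1.0;                     /* distance undefined (effectively infinite) */
  return -0.5 * log(w1 * sqrt(w2));  /* K = -(1/2) ln[(1-2P-Q) sqrt(1-2Q)]        */
}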

The F84 model incorporates different rates of transition and transversion, while also allowing for different frequencies of the four nucleotides. It is the model which is used in DNAML, the maximum likelihood nucleotide sequence phylogeny program in this package. You will find the model described in the document for that program. The transition probabilities for this model are given by Kishino and Hasegawa (1989), and further explained in a paper by me and Gary Churchill (1996).

The LogDet distance allows a fairly general model of substitution. It computes the distance from the determinant of the empirically observed matrix of joint probabilities of nucleotides in the two species. An explanation of it is available in the chapter by Swofford et al. (1996).

The first three models are closely related. The DNAML model reduces to Kimura's two-parameter model if we assume that the equilibrium frequencies of the four bases are equal. The Jukes-Cantor model in turn is a special case of the Kimura 2-parameter model where a = b. Thus each model is a special case of the ones that follow it, Jukes-Cantor being a special case of both of the others.

The Jin and Nei (1990) correction for variation in rate of evolution from site to site can be adapted to all of the first three models. It assumes that the rate of substitution varies from site to site according to a gamma distribution, with a coefficient of variation that is specified by the user. The user is asked for it when choosing this option in the menu.

Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. For the Jukes-Cantor model, the estimate is computed using the formula for ut given above, as long as the nucleotide symbols in the two sequences are all either A, C, G, T, U, N, X, ?, or - (the latter four indicate a deletion or an unknown nucleotide). This estimate is a maximum likelihood estimate for that model. For the Kimura 2-parameter model, with only these nucleotide symbols, formulas special to that model are also used. These are also, in effect, computing the maximum likelihood estimate for that model. In the Kimura case it depends on the observed sequences only through the sequence length and the observed number of transition and transversion differences between those two sequences. The calculation in that case is a maximum likelihood estimate and will differ somewhat from the estimate obtained from the formulas in Kimura's original paper. That formula was also a maximum likelihood estimate, but with the transition/transversion ratio estimated empirically, separately for each pair of sequences. In the present case, one overall preset transition/transversion ratio is used, which makes the computations harder but achieves greater consistency between different comparisons.

For the F84 model, or for any of the models where one or both sequences contain at least one of the other ambiguity codes such as Y, R, etc., a maximum likelihood calculation is also done using code which was originally written for DNAML. Its disadvantage is that it is slow. The resulting distance is in effect a maximum likelihood estimate of the divergence time (the total branch length) between the two sequences. However the present program will be much faster than versions earlier than 3.5, because I have speeded up the iterations.

The LogDet model computes the distance from the determinant of the matrix of co-occurrence of nucleotides in the two species, according to the formula

\[
D = -\tfrac{1}{4}\left( \ln |F| - \tfrac{1}{2} \ln\left( f_{A1} f_{C1} f_{G1} f_{T1}\, f_{A2} f_{C2} f_{G2} f_{T2} \right) \right)
\]

where F is a matrix whose (i,j) element is the fraction of sites at which base i occurs in one species and base j occurs in the other, and f_{ji} is the fraction of sites at which species i has base j. The LogDet distance cannot cope with ambiguity codes. It must have completely defined sequences. One limitation of the LogDet distance is that it may be infinite sometimes, if there are too many changes between certain pairs of nucleotides. This can be particularly noticeable with distances computed from bootstrapped sequences.
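To make the formula concrete, here is a minimal C sketch of the whole LogDet computation for two fully resolved, aligned sequences (A, C, G, T/U only). The helper names and the small determinant routine are invented for the example and are not taken from the DNADIST source.

#include <math.h>
#include <string.h>

static int base_index(char c)
{
  switch (c) {
    case 'A': return 0;  case 'C': return 1;
    case 'G': return 2;  case 'T': case 'U': return 3;
    default:  return -1;                    /* ambiguity codes are not allowed */
  }
}

static double det4(double m[4][4])          /* determinant by Gaussian elimination */
{
  double d = 1.0;
  for (int col = 0; col < 4; col++) {
    int piv = col;
    for (int r = col + 1; r < 4; r++)
      if (fabs(m[r][col]) > fabs(m[piv][col])) piv = r;
    if (m[piv][col] == 0.0) return 0.0;
    if (piv != col) {                       /* row swap flips the sign */
      for (int c = 0; c < 4; c++) {
        double t = m[col][c]; m[col][c] = m[piv][c]; m[piv][c] = t;
      }
      d = -d;
    }
    d *= m[col][col];
    for (int r = col + 1; r < 4; r++) {
      double f = m[r][col] / m[col][col];
      for (int c = col; c < 4; c++) m[r][c] -= f * m[col][c];
    }
  }
  return d;
}

double logdet_distance(const char *seq1, const char *seq2)
{
  double F[4][4] = {{0.0}}, f1[4] = {0.0}, f2[4] = {0.0};
  size_t len = strlen(seq1);                /* assumes equal-length sequences */
  for (size_t k = 0; k < len; k++) {
    int i = base_index(seq1[k]), j = base_index(seq2[k]);
    if (i < 0 || j < 0) return -1.0;
    F[i][j] += 1.0 / len;                   /* joint fraction of sites        */
    f1[i]   += 1.0 / len;                   /* base composition of species 1  */
    f2[j]   += 1.0 / len;                   /* base composition of species 2  */
  }
  double detF = det4(F);
  double prod = 1.0;
  for (int i = 0; i < 4; i++) prod *= f1[i] * f2[i];
  if (detF <= 0.0 || prod <= 0.0)
    return -1.0;                            /* distance would be infinite     */
  return -0.25 * (log(detF) - 0.5 * log(prod));
}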

Note that there is an assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause us to misinterpret the meaning of those sites that had changed.

For all of these distance methods, the program allows us to specify that "third position" bases have a different rate of substitution than first and second positions, that introns have a different rate than exons, and so on. The Categories option which does this allows us to make up to 9 categories of sites and specify different rates of change for them.

In addition to the four distance calculations, the program can also compute a table of similarities between nucleotide sequences. These values are the fractions of sites identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical nucleotides, so that no credit is given for having (for example) different purines at corresponding sites in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree.

INPUT FORMAT AND OPTIONS

Input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion -- neither is dot (".").

The options are selected using an interactive menu. The menu looks like this:


Nucleic acid sequence Distance Matrix program, version 3.6a3

Settings for this run:
  D  Distance (F84, Kimura, Jukes-Cantor, LogDet)?  F84
  G          Gamma distributed rates across sites?  No
  T                 Transition/transversion ratio?  2.0
  C            One category of substitution rates?  Yes
  W                         Use weights for sites?  No
  F                Use empirical base frequencies?  Yes
  L                       Form of distance matrix?  Square
  M                    Analyze multiple data sets?  No
  I                   Input sequences interleaved?  Yes
  0            Terminal type (IBM PC, ANSI, none)?  (none)
  1             Print out the data at start of run  No
  2           Print indications of progress of run  Yes

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The D option selects one of the four distance methods, or the similarity table. It toggles among the five methods. The default method, if none is specified, is the F84 model.

If the G (Gamma distribution) option is selected, the user will be asked to supply the coefficient of variation of the rate of substitution among sites. This is different from the parameters used by Nei and Jin but related to them: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

\[ CV = 1 / a^{1/2} \]

or

\[ a = 1 / (CV)^{2} \]

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
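For example, a user who supplies a coefficient of variation of 0.5 is in effect specifying a shape parameter a = 1/(0.5)^2 = 4, while supplying CV = 1 corresponds to a = 1, an exponential distribution of rates among sites.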

The F (Frequencies) option appears when the Maximum Likelihood distance is selected. This distance requires that the program be provided with the equilibrium frequencies of the four bases A, C, G, and T (or U). Its default setting is one which may save users much time. If you want to use the empirical frequencies of the bases, observed in the input sequences, as the base frequencies, you simply use the default setting of the F option. These empirical frequencies are not really the maximum likelihood estimates of the base frequencies, but they will often be close to those values (what they are is maximum likelihood estimates under a "star" or "explosion" phylogeny). If you change the setting of the F option you will be prompted for the frequencies of the four bases. These must add to 1 and are to be typed on one line separated by blanks, not commas.

The T option in this program does not stand for Threshold, but instead is the Transition/transversion option. The user is prompted for a real number greater than 0.0, as the expected ratio of transitions to transversions. Note that this is not the ratio of the first to the second kinds of events, but the resulting expected ratio of transitions to transversions. The exact relationship between these two quantities depends on the frequencies in the base pools. The default value of the T parameter if you do not use the T option is 2.0.

The C option allows user-defined rate categories. The user is prompted for the number of user-defined rates, and for the rates themselves, which cannot be negative but can be zero. These numbers, which must be nonnegative (some could be 0), are defined relative to each other, so that if rates for three categories are set to 1 : 3 : 2.5 this would have the same meaning as setting them to 2 : 6 : 5. The assignment of rates to sites is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444

The L option specifies that the output file is to have the distance matrix in lower triangular form.

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of sites to be analyzed, ignoring the others. The sites selected are those with weight 1. If the W option is not invoked, all sites are analyzed. The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file.

The M (multiple data sets) option will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets from the input file. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights. Note also that when we use multiple weights for bootstrapping we can also then maintain different rate categories for different sites in a meaningful way. If you use the multiple data sets option without using multiple weights, you should not at the same time use the user-defined rate categories option (option C).

Option 0 is the usual one. It is described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

OUTPUT FORMAT

As the distances are computed, the program prints on your screen or terminal the names of the species in turn, followed by one dot (".") for each other species for which the distance to that species has been computed. Thus if there are ten species, the first species name is printed out, followed by nine dots, then on the next line the next species name is printed out followed by eight dots, then the next followed by seven dots, and so on. The pattern of dots should form a triangle. When the distance matrix has been written out to the output file, the user is notified of that.

The output file contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix of distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used.

If the C (Categories) option is used a table of the relative rates of expected substitution at each category of sites is printed, and a listing of the categories each site is in.

There will then follow the equilibrium frequencies of the four bases. If the Jukes-Cantor or Kimura distances are used, these will necessarily be 0.25 : 0.25 : 0.25 : 0.25. The output then shows the transition/transversion ratio that was specified or used by default. In the case of the Jukes-Cantor distance this will always be 0.5. The transition-transversion parameter (as opposed to the ratio) is also printed out: this is used within the program and can be ignored. There then follow the data sequences, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats.

The distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
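As a worked example under the Jukes-Cantor model, a branch of length 0.26 gives an expected fraction of differing sites at its two ends of 3/4 (1 - e^(-(4/3)(0.26))), which is about 0.22, so the two sequences would be expected to differ at roughly 22% of sites rather than 26%.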

One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases. For example, with the Jukes-Cantor model, if the two sequences differ in 75% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues an error message indicating which pair of species are causing the problem, and stops. It might be that, had it continued running, it would have also run into the same problem with other pairs of species. If the Kimura distance is being used there may be no error message; the program may simply give a large distance value (it is iterating towards infinity and the value is just where the iteration stopped). Likewise some maximum likelihood estimates may also become large for the same reason (the sequences showing more divergence than is expected even with infinite branch length). I hope in the future to add more warning messages that would alert the user to this.

If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the other two characters omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences.

PROGRAM CONSTANTS

The constants that are available to be changed by the user at the beginning of the program include "maxcategories", the maximum number of site categories, "iterations", which controls the number of times the program iterates the EM algorithm that is used to do the maximum likelihood distance, "namelength", the length of species names in characters, and "epsilon", a parameter which controls the accuracy of the results of the iterations which estimate the distances. Making "epsilon" smaller will increase run times but result in more decimal places of accuracy. This should not be necessary.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring for each pattern, lends itself readily to parallel processing.


TEST DATA SET

   5   13
Alpha     AACGTGGCCACAT
Beta      AAGGTCGCCACAC
Gamma     CAGTTCGCCACAA
Delta     GAGATTTCCGCCT
Epsilon   GAGATCTCCGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on)

(Note that when the options for displaying the input data are turned off, the output is in a form suitable for use as an input file in the distance matrix programs).


Nucleic acid sequence Distance Matrix program, version 3.6a3

 5 species,  13  sites

  F84 Distance

Transition/transversion ratio =   2.000000

Name            Sequences
----            ---------

Alpha        AACGTGGCCA CAT
Beta         AAGGTCGCCA CAC
Gamma        CAGTTCGCCA CAA
Delta        GAGATTTCCG CCT
Epsilon      GAGATCTCCG CCC



Empirical Base Frequencies:

   A       0.24615
   C       0.36923
   G       0.21538
  T(U)     0.16923

Alpha       0.0000  0.3039  0.8575  1.1589  1.5429
Beta        0.3039  0.0000  0.3397  0.9135  0.6197
Gamma       0.8575  0.3397  0.0000  1.6317  1.2937
Delta       1.1589  0.9135  1.6317  0.0000  0.1659
Epsilon     1.5429  0.6197  1.2937  0.1659  0.0000
mix

version 3.6

MIX - Mixed method discrete characters parsimony

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This is under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).
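To make the difference between the two criteria concrete, here is a small C sketch (not code from MIX) that counts the changes required by a single binary character on one fixed, rooted four-species tree ((A,B),(C,D)), under Wagner parsimony (Fitch counting) and under Camin-Sokal parsimony with ancestral state 0. The tree, node numbering, and function names are invented for the illustration.

#include <stdio.h>

/* tips are nodes 0..3 (A,B,C,D); interior nodes are 4..6, with 6 the root */
static const int left[]  = {-1, -1, -1, -1, 0, 2, 4};
static const int right[] = {-1, -1, -1, -1, 1, 3, 5};

/* Wagner counting via Fitch sets for a binary character: 1={0}, 2={1}, 3={0,1} */
static int fitch(int node, const int *tip, int *steps)
{
  if (left[node] < 0) return 1 << tip[node];
  int a = fitch(left[node], tip, steps);
  int b = fitch(right[node], tip, steps);
  if (a & b) return a & b;
  (*steps)++;                          /* empty intersection: one change needed */
  return a | b;
}

/* Camin-Sokal with ancestor 0: an interior node can be assigned 1 only if every
   tip below it is 1; each maximal all-1 subtree costs exactly one 0->1 change. */
static int caminsokal(int node, const int *tip, int *steps)
{
  if (left[node] < 0) return tip[node];
  int a = caminsokal(left[node], tip, steps);
  int b = caminsokal(right[node], tip, steps);
  if (a == 1 && b == 0) (*steps)++;    /* change on the branch to the left child  */
  if (b == 1 && a == 0) (*steps)++;    /* change on the branch to the right child */
  return a && b;
}

int main(void)
{
  int tip[4] = {1, 0, 1, 1};           /* character states for A, B, C, D */
  int wagner = 0, cs = 0;
  fitch(6, tip, &wagner);
  if (caminsokal(6, tip, &cs) == 1)
    cs++;                              /* change on the branch leading to the root */
  printf("Wagner steps: %d   Camin-Sokal steps: %d\n", wagner, cs);
  return 0;
}

For these states (1 0 1 1) the Wagner count is 1 change, while the Camin-Sokal count is 2, since under irreversibility the state 1 must arise separately in A and in the (C,D) group.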

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

INPUT FORMAT

The input for MIX is the standard input for discrete characters programs, described above in the documentation file for the discrete-characters programs. States "?", "P", and "B" are allowed.

The options are selected using a menu:


Mixed parsimony algorithm, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  X                     Use Mixed method?  No
  P                     Parsimony method?  Wagner
  J     Randomize input order of species?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  A   Use ancestral states in input file?  No
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4     Print out steps in each character  No
  5     Print states at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The options U, X, J, O, T, A, and M are the usual User Tree, miXed methods, Jumble, Outgroup, Threshold, Ancestral States, and Multiple Data Sets options, described either in the main documentation file or in the Discrete Characters Programs documentation file. The user-defined trees supplied if you use the U option must be given as rooted trees with two-way splits (bifurcations). The O option is acted upon only if the final tree is unrooted and is not a user-defined tree. One of the important uses of the O option is to root the tree so that if there are any characters in which the ancestral states have not been specified, the program will print out a table showing which ancestral states require the fewest steps. Note that when any of the characters has Camin-Sokal parsimony assumed for it, the tree is rooted and the O option will have no effect.

The option P toggles between the Camin-Sokal parsimony criterion and the default Wagner parsimony criterion. Option X invokes mixed-method parsimony. If the A option is invoked, the ancestor is not to be counted as one of the species.

The F (Factors) option is not available in this program, as it would have no effect on the result even if that information were provided in the input file.

OUTPUT FORMAT

Output is standard: a list of equally parsimonious trees, which will be printed as rooted or unrooted depending on which is appropriate, and, if the user chooses, a table of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the Camin-Sokal parsimony method is invoked and the Ancestors option is also used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the fewest state changes. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in MIX gives up more easily on displaying these states.

If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will also be printed out showing the best guesses of which are the ancestral states in each character. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the highest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one.
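
To make the arithmetic of the two-tree (KHT-style) comparison concrete, here is a minimal C sketch. It is not taken from MIX itself; the function name and arrays are hypothetical, and it assumes you already have the number of steps required by each character on each of the two trees. It sums the per-character step differences, estimates the variance of that sum from the scatter of those differences, and applies the 1.96-standard-deviation rule described above.

#include <math.h>
#include <stdio.h>

/* Hypothetical sketch of a two-tree KHT-style comparison.  steps1[i] and
   steps2[i] are the numbers of steps required by character i on the two
   trees, for n characters.  Returns 1 if the trees differ significantly. */
int kht_significantly_different(const double *steps1, const double *steps2,
                                long n)
{
  double sumd = 0.0, mean, ss = 0.0, var;
  long i;

  if (n < 2)
    return 0;
  for (i = 0; i < n; i++)
    sumd += steps1[i] - steps2[i];
  mean = sumd / n;
  for (i = 0; i < n; i++) {
    double d = (steps1[i] - steps2[i]) - mean;
    ss += d * d;
  }
  var = ss * n / (n - 1.0);   /* variance of the total step difference */
  printf("difference %.3f   s.d. %.3f\n", sumd, sqrt(var));
  return fabs(sumd) > 1.96 * sqrt(var);
}

The SH extension for more than two trees additionally needs the covariances between trees and the resampling step described above, which this sketch does not attempt.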

At the beginning of the program is a constant, maxtrees, the maximum number of trees which the program will store for output.

The program is descended from earlier programs SOKAL and WAGNER which have long since been removed from the PHYLIP package, since MIX has all their capabilities and more.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options on)


Mixed parsimony algorithm, version 3.6a3

5 species, 6 characters

Wagner parsimony method


Name         Characters
----         ----------

Alpha        11011 0
Beta         11000 0
Gamma        10011 0
Delta        00100 1
Epsilon      00111 0



     4 trees in all found




           +--Epsilon   
     +-----4  
     !     +--Gamma     
  +--2  
  !  !     +--Delta     
--1  +-----3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000

steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       2   2   2   1   1   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                1?011 0
  1       2         no     .?... .
  2       4        maybe   .0... .
  4    Epsilon      yes    0.1.. .
  4    Gamma        no     ..... .
  2       3         yes    .?.00 .
  3    Delta        yes    001.. 1
  3    Beta        maybe   .1... .
  1    Alpha       maybe   .1... .





     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
--1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000

steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       1   2   1   2   2   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                1?011 0
  1       2         no     .?... .
  2    Gamma       maybe   .0... .
  2       3        maybe   .?.?? .
  3       4         yes    001?? .
  4    Epsilon     maybe   ...11 .
  4    Delta        yes    ...00 1
  3    Beta        maybe   .1.00 .
  1    Alpha       maybe   .1... .





     +--------Epsilon   
  +--4  
  !  !  +-----Gamma     
  !  +--2  
--1     !  +--Delta     
  !     +--3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000

steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       2   2   2   1   1   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                1?011 0
  1       4        maybe   .0... .
  4    Epsilon      yes    0.1.. .
  4       2         no     ..... .
  2    Gamma        no     ..... .
  2       3         yes    ...00 .
  3    Delta        yes    0.1.. 1
  3    Beta         yes    .1... .
  1    Alpha       maybe   .1... .





     +--------Gamma     
  +--2  
  !  !  +-----Epsilon   
  !  +--4  
--1     !  +--Delta     
  !     +--3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000

steps in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       2   2   2   1   1   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                1?011 0
  1       2        maybe   .0... .
  2    Gamma        no     ..... .
  2       4        maybe   ?.?.. .
  4    Epsilon     maybe   0.1.. .
  4       3         yes    ?.?00 .
  3    Delta        yes    0.1.. 1
  3    Beta         yes    110.. .
  1    Alpha       maybe   .1... .


PHYLIPNEW-3.69.650/doc/move.html0000664000175000017500000003706307712247475012717 00000000000000 move

version 3.6

MOVE - Interactive mixed method parsimony

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

MOVE is an interactive parsimony program, inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. MOVE reads in a data set which is prepared in almost the same format as one for the mixed method parsimony program MIX. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology.

This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to PCDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed.

The input data file is set up almost identically to the data files for MIX.

The user interaction starts with the program presenting a menu. The menu looks like this:


Interactive mixed parsimony algorithm, version 3.6a3

Settings for this run:
  X                         Use Mixed method?  No
  P                         Parsimony method?  Wagner
  A                     Use ancestral states?  No
  F                  Use factors information?  No
  O                            Outgroup root?  No, use as outgroup species   1
  W                           Sites weighted?  No
  T                  Use Threshold parsimony?  No, use ordinary parsimony
  U  Initial tree (arbitrary, user, specify)?  Arbitrary
  0       Graphics type (IBM PC, ANSI, none)?  (none)
  S                 Width of terminal screen?  80
  L                Number of lines on screen?  24

Are these settings correct? (type Y or the letter for one to change)

The P (Parsimony method) option selects among Wagner parsimony and Camin-Sokal parsimony. If X (miXed methods) is selected the P menu item disappears, as it is then irrelevant.

The X (miXed methods), A (Ancestors), F (Factors), O (Outgroup), T (Threshold), and 0 (Graphics type) options are the usual ones and are described in the main documentation page and in the discrete characters program documentation page. The L option allows the program to take advantage of larger screens if available. The U (initial tree) option allows the user to choose whether the initial tree is to be arbitrary, interactively specified by the user, or read from a tree file. Typing U causes the program to change among the three possibilities in turn. I would recommend that for a first run, you allow the tree to be set up arbitrarily (the default), as the "specify" choice is difficult to use and the "user tree" choice requires that you have available a tree file with the tree topology of the initial tree. Its default name is intree. The program will ask you for its name if it looks for the input tree file and does not find one of this name. If you wish to set up some particular tree you can also do that by the rearrangement commands specified below. The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 1.0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data! Note that the usual W (Weights) option is not available in MOVE. We hope to add it soon. The F (Factors) option is available in this program. It is only used to inform the program which groups of characters are to be counted together in computing the number of characters compatible with the tree. Thus if three binary characters are all factors of the same multistate character, the multistate character will be counted as compatible with the tree only if all three factors are compatible with it.

After the initial menu is displayed and the choices are made, the program then sets up an initial tree and displays it. Below it will be a one-line menu of possible commands, which looks like this:

NEXT? (Options: R # + - S . T U W O F C H ? X Q) (H or ? for Help)

If you type H or ? you will get a single screen showing a description of each of these commands in a few words. Here are slightly more detailed descriptions:

R
("Rearrange"). This command asks for the number of a node which is to be removed from the tree. It and everything to the right of it on the tree is to be removed (by breaking the branch immediately below it). The command also asks for the number of a node below which that group is to be inserted. If an impossible number is given, the program refuses to carry out the rearrangement and asks for a new command. The rearranged tree is displayed: it will often have a different number of steps than the original. If you wish to undo a rearrangement, use the Undo command, for which see below.

#
This command, and the +, - and S commands described below, determine which character has its states displayed on the branches of the trees. The initial tree displayed by the program does not show states of sites. When # is typed, the program does not ask the user which character is to be shown but automatically shows the states of the next binary character that is not compatible with the tree (the next character that does not perfectly fit the current tree). The search for this character "wraps around" so that if it reaches the last character without finding one that is not compatible with the tree, the search continues at the first character; if no incompatible character is found the current character is shown, and if no current character is shown then the first character is shown. The display takes the form of different symbols or textures on the branches of the tree. The state of each branch is actually the state of the node above it. A key of the symbols or shadings used for states 0, 1 and ? are shown next to the tree. State ? means that either state 0 or state 1 could exist at that point on the tree, and that the user may want to consider the different possibilities, which are usually apparent by inspection.

+
This command is the same as # except that it goes forward one character, showing the states of the next character. If no character has been shown, using + will cause the first character to be shown. Once the last character has been reached, using + again will show the first character.

-
This command is the same as + except that it goes backwards, showing the states of the previous character. If no character has been shown, using - will cause the last character to be shown. Once character number 1 has been reached, using - again will show the last character.

S
("Show"). This command is the same as + and - except that it causes the program to ask you for the number of a character. That character is the one whose states will be displayed. If you give the character number as 0, the program will go back to not showing the states of the characters.

. (dot)
This command simply causes the current tree to be redisplayed. It is of use when the tree has partly disappeared off of the top of the screen owing to too many responses to commands being printed out at the bottom of the screen.

T
("Try rearrangements"). This command asks for the name of a node. The part of the tree at and above that node is removed from the tree. The program tries to re-insert it in each possible location on the tree (this may take some time, and the program reminds you to wait). Then it prints out a summary. For each possible location the program prints out the number of the node to the right of the place of insertion and the number of steps required in each case. These are divided into those that are better, tied, or worse than the current tree. Once this summary is printed out, the group that was removed is inserted into its original position. It is up to you to use the R command to actually carry out any the arrangements that have been tried.

U
("Undo"). This command reverses the effect of the most recent rearrangement, outgroup re-rooting, or flipping of branches. It returns to the previous tree topology. It will be of great use when rearranging the tree and when a rearrangement proves worse than the preceding one -- it permits you to abandon the new one and return to the previous one without remembering its topology in detail.

W
("Write"). This command writes out the current tree onto a tree output file. If the file already has been written to by this run of MOVE, it will ask you whether you want to replace the contents of the file, add the tree to the end of the file, or not write out the tree to the file. The tree is written in the standard format used by PHYLIP (a subset of the Newick standard). It is in the proper format to serve as the User-Defined Tree for setting up the initial tree in a subsequent run of the program. Note that if you provided the initial tree topology in a tree file and replace its contents, that initial tree will be lost.

O
("Outgroup"). This asks for the number of a node which is to be the outgroup. The tree will be redisplayed with that node as the left descendant of the bottom fork. Under some options (for example the Camin-Sokal parsimony method or the Ancestor state options), the number of steps required on the tree may change on re-rooting. Note that it is possible to use this to make a multi-species group the outgroup (i.e., you can give the number of an interior node of the tree as the outgroup, and the program will re-root the tree properly with that on the left of the bottom fork).

F
("Flip"). This asks for a node number and then flips the two branches at that node, so that the left-right order of branches at that node is changed. This does not actually change the tree topology (or the number of steps on that tree) but it does change the appearance of the tree.
.br
C
("Clade"). When the data consist of more than 12 species (or more than half the number of lines on the screen if this is not 24), it may be difficult to display the tree on one screen. In that case the tree will be squeezed down to one line per species. This is too small to see all the interior states of the tree. The C command instructs the program to print out only that part of the tree (the "clade") from a certain node on up. The program will prompt you for the number of this node. Remember that thereafter you are not looking at the whole tree. To go back to looking at the whole tree give the C command again and enter "0" for the node number when asked. Most users will not want to use this option unless forced to.

H
("Help"). Prints a one-screen summary of what the commands do, a few words for each command.

?
("huh?"). A synonym for H. Same as Help command.

X
("Exit"). Exit from program. If the current tree has not yet been saved into a file, the program will ask you whether it should be saved.

Q
("Quit"). A synonym for X. Same as the eXit command.

ADAPTING THE PROGRAM TO YOUR COMPUTER AND TO YOUR TERMINAL

As we have seen, the initial menu of the program allows you to choose among three screen types (PC, ANSI, and none). If you want to avoid having to make this choice every time, you can change some of the constants in the file phylip.h to have the terminal type initialize itself in the proper way, and recompile. The constants that need attention are ANSICRT and IBMCRT. Currently these are both set to "false" on Macintosh and on Unix/Linux systems, and IBMCRT is set to "true" on Windows systems. If your system has an ANSI compatible terminal, you might want to find the definition of ANSICRT in phylip.h and set it to "true", and IBMCRT to "false".
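
For example, the relevant lines in phylip.h look roughly like the sketch below. This is an approximation, not a verbatim copy of the header; in the distributed file the definitions sit inside system-specific preprocessor conditionals. Change the values, recompile, and the programs will start with that terminal type preselected.

/* Sketch of the terminal-type constants in phylip.h (approximate form only;
   in the distributed header they sit inside system-specific #ifdef blocks). */
#define ANSICRT false   /* set to true for ANSI / VT100-compatible screens */
#define IBMCRT  false   /* set to true for IBM PC (DOS / Windows) consoles */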

MORE ABOUT THE PARSIMONY CRITERION

MOVE uses as its numerical criterion the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Below is a test data set, but we cannot show the output it generates because of the interactive nature of the program.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110
PHYLIPNEW-3.69.650/doc/protdist.html0000664000175000017500000004547507712247475013627 00000000000000 protdist

version 3.6

PROTDIST -- Program to compute distance matrix
from protein sequences

© Copyright 1993, 2000-2002 by the University of Washington. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program uses protein sequences to compute a distance matrix, under four different models of amino acid replacement. It can also compute a table of similarity between the amino acid sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the parsimony program PROTPARS.

The program reads in protein sequences and writes an output file containing the distance matrix or similarity table. The four models of amino acid substitution are one which is based on the Jones, Taylor and Thornton (1992) model of amino acid change, one based on the PAM matrixes of Margaret Dayhoff, one due to Kimura (1983) which approximates it based simply on the fraction of similar amino acids, and one based on a model in which the amino acids are divided up into groups, with change occurring based on the genetic code but with greater difficulty of changing between groups. The program correctly takes into account a variety of sequence ambiguities.

The four methods are:

(1) The Dayhoff PAM matrix. This uses Dayhoff's PAM 001 matrix from Dayhoff (1979), page 348. The PAM model is an empirical one that scales probabilities of change from one amino acid to another in terms of a unit which is an expected 1% change between two amino acid sequences. The PAM 001 matrix is used to make a transition probability matrix which allows prediction of the probability of changing from any one amino acid to any other, and also predicts equilibrium amino acid composition. The program assumes that these probabilities are correct and bases its computations of distance on them. The distance that is computed is scaled in units of expected fraction of amino acids changed. This is a unit of 100 PAM's.

(2) The Jones-Taylor-Thornton model. This is similar to the Dayhoff PAM model, except that it is based on a recounting of the number of observed changes in amino acids by Jones, Taylor, and Thornton (1992). They used a much larger sample of protein sequences than did Dayhoff. The distance is scaled in units of the expected fraction of amino acids changed (100 PAM's). Because its sample is so much larger this model is to be preferred over the original Dayhoff PAM model. It is the default model in this program.

(3) Kimura's distance. This is a rough-and-ready distance formula for approximating PAM distance by simply measuring the fraction of amino acids, p, that differs between two sequences and computing the distance as (Kimura, 1983)

     D = - log_e ( 1 - p - 0.2 p^2 ).

This is very quick to do but has some obvious limitations. It does not take into account which amino acids differ or to what amino acids they change, so some information is lost. The units of the distance measure are fraction of amino acids differing, as also in the case of the PAM distance. If the fraction of amino acids differing gets larger than 0.8541 the distance becomes infinite.
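
As an illustration (this is not PROTDIST's own routine, and the function name is made up), the formula and its breakdown point can be coded directly from the observed fraction p of differing positions:

#include <math.h>

/* Kimura's (1983) approximate protein distance, computed from the fraction
   p of amino acid positions that differ between two aligned sequences.
   Returns -1.0 when the distance would be infinite (p >= 0.8541...),
   echoing the program's convention for unrepresentable distances. */
double kimura_protein_distance(double p)
{
  double arg = 1.0 - p - 0.2 * p * p;
  if (arg <= 0.0)
    return -1.0;            /* distance undefined (infinite) */
  return -log(arg);         /* natural logarithm */
}

For example, p = 0.5 gives a distance of -ln(0.45), about 0.80, while any p of 0.8541 or more makes the argument of the logarithm non-positive and the distance undefined.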

(4) The Categories distance. This is my own concoction. I imagined a nucleotide sequence changing according to Kimura's 2-parameter model, with the exception that some changes of amino acids are less likely than others. The amino acids are grouped into a series of categories. Any base change that does not change which category the amino acid is in is allowed, but if an amino acid changes category this is allowed only a certain fraction of the time. The fraction is called the "ease" and there is a parameter for it, which is 1.0 when all changes are allowed and near 0.0 when changes between categories are nearly impossible.

In this option I have allowed the user to select the Transition/Transversion ratio, which of several genetic codes to use, and which categorization of amino acids to use. There are three of them, a somewhat random sample:

(a) The George-Hunt-Barker (1988) classification of amino acids,
(b) A classification provided by my colleague Ben Hall when I asked him for one,
(c) One I found in an old "baby biochemistry" book (Conn and Stumpf, 1963), which contains most of the biochemistry I was ever taught, and all that I ever learned.

Interestingly enough, all of them are consistent with the same linear ordering of amino acids, which they divide up in different ways. For the Categories model I have set as default the George/Hunt/Barker classification with the "ease" parameter set to 0.457 which is approximately the value implied by the empirical rates in the Dayhoff PAM matrix.

The method uses, as I have noted, Kimura's (1980) 2-parameter model of DNA change. The Kimura "2-parameter" model allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:

              To:     A        G        C        T
                   ---------------------------------
               A  | 1-a-2b     a         b       b
       From:   G  |   a      1-a-2b      b       b
               C  |   b        b       1-a-2b    a
               T  |   b        b         a     1-a-2b

where a is u dt, the product of the rate of transitions per unit time and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length dt of the time interval.
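
Written as code (an illustrative sketch, not the program's internal routine; the base order A, G, C, T follows the table above), the short-interval matrix can be filled in from a = u dt and b = v dt like this:

/* Fill the 4x4 Kimura 2-parameter transition probability matrix for a short
   time interval.  Rows and columns are in the order A, G, C, T, matching the
   table above; a = u dt (a transition), b = v dt (a specific transversion). */
void kimura2p_short_interval(double P[4][4], double a, double b)
{
  static const int purine[4] = {1, 1, 0, 0};   /* A and G are purines */
  int i, j;

  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) {
      if (i == j)
        P[i][j] = 1.0 - a - 2.0 * b;   /* no change */
      else if (purine[i] == purine[j])
        P[i][j] = a;                   /* transition (A<->G or C<->T) */
      else
        P[i][j] = b;                   /* transversion */
    }
}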

Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. The Kimura distance is straightforward to compute. The others are considerably slower: they look at all positions, and find that distance which makes the likelihood highest. This distance is in effect the length of the internal branch in a two-species tree that connects these two species. Its likelihood is just the product, under the model, of the probabilities of each position having the (one or) two amino acids that are actually found. This is fairly slow to compute.

The computation proceeds from an eigenanalysis (spectral decomposition) of the transition probability matrix. In the case of the PAM 001 matrix the eigenvalues and eigenvectors are precomputed and are hard-coded into the program in over 400 statements. In the case of the Categories model the program computes the eigenvalues and eigenvectors itself, which will add a delay. But the delay is independent of the number of species as the calculation is done only once, at the outset.

The actual algorithm for estimating the distance is in both cases a bisection algorithm which tries to find the point at which the derivative of the likelihood is zero. Some of the kinds of ambiguous amino acids like "glx" are correctly taken into account. However, gaps are treated as if they are unknown amino acids, which means those positions get dropped from that particular comparison. They are not, however, dropped from the whole analysis. You need not eliminate regions containing gaps, as long as you are reasonably sure of the alignment there.

Note that there is an assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause the distances to misinterpret the meaning of those positions that had changed.

The program can now correct distances for unequal rates of change at different amino acid positions. This correction, which was introduced for DNA sequences by Jin and Nei (1990), assumes that the distribution of rates of change among amino acid positions follows a Gamma distribution. The user is asked for the value of a parameter that determines the amount of variation of rates among amino acid positions. Instead of the more widely-known coefficient alpha, PROTDIST uses the coefficient of variation (ratio of the standard deviation to the mean) of rates among amino acid positions. So if there is 20% variation in rates, the CV is 0.20. The square of the CV is also the reciprocal of the better-known "shape parameter", alpha, of the Gamma distribution, so in this case the shape parameter alpha = 1/(0.20*0.20) = 25. If you want to achieve a particular value of alpha, such as 100, you will want to use a CV of 1/sqrt(100) = 1/10 = 0.1.
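
Since CV = 1/sqrt(alpha), the conversion in either direction is a one-liner; here is an illustrative C sketch (the function names are made up):

#include <math.h>

/* Conversion between PROTDIST's coefficient of variation of rates among
   positions and the Gamma shape parameter alpha:  CV = 1/sqrt(alpha). */
double alpha_from_cv(double cv)    { return 1.0 / (cv * cv); }
double cv_from_alpha(double alpha) { return 1.0 / sqrt(alpha); }

So a CV of 0.5 corresponds to alpha = 4, and alpha = 100 corresponds to a CV of 0.1, the example given above.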

In addition to the four distance calculations, the program can also compute a table of similarities between amino acid sequences. These values are the fractions of amino acid positions identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical amino acids, so that no credit is given for having (for example) different hydrophobic amino acids at the corresponding positions in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree.

INPUT FORMAT AND OPTIONS

Input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites. There follows the character W if the Weights option is being used.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

After that are the lines (if any) containing the information for the W option, as described below.

The options are selected using an interactive menu. The menu looks like this:


Protein distance algorithm, version 3.6a3

Settings for this run:
  P     Use JTT, PAM, Kimura or categories model?  Jones-Taylor-Thornton matrix
  G  Gamma distribution of rates among positions?  No
  C           One category of substitution rates?  Yes
  W                    Use weights for positions?  No
  M                   Analyze multiple data sets?  No
  I                  Input sequences interleaved?  Yes
  0                 Terminal type (IBM PC, ANSI)?  (none)
  1            Print out the data at start of run  No
  2          Print indications of progress of run  Yes

Are these settings correct? (type Y or the letter for one to change)

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The G option chooses Gamma distributed rates of evolution across amino acid positions. The program will prompt you for the Coefficient of Variation of rates. As is noted above, this is 1/sqrt(alpha) if alpha is the more familiar "shape coefficient" of the Gamma distribution. If the G option is not selected, the program defaults to having no variation of rates among positions.

The options M and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The P option selects one of the four distance methods, or the similarity table. It toggles among these five methods. The default method, if none is specified, is the Jones-Taylor-Thornton model. If the Categories distance is selected another menu option, T, will appear allowing the user to supply the Transition/Transversion ratio that should be assumed at the underlying DNA level, and another one, C, which allows the user to select among various nuclear and mitochondrial genetic codes. The transition/transversion ratio can be any number from 0.5 upwards.

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of sites to be analyzed, ignoring the others. The sites selected are those with weight 1. If the W option is not invoked, all sites are analyzed.

OUTPUT FORMAT

As the distances are computed, the program prints on your screen or terminal the names of the species in turn, followed by one dot (".") for each other species for which the distance to that species has been computed. Thus if there are ten species, the first species name is printed out, followed by one dot, then on the next line the next species name is printed out followed by two dots, then the next followed by three dots, and so on. The pattern of dots should form a triangle. When the distance matrix has been written out to the output file, the user is notified of that.

The output file contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. The distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.

If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the other two characters omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences.

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used.

In the Categories model of substitution, the distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change is set to 1.0. For the Dayhoff PAM and Kimura models the distances are scaled in terms of the expected numbers of amino acid substitutions per site. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the protein (or nucleotide) sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases. For example, with the Kimura model, if the two sequences differ in 85.41% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues a warning message indicating which pair of species are causing the problem, and computes a distance of -1.0.

PROGRAM CONSTANTS

The constants that are available to be changed by the user at the beginning of the program include "namelength", the length of species names in characters, and "epsilon", a parameter which controls the accuracy of the results of the iterations which estimate the distances. Making "epsilon" smaller will increase run times but result in more decimal places of accuracy. This should not be necessary.

The program spends most of its time doing real arithmetic. Any software or hardware changes that speed up that arithmetic will speed it up by a nearly proportional amount.


TEST DATA SET

(Note that although these may look like DNA sequences, they are being treated as protein sequences consisting entirely of alanine, cysteine, glycine, and threonine).

   5   13
Alpha     AACGTGGCCACAT
Beta      AAGGTCGCCACAC
Gamma     CAGTTCGCCACAA
Delta     GAGATTTCCGCCT
Epsilon   GAGATCTCCGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on )

(Note that when the numerical options are not on, the output file produced is in the correct format to be used as an input file in the distance matrix programs).


  Jones-Taylor-Thornton model distance

Name            Sequences
----            ---------

Alpha        AACGTGGCCA CAT
Beta         ..G..C.... ..C
Gamma        C.GT.C.... ..A
Delta        G.GA.TT..G .C.
Epsilon      G.GA.CT..G .CC



Alpha       0.0000  0.3304  0.6257  1.0320  1.3541
Beta        0.3304  0.0000  0.3756  1.0963  0.6776
Gamma       0.6257  0.3756  0.0000  0.9758  0.8616
Delta       1.0320  1.0963  0.9758  0.0000  0.2267
Epsilon     1.3541  0.6776  0.8616  0.2267  0.0000
PHYLIPNEW-3.69.650/doc/sequence.html0000664000175000017500000004005607712247476013556 00000000000000 sequence

version 3.6

Molecular Sequence Programs

(c) Copyright 1986-2000 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

These programs estimate phylogenies from protein sequence or nucleic acid sequence data. PROTPARS uses a parsimony method intermediate between Eck and Dayhoff's method (1966) of allowing transitions between all amino acids and counting those, and Fitch's (1971) method of counting the number of nucleotide changes that would be needed to evolve the protein sequence. DNAPARS uses the parsimony method allowing changes between all bases and counting the number of those. DNAMOVE is an interactive parsimony program allowing the user to rearrange trees by hand and see where character states change. DNAPENNY uses the branch-and-bound method to search for all most parsimonious trees in the nucleic acid sequence case. DNACOMP adapts to nucleotide sequences the compatibility (largest clique) approach. DNAINVAR does not directly estimate a phylogeny, but computes Lake's (1987) and Cavender's (Cavender and Felsenstein, 1987) phylogenetic invariants, which are quantities whose values depend on the phylogeny. DNAML does a maximum likelihood estimate of the phylogeny (Felsenstein, 1981a). DNAMLK is similar to DNAML but assumes a molecular clock. DNADIST computes distance measures between pairs of species from nucleotide sequences, distances that can then be used by the distance matrix programs FITCH and KITSCH. RESTML does a maximum likelihood estimate from restriction sites data. SEQBOOT allows you to read in a data set and then produce multiple data sets from it by bootstrapping, delete-half jackknifing, or by permuting within sites. This then allows most of these methods to be bootstrapped or jackknifed, and for the Permutation Tail Probability Test of Archie (1989) and Faith and Cranston (1991) to be carried out.

The input and output format for RESTML is described in its document files. In general its input format is similar to those described here, except that the one-letter codes for restriction sites are specific to that program and are described in that document file. Since the input formats for the eight DNA sequence and two protein sequence programs apply to more than one program, they are described here. Their input formats are standard, making use of the IUPAC standards.

INTERLEAVED AND SEQUENTIAL FORMATS

The sequences can continue over multiple lines; when this is done the sequences must be either in "interleaved" format, similar to the output of alignment programs, or "sequential" format. These are described in the main document file. In sequential format all of one sequence is given, possibly on multiple lines, before the next starts. In interleaved format the first part of the file should contain the first part of each of the sequences, then possibly a line containing nothing but a carriage-return character, then the second part of each sequence, and so on. Only the first parts of the sequences should be preceded by names. Here is a hypothetical example of interleaved format:

  5    42
Turkey    AAGCTNGGGC ATTTCAGGGT
Salmo gairAAGCCTTGGC AGTGCAGGGT
H. SapiensACCGGTTGGC CGTTCAGGGT
Chimp     AAACCCTTGC CGTTACGCTT
Gorilla   AAACCCTTGC CGGTACGCTT

GAGCCCGGGC AATACAGGGT AT
GAGCCGTGGC CGGGCACGGT AT
ACAGGTTGGC CGTTCAGGGT AA
AAACCGAGGC CGGGACACTC AT
AAACCATTGC CGGTACGCTT AA

while in sequential format the same sequences would be:

  5    42
Turkey    AAGCTNGGGC ATTTCAGGGT
GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT
GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT
ACAGGTTGGC CGTTCAGGGT AA
Chimp     AAACCCTTGC CGTTACGCTT
AAACCGAGGC CGGGACACTC AT
Gorilla   AAACCCTTGC CGGTACGCTT
AAACCATTGC CGGTACGCTT AA

Note, of course, that a portion of a sequence like this:

300 AAGCGTGAAC GTTGTACTAA TRCAG

is perfectly legal, assuming that the species name has gone before, and is filled out to full length by blanks. The above digits and blanks will be ignored, the sequence being taken as starting at the first base symbol (in this case an A). This should enable you to use output from many multiple-sequence alignment programs with only minimal editing.
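
As an illustration of which characters in such a line actually count (a sketch only, not the programs' own input routine), the following C fragment keeps only the sequence symbols, dropping blanks and digits and converting to upper case:

#include <ctype.h>
#include <stdio.h>

/* Keep only the sequence symbols from an input line, skipping blanks and
   digits and converting to upper case.  (Illustrative sketch only, not the
   programs' actual input routine.) */
void keep_sequence_symbols(const char *line, char *out)
{
  for (; *line != '\0'; line++)
    if (!isspace((unsigned char) *line) && !isdigit((unsigned char) *line))
      *out++ = (char) toupper((unsigned char) *line);
  *out = '\0';
}

int main(void)
{
  char cleaned[64];
  keep_sequence_symbols("300 AAGCGTGAAC GTTGTACTAA TRCAG", cleaned);
  printf("%s\n", cleaned);   /* prints AAGCGTGAACGTTGTACTAATRCAG */
  return 0;
}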

In interleaved format the present versions of the programs may sometimes have difficulties with the blank lines between groups of lines, and if so you might want to retype those lines, making sure that they have only a carriage-return and no blank characters on them, or you may perhaps have to eliminate them. The symptoms of this problem are that the programs complain that the sequences are not properly aligned, and you can find no other cause for this complaint.

INPUT FOR THE DNA SEQUENCE PROGRAMS

The input format for the DNA sequence programs is standard: the data have A's, G's, C's and T's (or U's). The first line of the input file contains the number of species and the number of sites. As with the other programs, options information may follow this. Following this, each species starts on a new line. The first 10 characters of that line are the species name. There then follows the base sequence of that species, each character being one of the letters A, B, C, D, G, H, K, M, N, O, R, S, T, U, V, W, X, Y, ?, or - (a period was also previously allowed but it is no longer allowed, because it sometimes is used in different senses in other programs). Blanks will be ignored, and so will numerical digits. This allows GENBANK and EMBL sequence entries to be read with minimum editing.

These characters can be either upper or lower case. The algorithms convert all input characters to upper case (which is how they are treated). The characters constitute the IUPAC (IUB) nucleic acid code plus some slight extensions. They enable input of nucleic acid sequences taking full account of any ambiguities in the sequence.

Symbol     Meaning
------     -------
A          Adenine
G          Guanine
C          Cytosine
T          Thymine
U          Uracil
Y          pYrimidine  (C or T)
R          puRine      (A or G)
W          "Weak"      (A or T)
S          "Strong"    (C or G)
K          "Keto"      (T or G)
M          "aMino"     (C or A)
B          not A       (C or G or T)
D          not C       (A or G or T)
H          not G       (A or C or T)
V          not T       (A or C or G)
X, N, ?    unknown     (A or C or G or T)
O          deletion
-          deletion

INPUT FOR THE PROTEIN SEQUENCE PROGRAMS

The input for the protein sequence programs is fairly standard. The first line contains the number of species and the number of amino acid positions (counting any stop codons that you want to include). These are followed on the same line by the options. The only options which need information in the input file are U (User Tree) and W (Weights). They are as described in the main documentation file. If the W (Weights) option is used there must be a W in the first line of the input file.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The protein sequences are given by the one-letter code used by the late Margaret Dayhoff's group in the Atlas of Protein Sequences, and consistent with the IUB standard abbreviations. In the present version it is:

Symbol    Stands for
------    ----------
A         ala
B         asx
C         cys
D         asp
E         glu
F         phe
G         gly
H         his
I         ileu
J         (not used)
K         lys
L         leu
M         met
N         asn
O         (not used)
P         pro
Q         gln
R         arg
S         ser
T         thr
U         (not used)
V         val
W         trp
X         unknown amino acid
Y         tyr
Z         glx
*         nonsense (stop)
?         unknown amino acid or deletion
-         deletion

where "nonsense", and "unknown" mean respectively a nonsense (chain termination) codon and an amino acid whose identity has not been determined. The state "asx" means "either asn or asp", and the state "glx" means "either gln or glu" and the state "deletion" means that alignment studies indicate a deletion has happened in the ancestry of this position, so that it is no longer present. Note that if two polypeptide chains are being used that are of different length owing to one terminating before the other, they can be coded as (say)

             HIINMA*????
             HIPNMGVWABT
since after the stop codon we do not definitely know that there has been a deletion, and do not know what amino acid would have been there. If DNA studies tell us that there is DNA sequence in that region, then we could use "X" rather than "?". Note that "X" means an unknown amino acid, but definitely an amino acid, while "?" could mean either that or a deletion. Otherwise one will usually want to use "?" after a stop codon, if one does not know what amino acid is there. If the DNA sequence has been observed there, one probably ought to resist putting in the amino acids that this DNA would code for, and one should use "X" instead, because under the assumptions implicit in either the parsimony or the distance methods, changes to any noncoding sequence are much easier than changes in a coding region that change the amino acid.

Here are the same one-letter codes tabulated the other way 'round:

Amino acid                  One-letter code
----------                  ---------------
ala                         A
arg                         R
asn                         N
asp                         D
asx                         B
cys                         C
gln                         Q
glu                         E
gly                         G
glx                         Z
his                         H
ileu                        I
leu                         L
lys                         K
met                         M
phe                         F
pro                         P
ser                         S
thr                         T
trp                         W
tyr                         Y
val                         V
deletion                    -
nonsense (stop)             *
unknown amino acid          X
unknown (incl. deletion)    ?

THE OPTIONS

The programs allow options chosen from their menus. Many of these are as described in the main documentation file, particularly the options J, O, U, T, W, and Y. (Although T has a different meaning in the programs DNAML and DNADIST than in the others).

The U option indicates that user-defined trees are provided at the end of the input file. This happens in the usual way, except that for PROTPARS, DNAPARS, DNACOMP, and DNAMLK, the trees must be strictly bifurcating, containing only two-way splits, e. g.: ((A,B),(C,(D,E)));. For DNAML and RESTML it must have a trifurcation at its base, e. g.: ((A,B),C,(D,E));. The root of the tree may in those cases be placed arbitrarily, since the trees needed are actually unrooted, though they look different when printed out. The program RETREE should enable you to reroot the trees without having to hand-edit or retype them. For DNAMOVE the U option is not available (although there is an equivalent feature which uses rooted user trees).

A feature of the nucleotide sequence programs other than DNAMOVE is that they save time and computer memory space by recognizing sites at which the pattern of bases is the same, and doing their computation only once. Thus if we have only four species but a large number of sites, there are (ignoring ambiguous bases) only about 256 different patterns of nucleotides (4 x 4 x 4 x 4) that can occur. The programs automatically count how many occurrences there are of each and then only need to do as much computation as would be needed with 256 sites, even though the number of sites is actually much larger. If there are ambiguities (such as Y or R nucleotides), these are also handled correctly, and do not cause trouble. The programs store the full sequences but reserve other space for bookkeeping only for the distinct patterns. This saves space. Thus the programs will run very effectively with few species and many sites. On larger numbers of species, if rates of evolution are small, many of the sites will be invariant (such as having all A's) and thus will mostly have one of four patterns. The programs will in this way automatically avoid doing duplicate computations for such sites.
PHYLIPNEW-3.69.650/doc/contchar.html0000664000175000017500000001644307712247475013545 00000000000000 contchar

version 3.6

Gene Frequencies and Continuous Character Data Programs

© Copyright 1986-2000 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny.

When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions:

  1. Different lineages evolve independently.
  2. After two lineages split, their characters change independently.
  3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method).
  4. Different loci or characters drift independently.

How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c).

The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option:

     5    3
2 3 2
Alpha      0.90 0.80 0.10 0.56
Beta       0.72 0.54 0.30 0.20
Gamma      0.38 0.10 0.05  0.98
Delta      0.42 0.40 0.43 0.97
Epsilon    0.10 0.30 0.70 0.62

whereas here is what it would have to look like if the A option were invoked:

     5    3
2 3 2
Alpha      0.90 0.10 0.80 0.10 0.10 0.56 0.44
Beta       0.72 0.28 0.54 0.30 0.16 0.20 0.80
Gamma      0.38 0.62 0.10 0.05 0.85  0.98 0.02
Delta      0.42 0.58 0.40 0.43 0.17 0.97 0.03
Epsilon    0.10 0.90 0.30 0.70 0.00 0.62 0.38

The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name).

If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message.
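
As a quick sanity check before a run, something like the following minimal Python sketch can confirm that the frequencies for each locus sum to 1 when all alleles are given (the function name and the tolerance are mine; the programs' own roundoff tolerance may differ):

def check_locus_sums(frequencies, alleles_per_locus, tol=1e-6):
    """Warn if any locus's allele frequencies do not sum to 1 within tol.

    frequencies: one population's numbers, with all alleles given (A option);
    alleles_per_locus: the per-locus allele counts from the second input line.
    """
    position = 0
    for locus, k in enumerate(alleles_per_locus, start=1):
        total = sum(frequencies[position:position + k])
        if abs(total - 1.0) > tol:
            print("locus %d: frequencies sum to %f, not 1" % (locus, total))
        position += k

# The Alpha row of the A-option example above, with loci of 2, 3 and 2 alleles:
check_locus_sums([0.90, 0.10, 0.80, 0.10, 0.10, 0.56, 0.44], [2, 3, 2])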

While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points.

CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables.
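
Readers who want to attempt the phenotypic standardization just described can use a whitening transform. The following is a minimal sketch (the function name and data layout are mine, and it requires numpy); it removes within-population covariance and equalizes within-population variances, in the spirit of the Canonical Variates approach mentioned above, but it cannot remove genetic or selective covariances:

import numpy as np

def within_population_whitening(populations):
    """Return a matrix W so that x @ W has pooled within-population
    covariance equal to the identity (no covariance, equal variances).

    populations: a list of (individuals x characters) arrays, one per
    population.  Assumes the pooled covariance matrix is nonsingular.
    """
    centered = [np.asarray(p, float) - np.mean(p, axis=0) for p in populations]
    residuals = np.vstack(centered)
    df = len(residuals) - len(populations)      # pooled degrees of freedom
    pooled_cov = residuals.T @ residuals / df
    eigval, eigvec = np.linalg.eigh(pooled_cov)
    return eigvec @ np.diag(1.0 / np.sqrt(eigval))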

CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allows us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set.

In the quantitative characters mode, a typical small data set would be:

     5   6
Alpha      0.345 0.467 1.213  2.2  -1.2 1.0
Beta       0.457 0.444 1.1    1.987 -0.2 2.678
Gamma      0.6 0.12 0.97 2.3  -0.11 1.54
Delta      0.68  0.203 0.888 2.0  1.67
Epsilon    0.297  0.22 0.90 1.9 1.74

Note that in this case there is no line giving the numbers of alleles at each locus. In the quantitative characters case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale.

For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs.

PHYLIPNEW-3.69.650/doc/dnainvar.html

dnainvar

version 3.6

DNAINVAR -- Program to compute Lake's and Cavender's
phylogenetic invariants from nucleotide sequences

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program reads in nucleotide sequences for four species and computes the phylogenetic invariants discovered by James Cavender (Cavender and Felsenstein, 1987) and James Lake (1987). Lake's method is also called by him "evolutionary parsimony". I prefer Cavender's more mathematically precise term "invariants", as the method bears somewhat more relationship to likelihood methods than to parsimony. The invariants are mathematical formulas (in the present case linear or quadratic) in the EXPECTED frequencies of site patterns which are zero for all trees of a given tree topology, irrespective of branch lengths. At a given site, if there are no ambiguities, the four species could show any of the nucleotide patterns (reading the same site across all four species) AAAA, AAAC, AAAG, ... through TTTT, 256 patterns in all.

The invariants are formulas in the expected pattern frequencies, not the observed pattern frequencies. When they are computed using the observed pattern frequencies, we will usually find that they are not precisely zero even when the model is correct and we have the correct tree topology. Only as the number of nucleotides scored becomes infinite will the observed pattern frequencies approach their expectations; otherwise, we must do a statistical test of the invariants.

Some explanation of invariants will be found in the above papers, and also in my recent review article on statistical aspects of inferring phylogenies (Felsenstein, 1988b). Although invariants have some important advantages, their validity also depends on symmetry assumptions that may not be satisfied. In the discussion below suppose that the possible unrooted phylogenies are I: ((A,B),(C,D)), II: ((A,C),(B,D)), and III: ((A,D),(B,C)).

Lake's Invariants, Their Testing and Assumptions

Lake's invariants are fairly simple to describe: the patterns involved are only those in which there are two purines and two pyrimidines at a site. Thus a site with AACT would affect the invariants, but a site with AAGG would not. Let us use (as Lake does) the symbols 1, 2, 3, and 4, with the proviso that 1 and 2 are either both of the purines or both of the pyrimidines; 3 and 4 are the other two nucleotides. Thus 1 and 2 always differ by a transition; so do 3 and 4. Lake's invariants, expressed in terms of expected frequencies, are the three quantities:

(1)      P(1133) + P(1234) - P(1134) - P(1233),

(2)      P(1313) + P(1324) - P(1314) - P(1323),

(3)      P(1331) + P(1342) - P(1341) - P(1332),

He showed that invariants (2) and (3) are zero under Topology I, (1) and (3) are zero under topology II, and (1) and (2) are zero under Topology III. If, for example, we see a site with pattern ACGC, we can start by setting 1=A. Then 2 must be G. We can then set 3=C (so that 4 is T). Thus its pattern type, making those substitutions, is 1323. P(1323) is the expected probability of the type of pattern which includes ACGC, TGAG, GTAT, etc.
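
The mapping from an observed site to its 1234 pattern type can be written down directly from this description. Here is a minimal Python sketch (the names are mine, not part of PHYLIP) which, for example, reproduces the 1323 classification of ACGC:

TRANSITION_PARTNER = {"A": "G", "G": "A", "C": "T", "T": "C"}

def lake_pattern_type(site):
    """Map a four-species site such as 'ACGC' onto Lake's 1234 symbols.

    1 is the base in the first species and 2 its transition partner; 3 is
    the first base of the other (purine/pyrimidine) class to appear, and 4
    is that base's partner.  Ambiguities and deletions are not handled.
    """
    site = site.upper()
    symbol = {site[0]: "1", TRANSITION_PARTNER[site[0]]: "2"}
    for base in site:
        if base not in symbol:                  # first base of the other class
            symbol[base] = "3"
            symbol[TRANSITION_PARTNER[base]] = "4"
            break
    return "".join(symbol[b] for b in site)

print(lake_pattern_type("ACGC"))    # prints 1323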

Lake's invariants are easily tested with observed frequencies. For example, the first of them is a test of whether there are as many sites of types 1133 and 1234 as there are of types 1134 and 1233; this is easily tested with a chi-square test or, as in this program, with an exact binomial test. Note that with several invariants to test, we risk overestimating the significance of results if we simply accept the nominal 95% levels of significance (Li and Gouy, 1990).
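
The exact binomial test is only a little arithmetic. Here is a minimal sketch of the one-sided version (the function name is mine, and DNAINVAR's own bookkeeping may differ in detail); it gives the P value of 0.5 shown for tree I in the test-set output below:

from math import comb

def lake_binomial_p(first_sum, second_sum):
    """One-sided exact binomial P value for one of Lake's invariants.

    first_sum and second_sum are the observed counts in the two halves of
    the invariant (for the first invariant, sites of types 1133 + 1234
    versus 1134 + 1233).  Under the null hypothesis each such site is
    equally likely to fall in either half.
    """
    n = first_sum + second_sum
    return sum(comb(n, k) for k in range(first_sum, n + 1)) / 2 ** n

print(lake_binomial_p(1, 0))    # 0.5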

Lake's invariants assume that each site is evolving independently, and that starting from any base a transversion is equally likely to end up at each of the two possible bases (thus, an A undergoing a transversion is equally likely to end up as a C or a T, and similarly for the other three bases from which one could start). Interestingly, Lake's results do not assume that rates of evolution are the same at all sites. The result that the total of 1133 and 1234 is expected to be the same as the total of 1134 and 1233 is unaffected by the fact that we may have aggregated the counts over classes of sites evolving at different rates.

Cavender's Invariants, Their Testing and Assumptions

Cavender's invariants (Cavender and Felsenstein, 1987) are for the case of a character with two states. In the nucleic acid case we can classify nucleotides into two states, R and Y (Purine and Pyrimidine) and then use the two-state results. Cavender starts, as before, with the pattern frequencies. Coding purines as R and pyrimidines as Y, the pattern types are RRRR, RRRY, and so on until YYYY, a total of 16 types. Cavender found quadratic functions of the expected frequencies of these 16 types that were expected to be zero under a given phylogeny, irrespective of branch lengths. Two invariants (called K and L) were found for each tree topology. The L invariants are particularly easy to understand. If we have the tree topology ((A,B),(C,D)), then in the case of two symmetric states, the event that A and B have the same state should be independent of whether C and D have the same state, as the events determining these happen in different parts of the tree. We can set up a contingency table:

                                 C = D         C =/= D
                           ------------------------------
                          |
                   A = B  |   YYYY, YYRR,     YYYR, YYRY,
                          |   RRRR, RRYY      RRYR, RRRY
                          |
                 A =/= B  |   YRYY, YRRR,     YRYR, YRRY,
                          |   RYYY, RYRR      RYYR, RYRY

and we expect that the events C = D and A = B will be independent. Cavender's L invariant for this tree topology is simply the negative of the crossproduct difference,

      P(A=/=B and C=D) P(A=B and C=/=D) - P(A=B and C=D) P(A=/=B and C=/=D).

One of these L invariants is defined for each of the three tree topologies. They can obviously be tested simply by doing a chi-square test on the contingency table. The one corresponding to the correct topology should be statistically indistinguishable from zero. Again, there is a possible multiple tests problem if all three are tested at a nominal value of 95%.
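
For a concrete illustration, here is a minimal Python sketch (the names are mine) that computes the crossproduct difference and the Pearson chi-square for one such 2x2 table; applied to the tree I table in the test-set output below it gives the printed values of 4.0 and about 0.231. Sign and scaling conventions in DNAINVAR's own printout may differ from the frequency-based formula above:

def cavender_l_statistics(same_same, same_diff, diff_same, diff_diff):
    """Crossproduct difference and Pearson chi-square for a 2x2 table whose
    rows are A=B / A=/=B and whose columns are C=D / C=/=D."""
    crossproduct_difference = diff_same * same_diff - same_same * diff_diff
    observed = ((same_same, same_diff), (diff_same, diff_diff))
    n = same_same + same_diff + diff_same + diff_diff
    row = (same_same + same_diff, diff_same + diff_diff)
    col = (same_same + diff_same, same_diff + diff_diff)
    chisq = sum((observed[i][j] - row[i] * col[j] / n) ** 2 / (row[i] * col[j] / n)
                for i in range(2) for j in range(2))
    return crossproduct_difference, chisq

print(cavender_l_statistics(2, 8, 1, 2))    # (4, 0.2311...)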

The K invariants are differences between the L invariants. When one of the tables is expected to have crossproduct difference zero, the other two are expected to be nonzero, and also to be equal. So the difference of their crossproduct differences can be taken; this is the K invariant. It is not so easily tested.

The assumptions of Cavender's invariants are different from those of Lake's. One obviously need not assume anything about the frequencies of, or transitions among, the two different purines or the two different pyrimidines. However one does need to assume independent events at each site, and one needs to assume that the Y and R states are symmetric, that the probability per unit time that a Y changes into an R is the same as the probability that an R changes into a Y, so that we expect equal frequencies of the two states. There is also an assumption that all sites are changing between these two states at the same expected rate. This assumption is not needed for Lake's invariants, since expectations of sums are equal to sums of expectations, but for Cavender's it is, since products of expectations are not equal to expectations of products.

It is helpful to have both sorts of invariants available; with further work we may appreciate what other invariants there are for various models of nucleic acid change.

INPUT FORMAT

The input data for DNAINVAR is standard. The first line of the input file contains the number of species (which must always be 4 for this version of DNAINVAR) and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


Nucleic acid sequence Invariants method, version 3.6a3

Settings for this run:
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3      Print out the counts of patterns  Yes
  4              Print out the invariants  Yes

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options W, M and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

OUTPUT FORMAT

The output consists first (if option 1 is selected) of a reprinting of the input data, then (if option 3 is on) tables of observed pattern frequencies and pattern type frequencies. A table will be printed out, in alphabetic order AAAA through TTTT, of all the patterns that appear among the sites and the number of times each appears. This table will be invaluable for computation of any other invariants. There follows another table, of pattern types, using the 1234 notation, in numerical order 1111 through 1234, of the number of times each type of pattern appears. In this computation all sites at which there are any ambiguities or deletions are omitted. Cavender's invariants could actually be computed from sites that have only Y or R ambiguities; this will be done in the next release of this program.

If option 4 is on the invariants are then printed out, together with their statistical tests. For Lake's invariants the two sums which are expected to be equal are printed out, and then the result of a one-tailed exact binomial test which tests whether the difference is expected to be this positive or more. The P level is given (but remember the multiple-tests problem!).

For Cavender's L invariants the contingency tables are given. Each is tested with a one-tailed chi-square test. It is possible that the expected numbers in some categories could be too small for valid use of this test; the program does not check for this. It is also possible that the chi-square could be significant but in the wrong direction; this is not tested in the current version of the program. To check for this, beware of a chi-square greater than 3.841 combined with a positive invariant. The invariants themselves are computed, as the difference of cross-products. Their absolute magnitudes are not important, but which one is closest to zero may be indicative. Significantly nonzero invariants should be negative if the model is valid. The K invariants, which are simply differences among the L invariants, are also printed out without any test on them being conducted. Note that it is possible to use the bootstrap utility SEQBOOT to create multiple data sets, and from the output from running all of these get the empirical variability of these quadratic invariants.

PROGRAM CONSTANTS

The constants that are defined at the beginning of the program include "maxsp", which must always be 4 and should not be changed.

The program is very fast, as it has rather little work to do; these methods are just a little bit beyond the reach of hand tabulation. Execution speed should never be a limiting factor.

FUTURE OF THE PROGRAM

In a future version I hope to allow for Y and R codes in the calculation of the Cavender invariants, and to check for significantly negative cross-product differences in them, which would indicate violation of the model. By then there should be more known about invariants for larger numbers of species, and any such advances will also be incorporated.


TEST DATA SET

   4   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT


TEST SET OUTPUT (run with all numerical options turned on)


Nucleic acid sequence Invariants method, version 3.6a3

 4 species,  13  sites

Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         ..G..C.... ..C
Gamma        C.TT.C.T.. C.A
Delta        GGTA.TT.GG CC.



   Pattern   Number of times

     AAAC         1
     AAAG         2
     AACC         1
     AACG         1
     CCCG         1
     CCTC         1
     CGTT         1
     GCCT         1
     GGGT         1
     GGTA         1
     TCAT         1
     TTTT         1


Symmetrized patterns (1, 2 = the two purines  and  3, 4 = the two pyrimidines
                  or  1, 2 = the two pyrimidines  and  3, 4 = the two purines)

     1111         1
     1112         2
     1113         3
     1121         1
     1132         2
     1133         1
     1231         1
     1322         1
     1334         1

Tree topologies (unrooted): 

    I:  ((Alpha,Beta),(Gamma,Delta))
   II:  ((Alpha,Gamma),(Beta,Delta))
  III:  ((Alpha,Delta),(Beta,Gamma))


Lake's linear invariants
 (these are expected to be zero for the two incorrect tree topologies.
  This is tested by testing the equality of the two parts
  of each expression using a one-sided exact binomial test.
  The null hypothesis is that the first part is no larger than the second.)

 Tree                             Exact test P value    Significant?

   I      1    -     0   =     1       0.5000               no
   II     0    -     0   =     0       1.0000               no
   III    0    -     0   =     0       1.0000               no


Cavender's quadratic invariants (type L) using purines vs. pyrimidines
 (these are expected to be zero, and thus have a nonsignificant
  chi-square, for the correct tree topology)
They will be misled if there are substantially
different evolutionary rate between sites, or
different purine:pyrimidine ratios from 1:1.

  Tree I:

   Contingency Table

      2     8
      1     2

   Quadratic invariant =             4.0

   Chi-square =    0.23111 (not significant)


  Tree II:

   Contingency Table

      1     5
      1     6

   Quadratic invariant =            -1.0

   Chi-square =    0.01407 (not significant)


  Tree III:

   Contingency Table

      1     2
      6     4

   Quadratic invariant =             8.0

   Chi-square =    0.66032 (not significant)




Cavender's quadratic invariants (type K) using purines vs. pyrimidines
 (these are expected to be zero for the correct tree topology)
They will be misled if there are substantially
different evolutionary rate between sites, or
different purine:pyrimidine ratios from 1:1.
No statistical test is done on them here.

  Tree I:              -9.0
  Tree II:              4.0
  Tree III:             5.0

PHYLIPNEW-3.69.650/doc/pars.html

pars

version 3.6

PARS - Discrete character parsimony

© Copyright 1986-2000 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

PARS is a general parsimony program which carries out the Wagner parsimony method with multiple states. Wagner parsimony allows changes among all states. The criterion is to find the tree which requires the minimum number of changes. The Wagner method was originated by Eck and Dayhoff (1966) and by Kluge and Farris (1969). Here are its assumptions:

  1. Ancestral states are unknown.
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes to all other states are equally probable (Wagner).
  5. These changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than these state changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

INPUT FORMAT

The input for PARS is the standard input for discrete characters programs, described above in the documentation file for the discrete-characters programs, except that multiple states (up to 9 of them) are allowed. Any characters other than "?" are allowed as states, up to a maximum of 9 states. In fact, one can use different symbols in different columns of the data matrix, although it is rather unlikely that you would want to do that. The symbols you can use are:

  • The digits 0-9,
  • The letters A-Z and a-z,
  • The symbols ! " # $ % & ' ( ) * + , - . / : ; < = > @ [ \ ] ^ _ ` { | } ~
    (of these, probably only + and - will be of interest to most users).
But note that these do not include blank (" "). Blanks in the input data are simply skipped by the program, so that they can be used to make characters into groups for ease of viewing. The "?" (question mark) symbol has special meaning. It is allowed in the input but is not available as the symbol of a state. Rather, it means that the state is unknown.

PARS can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch.

It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier programs such as MIX.

The options are selected using a menu:


Discrete character parsimony algorithm, version 3.6

Setting for this run:
  U                 Search for best tree?  Yes
  S                        Search option?  More thorough search
  V              Number of trees to save?  100
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4          Print out steps in each site  No
  5  Print character at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file, with integer weights from 0 to 35 allowed by using the characters 0, 1, 2, ..., 9 and A, B, ... Z.
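
Since the characters 0-9 and A-Z are simply base-36 digits, decoding a weights line is a one-liner; here is a minimal sketch (the function name is mine):

def decode_weights(weights_line):
    """Translate a weights string using 0-9 and A-Z into integers 0 to 35,
    ignoring any whitespace."""
    return [int(ch, 36) for ch in weights_line if not ch.isspace()]

print(decode_weights("110Z30A"))    # [1, 1, 0, 35, 3, 0, 10]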

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating. They must be preceded in the file by a line giving the number of trees in the file.

The options J, O, T, and M are the usual Jumble, Outgroup, Threshold parsimony, and Multiple Data Sets options, described either in the main documentation file or in the Discrete Characters Programs documentation file.

The M (multiple data sets option) will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights.

The O (outgroup) option will have no effect if the U (user-defined tree) option is in effect. The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 1.0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data!

OUTPUT FORMAT

Output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other.

If option 4 is toggled on, a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item D appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?", there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983), evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across sites. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the discrete characters are evolving independently, which is unlikely to be true for many suites of morphological characters.
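
The arithmetic of this paired-sites comparison is simple enough to sketch. The following is only an illustration of the calculation described above (the function name is mine, and PARS's own bookkeeping may differ in detail):

from math import sqrt

def paired_sites_steps_test(steps_tree, steps_best):
    """Total extra steps of a user tree over the best tree, the standard
    deviation of that total estimated from per-site step differences, and
    whether the difference exceeds 1.96 standard deviations."""
    diffs = [a - b for a, b in zip(steps_tree, steps_best)]
    n = len(diffs)
    total = sum(diffs)
    mean = total / n
    variance = sum((d - mean) ** 2 for d in diffs) / (n - 1)
    sd_of_total = sqrt(n * variance)
    return total, sd_of_total, total > 1.96 * sd_of_total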

Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options on)


Discrete character parsimony algorithm, version 3.6


One most parsimonious tree found:


                 +Epsilon   
       +---------3  
  +----2         +--------------Delta     
  |    |  
  |    +Gamma     
  |  
  1---------Beta      
  |  
  +Alpha     


requires a total of      8.000

  between      and       length
  -------      ---       ------
     1           2       0.166667
     2           3       0.333333
     3      Epsilon      0.000000
     3      Delta        0.500000
     2      Gamma        0.000000
     1      Beta         0.333333
     1      Alpha        0.000000

steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                110110
   1      2         yes    .0....
   2      3         yes    0.1...
   3   Epsilon      no     ......
   3   Delta        yes    ...001
   2   Gamma        no     ......
   1   Beta         yes    ...00.
   1   Alpha        no     ......


PHYLIPNEW-3.69.650/doc/kitsch.html

kitsch

version 3.6

KITSCH -- Fitch-Margoliash and Least Squares Methods
with Evolutionary Clock

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program carries out the Fitch-Margoliash and Least Squares methods, plus a variety of others of the same family, with the assumption that all tip species are contemporaneous, and that there is an evolutionary clock (in effect, a molecular clock). This means that branches of the tree cannot be of arbitrary length, but are constrained so that the total length from the root of the tree to any species is the same. The quantity minimized is the same weighted sum of squares described in the Distance Matrix Methods documentation file.

The options are set using the menu:


Fitch-Margoliash method with contemporary tips, version 3.6a3

Settings for this run:
  D      Method (F-M, Minimum Evolution)?  Fitch-Margoliash
  U                 Search for best tree?  Yes
  P                                Power?  2.00000
  -      Negative branch lengths allowed?  No
  L         Lower-triangular data matrix?  No
  R         Upper-triangular data matrix?  No
  S                        Subreplicates?  No
  J     Randomize input order of species?  No. Use input order
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

Most of the options are described in the Distance Matrix Programs documentation file.

The D (methods) option allows choice between the Fitch-Margoliash criterion and the Minimum Evolution method (Kidd and Sgaramella-Zonta, 1971; Rzhetsky and Nei, 1993). Minimum Evolution (not to be confused with parsimony) uses the Fitch-Margoliash criterion to fit branch lengths to each topology, but then chooses topologies based on their total branch length (rather than the goodness of fit sum of squares). There is no constraint on negative branch lengths in the Minimum Evolution method; it sometimes gives rather strange results, as it may favor solutions that have large negative branch lengths, since these reduce the total sum of branch lengths!

Note that the User Trees (used by option U) must be rooted trees (with a bifurcation at their base). If you take a user tree from FITCH and try to evaluate it in KITSCH, it must first be rooted. This can be done using RETREE. Of the options available in FITCH, the O option is not available, as KITSCH estimates a rooted tree which cannot be rerooted, and the G option is not available, as global rearrangement is the default condition anyway. It is also not possible to specify that specific branch lengths of a user tree be retained when it is read into KITSCH, unless all of them are present. In that case the tree should be properly clocklike. Readers who wonder why we have not provided the feature of holding some of the user tree branch lengths constant while iterating others are invited to tell us how they would do it. As you consider particular possible patterns of branch lengths you will find that the matter is not at all simple.

If you use a User Tree (option U) with branch lengths with KITSCH, and the tree is not clocklike, when two branch lengths give conflicting positions for a node, KITSCH will use the first of them and ignore the other. Thus the user tree:

     ((A:0.1,B:0.2):0.4,(C:0.06,D:0.01):0.43);

is nonclocklike, so it will be treated as if it were actually the tree:

     ((A:0.1,B:0.1):0.4,(C:0.06,D:0.06):0.44);

The input is exactly the same as described in the Distance Matrix Methods documentation file. The output is a rooted tree, together with the sum of squares, the number of tree topologies searched, and, if the power P is at its default value of 2.0, the Average Percent Standard Deviation is also supplied. The lengths of the branches of the tree are given in a table, that also shows for each branch the time at the upper end of the branch. "Time" here really means cumulative branch length from the root, going upwards (on the printed diagram, rightwards). For each branch, the "time" given is for the node at the right (upper) end of the branch. It is important to realize that the branch lengths are not exactly proportional to the lengths drawn on the printed tree diagram! In particular, short branches are exaggerated in the length on that diagram so that they are more visible.

The method may be considered as providing an estimate of the phylogeny. Alternatively, it can be considered as a phenetic clustering of the tip species. This method minimizes an objective function, the sum of squares, not only setting the levels of the clusters so as to do so, but rearranging the hierarchy of clusters to try to find alternative clusterings that give a lower overall sum of squares. When the power option P is set to a value of P = 0.0, so that we are minimizing a simple sum of squares of the differences between the observed distance matrix and the expected one, the method is very close in spirit to Unweighted Pair Group Arithmetic Average Clustering (UPGMA), also called Average-Linkage Clustering. If the topology of the tree is fixed and there turn out to be no branches of negative length, its result should be the same as UPGMA in that case. But since it tries alternative topologies and (unless the N option is set) it combines nodes that otherwise could result in a reversal of levels, it is possible for it to give a different, and better, result than simple sequential clustering. Of course UPGMA itself is available as an option in program NEIGHBOR.

The U (User Tree) option requires a bifurcating tree, unlike FITCH, which requires an unrooted tree with a trifurcation at its base. Thus the tree shown below would be written:

     ((D,E),(C,(A,B)));

If a tree with a trifurcation at the base is by mistake fed into the U option of KITSCH then some of its species (the entire rightmost furc, in fact) will be ignored and too small a tree read in. This should result in an error message and the program should stop. It is important to understand the difference between the User Tree formats for KITSCH and FITCH. You may want to use RETREE to convert a user tree that is suitable for FITCH into one suitable for KITSCH or vice versa.

An important use of this method will be to do a formal statistical test of the evolutionary clock hypothesis. This can be done by comparing the sums of squares achieved by FITCH and by KITSCH, BUT SOME CAVEATS ARE NECESSARY. First, the assumption is that the observed distances are truly independent, that no original data item contributes to more than one of them (not counting the two reciprocal distances from i to j and from j to i). THIS WILL NOT HOLD IF THE DISTANCES ARE OBTAINED FROM GENE FREQUENCIES, FROM MORPHOLOGICAL CHARACTERS, OR FROM MOLECULAR SEQUENCES. It may be invalid even for immunological distances and levels of DNA hybridization, if the use of a common standard for all members of a row or column allows an error in the measurement of the standard to affect all these distances simultaneously. It will also be invalid if the numbers have been collected in experimental groups, each measured by taking differences from a common standard which itself is measured with error. Only if the numbers in different cells are measured from independent standards can we depend on the statistical model. The details of the test and the assumptions are discussed in my review paper on distance methods (Felsenstein, 1984a). For further and sometimes irrelevant controversy on these matters see the papers by Farris (1981, 1985, 1986) and myself (Felsenstein, 1986, 1988b).

A second caveat is that the distances must be expected to rise linearly with time, not according to any other curve. Thus it may be necessary to transform the distances to achieve an expected linearity. If the distances have an upper limit beyond which they could not go, this is a signal that linearity may not hold. It is also VERY important to choose the power P at a value that results in the standard deviation of the variation of the observed from the expected distances being the P/2-th power of the expected distance.

To carry out the test, fit the same data with both FITCH and KITSCH, and record the two sums of squares. If the topology has turned out the same, we have N = n(n-1)/2 distances which have been fit with 2n-3 parameters in FITCH, and with n-1 parameters in KITSCH. Then the difference between S(K) and S(F) has d1 = n-2 degrees of freedom. It is statistically independent of the value of S(F), which has d2 = N-(2n-3) degrees of freedom. The ratio of mean squares

      [S(K)-S(F)]/d1
     ----------------
          S(F)/d2

should, under the evolutionary clock, have an F distribution with n-2 and N-(2n-3) degrees of freedom respectively. The test desired is that the F ratio is in the upper tail (say the upper 5%) of its distribution. If the S (subreplication) option is in effect, the above degrees of freedom must be modified by noting that N is not n(n-1)/2 but is the sum of the numbers of replicates of all cells in the distance matrix read in, which may be either square or triangular. A further explanation of the statistical test of the clock is given in a paper of mine (Felsenstein, 1986).
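
As a worked summary of this arithmetic, here is a minimal sketch (the names are mine) that assembles the F ratio and its degrees of freedom for the case without subreplication; the result would then be compared against the upper tail of an F table with (d1, d2) degrees of freedom:

def clock_f_ratio(n_species, s_kitsch, s_fitch):
    """F statistic for the molecular clock test, with N = n(n-1)/2 distances,
    d1 = n-2 and d2 = N-(2n-3) degrees of freedom."""
    n_dist = n_species * (n_species - 1) // 2
    d1 = n_species - 2
    d2 = n_dist - (2 * n_species - 3)
    f = ((s_kitsch - s_fitch) / d1) / (s_fitch / d2)
    return f, d1, d2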

The program uses a similar tree construction method to the other programs in the package and, like them, is not guaranteed to give the best-fitting tree. The assignment of the branch lengths for a given topology is a least squares fit, subject to the constraints against negative branch lengths, and should not be able to be improved upon. KITSCH runs more quickly than FITCH.

The constant available for modification at the beginning of the program is "epsilon", which defines a small quantity needed in some of the calculations. There is no feature saving multiple trees tied for best, because exact ties are not expected, except in cases where it should be obvious from the tree printed out what is the nature of the tie (as when an interior branch is of length zero).


TEST DATA SET

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000


TEST SET OUTPUT FILE (with all numerical options on)


   7 Populations

Fitch-Margoliash method with contemporary tips, version 3.6a3

                  __ __             2
                  \  \   (Obs - Exp)
Sum of squares =  /_ /_  ------------
                                2
                   i  j      Obs

negative branch lengths not allowed


Name                       Distances
----                       ---------

Bovine        0.00000   1.68660   1.71980   1.66060   1.52430   1.60430
              1.59050
Mouse         1.68660   0.00000   1.52320   1.48410   1.44650   1.43890
              1.46290
Gibbon        1.71980   1.52320   0.00000   0.71150   0.59580   0.61790
              0.55830
Orang         1.66060   1.48410   0.71150   0.00000   0.46310   0.50610
              0.47100
Gorilla       1.52430   1.44650   0.59580   0.46310   0.00000   0.34840
              0.30830
Chimp         1.60430   1.43890   0.61790   0.50610   0.34840   0.00000
              0.26920
Human         1.59050   1.46290   0.55830   0.47100   0.30830   0.26920
              0.00000


                                           +-------Human     
                                         +-6 
                                    +----5 +-------Chimp     
                                    !    ! 
                                +---4    +---------Gorilla   
                                !   ! 
       +------------------------3   +--------------Orang     
       !                        ! 
  +----2                        +------------------Gibbon    
  !    ! 
--1    +-------------------------------------------Mouse     
  ! 
  +------------------------------------------------Bovine    


Sum of squares =      0.107

Average percent standard deviation =   5.16213

From     To            Length          Height
----     --            ------          ------

   6   Human           0.13460         0.81285
   5      6            0.02836         0.67825
   6   Chimp           0.13460         0.81285
   4      5            0.07638         0.64990
   5   Gorilla         0.16296         0.81285
   3      4            0.06639         0.57352
   4   Orang           0.23933         0.81285
   2      3            0.42923         0.50713
   3   Gibbon          0.30572         0.81285
   1      2            0.07790         0.07790
   2   Mouse           0.73495         0.81285
   1   Bovine          0.81285         0.81285

PHYLIPNEW-3.69.650/doc/restml.html

restml

version 3.6

RESTML -- Restriction sites Maximum Likelihood program

© Copyright 1986-2000 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements a maximum likelihood method for restriction sites data (not restriction fragment data). This program is one of the slowest programs in this package, and can be very tedious to run. It is possible to have the program search for the maximum likelihood tree. It will be more practical for some users (those that do not have fast machines) to use the U (User Tree) option, which takes less run time, optimizing branch lengths and computing likelihoods for particular tree topologies suggested by the user. The model used here is essentially identical to that used by Smouse and Li (1987) who give explicit expressions for computing the likelihood for three-species trees. It does not place prior probabilities on trees as they do. The present program extends their approach to multiple species by a technique which, while it does not give explicit expressions for likelihoods, does enable their computation and the iterative improvement of branch lengths. It also allows for multiple restriction enzymes. The algorithm has been described in a paper (Felsenstein, 1992). Another relevant paper is that of DeBry and Slade (1985).

The assumptions of the present model are:

  1. Each restriction site evolves independently.
  2. Different lineages evolve independently.
  3. Each site undergoes substitution at an expected rate which we specify.
  4. Substitutions consist of replacement of a nucleotide by one of the other three nucleotides, chosen at random.

Note that if the existing base is, say, an A, the chance of it being replaced by a G is 1/3, and so is the chance that it is replaced by a T. This means that there can be no difference in the (expected) rate of transitions and transversions. Users who are upset at this might ponder the fact that a version allowing different rates of transitions and transversions would run an estimated 16 times slower. If it also allowed for unequal frequencies of the four bases, it would run about 300,000 times slower! For the moment, until a better method is available, I guess I'll stick with this one!
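
To make the symmetry of this model concrete, here is a minimal Python sketch of the per-base transition probabilities it implies (the familiar Jukes-Cantor formulas) and of the chance that a whole recognition sequence survives a branch unchanged. The names are mine, and this is only an illustration of the substitution model, not RESTML's site-presence likelihood:

from math import exp

def probability_base_unchanged(branch_length):
    """Chance a base is unchanged over a branch whose length is measured in
    expected substitutions (not counting replacements of a base by itself)."""
    return 0.25 + 0.75 * exp(-4.0 * branch_length / 3.0)

def probability_site_retained(branch_length, site_length=6):
    """Chance that a recognition sequence of site_length bases is unchanged."""
    return probability_base_unchanged(branch_length) ** site_length

print(round(probability_site_retained(0.1), 3))    # about 0.554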

INPUT FORMAT AND OPTIONS

Subject to these assumptions, the program is an approximately correct maximum likelihood method. The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this:

   10   35    4

The first line of the data file will also contain a letter W following these numbers (and separated from them by a space) if the Weights option is being used. As with all programs using the weights option, a line or lines must then follow, before the data, with the weights for each site.

The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs.

The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown.

User-defined trees may follow the data in the usual way. The trees must be unrooted, which means that at their base they must have a trifurcation.

The options are selected by a menu, which looks like this:


Restriction site Maximum Likelihood method, version 3.6

Settings for this run:
  U                 Search for best tree?  Yes
  A               Are all sites detected?  No
  S        Speedier but rougher analysis?  Yes
  G                Global rearrangements?  No
  J   Randomize input order of sequences?  No. Use input order
  L                          Site length?  6
  O                        Outgroup root?  No, use as outgroup species  1
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

The U, J, O, M, and 0 options are the usual ones, described in the main documentation file. The user trees for option U are read from a file whose default name is intree. The I option selects between Interleaved and Sequential input data formats, and is described in the documentation file for the molecular sequences programs.

The G (global search) option causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program.

The two options specific to this program are the A, and L options. The L (Length) option allows the user to specify the length in bases of the restriction sites. Allowed values are 1 to 8 (the constant "maxcutter" in file phylip.h controls the maximum allowed value). At the moment the program assumes that all sites have the same length (for example, that all enzymes are 6-base-cutters). The default value for this parameter is 6, which will be used if the L option is not invoked. A desirable future development for the package would be allowing the L parameter to be different for every site. It would also be desirable to allow for ambiguities in the recognition site, since some enzymes recognize 2 or 4 sequences. Both of these would require fairly complicated programming or else slower execution times.

The A (All) option specifies that all sites are detected, even those for which all of the species have the recognition sequence absent (character state "-"). The default condition is that it is assumed that such sites will not occur in the data. The likelihood computed when the A option is not used is the probability of the pattern of sites given that tree and conditional on the pattern not being all absences. This will be realistic for most data, except for cases in which the data are extracted from sites data for a larger number of species, in which case some of the site positions could have all absences in the subset of species. In such cases an effective way of analyzing the data would be to omit those sites and not use the A option, as such positions, even if not absolutely excluded, are nevertheless less likely than random to have been incorporated in the data set.

The W (Weights) option, which is invoked in the input file rather than in the menu, allows the user to select a subset of sites to be analyzed. It is invoked in the usual way, except that only weights 0 and 1 are allowed. If the W option is not used, all sites will be analyzed. If the Weights option is used, there must be a W in the first line of the input file.

OUTPUT FORMAT

The output starts by giving the number of species, and the number of sites. If the default condition is used instead of the A option the program states that it is assuming that sites absent in all species have been omitted. The value of the site length (6 bases, for example) is also given.

If option 1 (print out the data) has been selected, there then follow the restriction site sequences, printed in groups of ten sites. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given.

A table is printed showing the length of each tree segment, as well as (very) rough confidence limits on the length. As with DNAML, if a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow.

In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This is done by comparing the likelihoods changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant.

One should also realize that if you are looking not at a previously-chosen branch but at all branches, that you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that).
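
The comparison itself is only a little arithmetic; the following sketch (with hypothetical log-likelihood values, and names of my own) shows the statistic being compared against the 5% chi-square critical value with one degree of freedom:

def branch_lrt_statistic(lnl_unconstrained, lnl_branch_zero):
    """Twice the log-likelihood difference for forcing one branch to zero length."""
    return 2.0 * (lnl_unconstrained - lnl_branch_zero)

# 3.841 is the 5% critical value with one degree of freedom; with, say, 20
# branches a Bonferroni-style correction would use the quantile for 0.05/20.
print(branch_lrt_statistic(-1290.4, -1293.7) > 3.841)    # True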

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. Remember that testing one tree topology against another is not a simple matter, because two different tree topologies are not hypotheses that are nested one within the other. If the trees differ by only one branch swap, it seems to be conservative to test the difference between their likelihoods with one degree of freedom, but other than that little is known and more work on this is needed.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
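
A rough sketch of this resampling scheme, again purely illustrative (Python/NumPy) and not the program's own code, with the number of replicates an arbitrary choice, might look like this:

  import numpy as np

  def sh_test(site_lnl, n_boot=10000, seed=1):
      # site_lnl: per-site log-likelihoods, one row per tree.
      site_lnl = np.asarray(site_lnl)
      rng = np.random.default_rng(seed)
      totals = site_lnl.sum(axis=1)
      obs_diff = totals.max() - totals          # observed gap to the best tree
      # Covariance of the total log-likelihoods, estimated from the sites.
      cov = np.cov(site_lnl) * site_lnl.shape[1]
      # "Least favorable hypothesis": equal means, the same covariances.
      sims = rng.multivariate_normal(np.zeros(len(totals)), cov, size=n_boot)
      sim_diff = sims.max(axis=1)[:, None] - sims
      pvals = (sim_diff >= obs_diff).mean(axis=0)    # one P value per tree
      return obs_diff, pvals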

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

The branch lengths printed out are scaled in terms of expected numbers of base substitutions, not counting replacements of a base by itself. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability, and since probabilities never exceed 1.0 this logarithm will typically be negative. The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14. The log likelihood will not always be negative since a combinatorial constant has been left out of the expression for the likelihood. This does not affect the tree found or the likelihood ratios (or log likelihood differences) between trees.

THE ALGORITHM

The program uses a Newton-Raphson algorithm to update one branch length at a time. This is faster than the EM algorithm which was described in my paper on restriction sites maximum likelihood (Felsenstein, 1992). The likelihood that is being maximized is the same one used by Smouse and Li (1987), extended to multiple species.
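
In outline (this is only a sketch, not the program's code; the derivative functions here are hypothetical stand-ins for the analytical derivatives the program computes), a Newton-Raphson update of a single branch length looks like this:

  def update_branch_length(t, dlnl, d2lnl, t_min=1e-5, max_iter=20, tol=1e-8):
      # One branch length t is improved using the first and second
      # derivatives of the log likelihood with respect to t.
      for _ in range(max_iter):
          t_new = t - dlnl(t) / d2lnl(t)
          if t_new < t_min:         # keep branch lengths non-negative
              t_new = t_min
          if abs(t_new - t) < tol:  # converged
              return t_new
          t = t_new
      return t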

PROGRAM CONSTANTS

The constants include "maxcutter" (set in phylip.h), the maximum length of an enzyme recognition site. The memory used by the program will be approximately proportional to this value, which is 8 in the distribution copy. The program also uses the constants "iterations", "smoothings", and "epsilon". Reducing "iterations" and "smoothings", or increasing "epsilon", will result in faster execution but a worse result. These values will not usually have to be changed.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring at each site, lends itself readily to parallel processing.

A feature of the algorithm is that it saves time by recognizing sites at which the pattern of presence/absence is the same, and does that computation only once. Thus if we have only four species but a large number of sites, there are only about (ignoring ambiguous bases) 16 different patterns of presence/absence (2 x 2 x 2 x 2) that can occur. The program automatically counts occurrences of each and does the computation for each pattern only once, so that it only needs to do as much computation as would be needed with at most 16 sites, even though the number of sites is actually much larger. Thus the program will run very effectively with few species and many sites.
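
The idea can be sketched as follows (illustrative Python only, using the test data set given below; the program's own bookkeeping is more elaborate):

  from collections import Counter

  def count_patterns(alignment):
      # alignment: one string of '+'/'-' per species, all the same length.
      # Each distinct column (presence/absence pattern) is counted, so the
      # likelihood need only be evaluated once per distinct pattern.
      return Counter(zip(*alignment))

  patterns = count_patterns(["++-+-++--+++-",
                             "++++--+--+++-",
                             "-+--+-++-+-++",
                             "++-+----++---",
                             "++++----++---"])
  # With 5 species there can be at most 2**5 = 32 distinct patterns,
  # no matter how many sites there are.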

PAST AND FUTURE OF THE PROGRAM

This program was developed by modifying DNAML version 3.1 and also adding some of the modifications that were added to DNAML version 3.2, with which it shares many of its data structures and much of its strategy. Version 3.6 changed from EM iterations of branch lengths, which involved arbitrary extrapolation factors, to the Newton-Raphson algorithm, which improved the speed of the program (though only from "very slow" to "slow").

There are a number of obvious directions in which the program needs to be modified in the future. Extension to allow for different rates of transition and transversion is straightforward, but would slow down the program considerably, as I have mentioned above. I have not included in the program any provision for saving and printing out multiple trees tied for highest likelihood, in part because an exact tie is unlikely.


TEST DATA SET

   5   13   2
Alpha     ++-+-++--+++-
Beta      ++++--+--+++-
Gamma     -+--+-++-+-++
Delta     ++-+----++---
Epsilon   ++++----++---


CONTENTS OF OUTPUT FILE (if all numerical options are on)


Restriction site Maximum Likelihood method, version 3.6

   5 Species,   13 Sites,   2 Enzymes

  Recognition sequences all 6 bases long

Sites absent from all species are assumed to have been omitted


Name            Sites
----            -----

Alpha        ++-+-++--+ ++-
Beta         ++++--+--+ ++-
Gamma        -+--+-++-+ -++
Delta        ++-+----++ ---
Epsilon      ++++----++ ---





  +----Gamma     
  |  
  |     +Epsilon   
  |  +--3  
  1--2  +Delta     
  |  |  
  |  +Beta      
  |  
  +Alpha     


remember: this is an unrooted tree!

Ln Likelihood =   -40.34358

 
 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------
   1          Gamma           0.10813     (  0.01154,     0.21901) **
   1             2            0.01156     (     zero,     0.04578)
   2             3            0.05885     (     zero,     0.12697) **
   3          Epsilon         0.00100     (     zero,     0.00617)
   3          Delta           0.01460     (     zero,     0.05036)
   2          Beta            0.00100     (     zero,    infinity)
   1          Alpha           0.01310     (     zero,     0.04806)

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01


PHYLIPNEW-3.69.650/doc/consense.html

version 3.6

CONSENSE -- Consensus tree program

© Copyright 1986-2000 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

CONSENSE reads a file of computer-readable trees and prints out (and may also write out onto a file) a consensus tree. At the moment it carries out a family of consensus tree methods called the Ml methods (Margush and McMorris, 1981). These include strict consensus and majority rule consensus. Basically the consensus tree consists of monophyletic groups that occur as often as possible in the data. If a group occurs in more than a fraction l of all the input trees it will definitely appear in the consensus tree.

The tree printed out has at each fork a number indicating how many times the group which consists of the species to the right of (descended from) the fork occurred. Thus if we read in 15 trees and find that a fork has the number 15, that group occurred in all of the trees. The strict consensus tree consists of all groups that occurred 100% of the time, the rest of the resolution being ignored. The tree printed out here includes groups down to 50%, and below it until the tree is fully resolved.

The majority rule consensus tree consists of all groups that occur more than 50% of the time. Any other percentage level between 50% and 100% can also be used, and that is why the program in effect carries out a family of methods. You have to decide on the percentage level, figure out for yourself what number of occurrences that would be (e.g. 15 in the above case for 100%), and resolutely ignore any group below that number. Do not use numbers at or below 50%, because some groups occurring (say) 35% of the time will not be shown on the tree. The collection of all groups that occur 35% or more of the time may include two groups that are mutually self contradictory and cannot appear in the same tree. In this program, as the default method I have included groups that occur less than 50% of the time, working downwards in their frequency of occurrence, as long as they continue to resolve the tree and do not contradict more frequent groups. In this respect the method is similar to the Nelson consensus method (Nelson, 1979) as explicated by Page (1989) although it is not identical to it.
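
The counting idea can be sketched as follows (a toy illustration in Python, with trees written as nested tuples rather than Newick strings; it is not the algorithm CONSENSE itself uses, and it ignores tree weights):

  from collections import Counter

  def collect_groups(tree, counts):
      # Return the set of species below this node, counting every group seen.
      if isinstance(tree, str):                  # a tip
          return frozenset([tree])
      group = frozenset().union(*(collect_groups(c, counts) for c in tree))
      counts[group] += 1
      return group

  def majority_rule(trees):
      # Keep the groups occurring in more than half of the input trees; the
      # "extended" method would then add compatible, less frequent groups.
      counts = Counter()
      for t in trees:
          collect_groups(t, counts)
      return {g: n for g, n in counts.items() if n > len(trees) / 2.0}

  majority_rule([("A", ("B", ("C", "D"))),
                 ("A", (("B", "C"), "D"))])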

The program can also carry out Strict consensus, Majority Rule consensus without the extension which adds groups until the tree is fully resolved, and other members of the Ml family, where the user supplies the fraction of times the group must appear in the input trees to be included in the consensus tree. For the moment the program cannot carry out any other consensus tree method, such as Adams consensus (Adams, 1972, 1986) or methods based on quadruples of species (Estabrook, McMorris, and Meacham, 1985).

INPUT, OUTPUT, AND OPTIONS

Input is a tree file (called intree) which contains a series of trees in the Newick standard form -- the form used when many of the programs in this package write out tree files. Each tree starts on a new line. Each tree can have a weight, which is a real number and is located in comment brackets "[" and "]" just before the final ";" which ends the description of the tree. When the input trees have weights (like [0.01000]) then the total number of trees will be the total of those weights, which is often a number like 1.00. When a tree doesn't have a weight it will be assigned a weight of 1. This means that when we have tied trees (as from a parsimony program) three alternative tied trees will be counted as if each was 1/3 of a tree.
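
For example (a hypothetical input line, shown only to illustrate the format), a tree carrying a weight of one third might appear in the tree file as:

	  ((A,B),(C,D))[0.3333];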

Note that this program can correctly read trees whether or not they are bifurcating: in fact they can be multifurcating at any level in the tree.

The options are selected from a menu, which looks like this:


Majority-rule and strict consensus tree program, version 3.6

Settings for this run:
 C   Consensus type (strict, MR, MRe, Ml)  Majority Rule (extended)
 O                         Outgroup root:  No, use as outgroup species  1
 R         Trees to be treated as Rooted:  No
 T    Terminal type (IBM PC, ANSI, none):  none
 1         Print out the sets of species:  Yes
 2  Print indications of progress of run:  Yes
 3                        Print out tree:  Yes
 4        Write out trees onto tree file:  Yes

Are these settings correct? (type Y or the letter for one to change)

Option C (Consensus method) selects which of four methods the program uses. The program defaults to using the extended Majority Rule method. Each time the C option is chosen the program moves on to another method, the others being in order Strict, Majority Rule, and Ml. Here are descriptions of the methods. In each case the fraction of times a set appears among the input trees is counted by weighting by the weights of the trees (the numbers like [0.6000] that appear at the ends of trees in some cases).

Strict
A set of species must appear in all input trees to be included in the strict consensus tree.

Majority Rule (extended)
Any set of species that appears in more than 50% of the trees is included. The program then considers the other sets of species in order of the frequency with which they have appeared, adding to the consensus tree any which are compatible with it until the tree is fully resolved. This is the default setting.

Ml
The user is asked for a fraction between 0.5 and 1, and the program then includes in the consensus tree any set of species that occurs among the input trees more than that fraction of the time. The Strict consensus and the Majority Rule consensus are extreme cases of the Ml consensus, being for fractions of 1 and 0.5 respectively.

Majority Rule
A set of species is included in the consensus tree if it is present in more than half of the input trees.

Option R (Rooted) toggles between the default assumption that the input trees are unrooted trees and the selection that specifies that the tree is to be treated as a rooted tree and not re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be re-rooted automatically at the first species encountered on the first tree (or at a species designated by the Outgroup option).

Option O is the usual Outgroup rooting option. It is in effect only if the Rooted option selection is not in effect. The trees will be re-rooted with a species of your choosing. You will be asked for the number of the species that is to be the outgroup. If we want to outgroup-root the tree on the line leading to a species which appears as the third species (counting left-to-right) in the first computer-readable tree in the input file, we would invoke select menu option O and specify species 3.

Output is a list of the species (in the order in which they appear in the first tree, which is the numerical order used in the program), a list of the subsets that appear in the consensus tree, a list of those that appeared in one or another of the individual trees but did not occur frequently enough to get into the consensus tree, followed by a diagram showing the consensus tree. The lists of subsets consist of rows of symbols, each either "." or "*". The species that are in the set are marked by "*". After every ten species there is a blank, to help you keep track of the alignment of columns. The order of symbols corresponds to the order of species in the species list. Thus a set that consisted of the second, seventh, and eighth out of 13 species would be represented by:

	  .*....**.. ...

Note that if the trees are unrooted the final tree will have one group, consisting of every species except the Outgroup (which by default is the first species encountered on the first tree), which always appears. It will not be listed in either of the lists of sets, but it will be shown in the final tree as occurring all of the time. This is hardly surprising: in telling the program that this species is the outgroup we have specified that the set consisting of all of the others is always a monophyletic set. So this is not to be taken as interesting information, despite its dramatic appearance.

Option 1 in the menu gives you the option of turning off the writing of these sets into the output file. This may be useful if you are primarily interested in getting the tree file.

Option 4 is the usual tree file option. If this is on (it is by default) then the final tree will be written onto an output tree file (whose default name is "outtree"). Note that the lengths on the tree on the output tree file are not branch lengths but the number of times that each group appeared in the input trees. This number is the sum of the weights of the trees in which it appeared, so that if there are 11 trees, ten of them having weight 0.1 and one weight 1.0, a group that appeared in the last tree and in 6 others would be shown as appearing 1.6 times and its branch length will be 1.6.

CONSTANTS

The program uses the consensus tree algorithm originally designed for the bootstrap programs. It is quite fast, and execution time is unlikely to be limiting for you (assembling the input file will be much more of a limiting step). In the future, if possible, more consensus tree methods will be incorporated (although the current methods are the ones needed for the component analysis of bootstrap estimates of phylogenies, and in other respects I also think that the present ones are among the best).


TEST DATA SET

(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));
(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(E,(G,((F,I),(((J,H),D),C))))));
(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));
(A,(B,(E,((F,I),(G,(((J,H),D),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));
(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));


TEST SET OUTPUT


Majority-rule and strict consensus tree program, version 3.6

Species in order: 

  A
  B
  H
  D
  J
  G
  E
  F
  I
  C


Sets included in the consensus tree

Set (species in order)     How many times out of    9.00

.......**.                   9.00
..********                   9.00
..***....*                   6.00
..****.***                   6.00
..***.....                   6.00
..*.*.....                   4.00
..***..***                   2.00


Sets NOT included in consensus tree:

Set (species in order)     How many times out of    9.00

.....**...                   3.00
.....****.                   3.00
..**......                   3.00
.....*****                   3.00
..*.******                   2.00
.....*.**.                   2.00
..****...*                   2.00
....******                   2.00
...*******                   1.00


Majority rule consensus (extended to resolve tree)

CONSENSUS TREE:
the numbers at the forks indicate the number
of times the group consisting of the species
which are to the right of that fork occurred
among the trees, out of   9.00 trees

  +-------------------------------------------------------A
  |
  |             +-----------------------------------------E
  |             |
  |             |                                  +------I
  |             |             +----------------9.0-|
  |             |             |                    +------F
  |      +--9.0-|             |
  |      |      |      +--2.0-|             +-------------D
  |      |      |      |      |      +--6.0-|
  |      |      |      |      |      |      |      +------J
  |      |      |      |      +--6.0-|      +--4.0-|
  +------|      +--6.0-|             |             +------H
         |             |             |
         |             |             +--------------------C
         |             |
         |             +----------------------------------G
         |
         +------------------------------------------------B


  remember: this is an unrooted tree!

PHYLIPNEW-3.69.650/doc/promlk.html

version 3.6

ProMLK -- Protein maximum likelihood program
with molecular clock

© Copyright 2000-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the maximum likelihood method for protein amino acid sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to PROML. It uses either the Jones-Taylor-Thornton or the Dayhoff (PAM) probability model of change between amino acids. Its algorithmic details are not yet published, but many of them are similar to DNAMLK.

The assumptions of the model are:

  1. Each position in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. The probabilities of change between amino acids are given by the Jones-Taylor-Thornton model or by the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et al., 1979), as selected with the P option.

Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probabilities of occurrence of each is, and what the average length of a patch of positions all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.

The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions.

This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at relative rates of 0.2 compared to 1.0 at other positions. If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.

INPUT FORMAT AND OPTIONS

Subject to these assumptions, the program is a correct maximum likelihood method. The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of amino acid positions.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter amino acid code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


Amino acid sequence
   Maximum Likelihood method with molecular clock, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  P   JTT or PAM amino acid change model?  Jones-Taylor-Thornton model
  C   One category of substitution rates?  Yes
  R           Rate variation among sites?  constant rate of change
  G                Global rearrangements?  No
  W                       Sites weighted?  No
  J   Randomize input order of sequences?  No. Use input order
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes
  5   Reconstruct hypothetical sequences?  No

Are these settings correct? (type Y or the letter for one to change)

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, W, J, O, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The R (Hidden Markov Model rates) option allows the user to approximate a Gamma distribution of rates among positions, or a Gamma distribution plus a class of invariant positions, or to specify how many categories of substitution rates there will be in a Hidden Markov Model of rate variation, and what are the rates and probabilities for each. By repeatedly selecting the R option one toggles among no rate variation, the Gamma, Gamma+I, and general HMM possibilities.

If you choose Gamma or Gamma+I the program will ask how many rate categories you want. If you have chosen Gamma+I, keep in mind that one rate category will be set aside for the invariant class and only the remaining ones used to approximate the Gamma distribution. For the approximation we do not use the quantile method of Yang (1995) but instead use a quadrature method using generalized Laguerre polynomials. This should give a good approximation to the Gamma distribution with as few as 5 or 6 categories.

In the Gamma and Gamma+I cases, the user will be asked to supply the coefficient of variation of the rate of substitution among positions. This is different from the parameters used by Nei and Jin (1990) but related to them: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

     CV = 1 / a^(1/2)

or

     a = 1 / (CV)^2

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
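
For example, a coefficient of variation of 1.0 corresponds to alpha = 1.0 (as in the sample run shown below), while a smaller coefficient of variation of 0.5 would correspond to alpha = 1/(0.5)^2 = 4.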

If the user instead chooses the general Hidden Markov Model option, they are first asked how many HMM rate categories there will be (for the moment there is an upper limit of 9, which should not be restrictive). Then the program asks for the rates for each category. These rates are only meaningful relative to each other, so that rates 1.0, 2.0, and 2.4 have the exact same effect as rates 2.0, 4.0, and 4.8. Note that an HMM rate category can have rate of change 0, so that this allows us to take into account that there may be a category of amino acid positions that are invariant. Note that the run time of the program will be proportional to the number of HMM rate categories: twice as many categories means twice as long a run. Finally the program will ask for the probabilities of a random amino acid position falling into each of these regional rate categories. These probabilities must be nonnegative and sum to 1. Default for the program is one category, with rate 1.0 and probability 1.0 (actually the rate does not matter in that case).

If more than one HMM rate category is specified, then another option, A, becomes visible in the menu. This allows us to specify that we want to assume that positions that have the same HMM rate category are expected to be clustered so that there is autocorrelation of rates. The program asks for the value of the average patch length. This is an expected length of patches that have the same rate. If it is 1, the rates of successive positions will be independent. If it is, say, 10.25, then the chance of change to a new rate will be 1/10.25 after every position. However the "new rate" is randomly drawn from the mix of rates, and hence could even be the same. So the actual observed length of patches with the same rate will be a bit larger than 10.25. Note below that if you choose multiple patches, there will be an estimate in the output file as to which combination of rate categories contributed most to the likelihood.

Note that the autocorrelation scheme we use is somewhat different from Yang's (1995) autocorrelated Gamma distribution. I am unsure whether this difference is of any importance -- our scheme is chosen for the ease with which it can be implemented.

The C option allows user-defined rate categories. The user is prompted for the number of user-defined rates, and for the rates themselves, which cannot be negative but can be zero. These numbers, which must be nonnegative (some could be 0), are defined relative to each other, so that if rates for three categories are set to 1 : 3 : 2.5 this would have the same meaning as setting them to 2 : 6 : 5. The assignment of rates to amino acid positions is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444

With the current options R, A, and C the program has a good ability to infer different rates at different positions and estimate phylogenies under a more realistic model. Note that Likelihood Ratio Tests can be used to test whether one combination of rates is significantly better than another, provided one rate scheme represents a restriction of another with fewer parameters. The number of parameters needed for rate variation is the number of regional rate categories, plus the number of user-defined rate categories less 2, plus one if the regional rate categories have a nonzero autocorrelation.
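
For example, counting as described here, the sample run shown below, with 5 HMM rate categories, 2 user-defined categories, and a nonzero autocorrelation, uses 5 + 2 - 2 + 1 = 6 parameters for rate variation.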

The G (global search) option causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program.

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating. This allows us to test the hypothesis that a given branch has zero length.

If the U (user tree) option is chosen another option appears in the menu, the L option. If it is selected, it signals the program that it should take any branch lengths that are in the user tree and simply evaluate the likelihood of that tree, without further altering those branch lengths. In the case of a clock, if some branches have lengths and others do not, the program does not estimate the lengths of those that do not have lengths given in the user tree. If any of the branches do not have lengths, the program re-estimates the lengths of all of them. This is done because estimating some and not others is hard in the case of a clock.

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of positions to be analyzed, ignoring the others. The positions selected are those with weight 1. If the W option is not invoked, all positions are analyzed. The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file.

The M (multiple data sets) option will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets from the input file. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights. Note also that when we use multiple weights for bootstrapping we can also then maintain different rate categories for different positions in a meaningful way. If you use the multiple data sets option rather than the multiple weights option, you should not at the same time use the user-defined rate categories option (option C).

The algorithm used for searching among trees is faster than it was in version 3.5, thanks to using a technique invented by David Swofford and J. S. Rogers. This involves not iterating most branch lengths on most trees when searching among tree topologies. This is of necessity a "quick-and-dirty" search but it saves much time.

OUTPUT FORMAT

The output starts by giving the number of species and the number of amino acid positions.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the sequences printed in groups of ten amino acids. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen.

A table is printed showing the length of each tree segment, and the time (in units of expected amino acid substitutions per position) of each fork in the tree, measured from the root of the tree. I have not attempted to include code for approximate confidence limits on branch points, as I have done for branch lengths in PROML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated.

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run PROML and PROMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In PROML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In PROMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2. To perform the test take the difference in log likelihoods between the two runs (PROML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected.
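
For example, with the 5-species test data below, the difference has n - 2 = 3 degrees of freedom; the 5% point of a chi-square distribution with 3 degrees of freedom is about 7.81, so the clock would be rejected at that level if twice the log-likelihood difference between the PROML and PROMLK runs exceeded about 7.81.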

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of amino acid substitutions, scaled so that the average rate of change, averaged over all the positions analyzed, is set to 1.0 if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol is printed in capital letters (W rather than w). One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.

PROGRAM CONSTANTS

The constants defined at the beginning of the program include "maxtrees", the maximum number of user trees that can be processed. It is small (100) at present to save some further memory but the cost of increasing it is not very great. Other constants include "maxcategories", the maximum number of position categories, "namelength", the length of species names in characters, and three others, "smoothings", "iterations", and "epsilon", that help "tune" the algorithm and define the compromise between execution speed and the quality of the branch lengths found by iteratively maximizing the likelihood. Reducing iterations and smoothings, and increasing epsilon, will result in faster execution but a worse result. These values will not usually have to be changed.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring for each pattern, lends itself readily to parallel processing.

PAST AND FUTURE OF THE PROGRAM

This program was developed in version 3.6 by Lucas Mix by combining code from DNAMLK and from PROML.


TEST DATA SET

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on)

(It was run with HMM rates having gamma-distributed rates approximated by 5 rate categories, with coefficient of variation of rates 1.0, and with patch length parameter = 1.5. Two user-defined rate categories were used, one for the first 6 positions, the other for the last 7, with rates 1.0 : 2.0. Weights were used, with sites 1 and 13 given weight 0, and all others weight 1.)


Amino acid sequence
   Maximum Likelihood method with molecular clock, version 3.6a3

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             0111111111 111

Jones-Taylor-Thornton model of amino acid change


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         ..G..C.... ..C
Gamma        C.TT.C.T.. C.A
Delta        GGTA.TT.GG CC.
Epsilon      GGGA.CT.GG CCC



Discrete approximation to gamma distributed rates
 Coefficient of variation of rates = 1.000000  (alpha = 1.000000)

State in HMM    Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023

Expected length of a patch of sites having the same rate =    1.500


Site category   Rate of change

        1           1.000
        2           2.000






                                               +-----------------Epsilon   
     +-----------------------------------------4  
  +--3                                         +-----------------Delta     
  !  !  
--2  +-----------------------------------------------------------Gamma     
  !  
  !                                +--------------------------Beta      
  +--------------------------------1  
                                   +--------------------------Alpha     


Ln Likelihood =  -138.46858

 Ancestor      Node      Node Height     Length
 --------      ----      ---- ------     ------
 root            2      
   2             3          0.00010      0.00010
   3             4          6.92817      6.92807
   4          Epsilon       9.99990      3.07173
   4          Delta         9.99990      3.07173
   3          Gamma         9.99990      9.99980
   2             1          5.47444      5.47444
   1          Beta          9.99990      4.52546
   1          Alpha         9.99990      4.52546

Combination of categories that contributes the most to the likelihood:

             3333333333 333

Most probable category at each site if > 0.95 probability ("." otherwise)

             .......... ...



Probable sequences at interior nodes:

  node       Reconstructed sequence (caps if > 0.95)

    2        .AeDesDDdd eSe
    3        .AeDesDDDd eSe
    4        .GEDssDEDD ESs
 Epsilon     GGGATCTCGG CCC
 Delta       GGTATTTCGG CCT
 Gamma       CATTTCGTCA CAA
    1        .AeDEdDDds sSE
 Beta        AAGGTCGCCA AAC
 Alpha       AACGTGGCCA AAT

PHYLIPNEW-3.69.650/doc/draw.html

version 3.6

DRAWTREE and DRAWGRAM

© Copyright 1986-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DRAWTREE and DRAWGRAM are interactive tree-plotting programs that take a tree description in a file and read it, and then let you interactively make various settings and then plot the tree on a laser printer, plotter, or dot matrix printer. In many cases (with Macintosh or PC graphics, with X windows, or with a Tektronix-compatible graphics terminal) you can preview the resulting tree. This allows you to modify the tree until you like the result, then plot the result. DRAWTREE plots unrooted trees and DRAWGRAM plots rooted cladograms and phenograms. On good laser printers or as files for good drawing programs both can produce fully publishable results. On dot matrix printers the results look grainy but are good enough for overhead transparencies or slides for presentations.

These programs are descended from PLOTGRAM and PLOTREE written by Christopher Meacham. I have incorporated his code for fonts and his plotter drivers, and in DRAWTREE have used some of his code for drawing unrooted trees. In both programs I have also included some plotter driver code by David Swofford, Julian Humphries and George D.F. "Buz" Wilson, to all of whom I am very grateful. Mostly, however, they consist of my own code and that of my programmers. The font files are printable-character recodings of the public-domain Hershey fonts, recoded by Christopher Meacham.

This document will describe the features common to both programs. The documents for DRAWTREE and DRAWGRAM describe the particular choices you can make in each of those programs. The Appendix to this documentation file contains some pieces of C code that can be inserted to make the program handle another plotting device -- the plotters by Calcomp.

A Short Introduction

To use DRAWTREE and DRAWGRAM, you must have

(1)
The compiled version of the program. If you have not obtained a version of PHYLIP precompiled for your machine, you will have to take the source code given here and modify it for your C compiler and then compile it. This is not too hard: it is discussed below.

(2)
A tree file. Trees are described in the nested-parenthesis notation used throughout PHYLIP and standardized in an informal meeting of program authors in Durham, New Hampshire in June, 1986. Trees for both programs may be either bifurcating or multifurcating, and may either have or not have branch lengths. Tree files produced by the PHYLIP programs are in this form. There is further description of the tree file format later in this document.

(3)
A font file. There are six font files distributed with PHYLIP: these consist of three Roman, two Italic, and one Russian Cyrillic font, all from the public-domain Hershey Fonts, in ASCII readable form. The details of font representation need not concern you; all you need to do is to copy the font file corresponding to the font you want into the appropriate directory under the appropriate file name, and let the program use it. Or you can let the program ask you for the name of the font file, which it will do if it does not find one itself. The six fonts are, respectively, a one- and a two-stroke sans-serif Roman font, a three-stroke serifed Roman font, a two- and a three- stroke serifed Italic font, and a two-stroke Cyrillic font for the Russian language. If this is not clear just try them all. Note that for some printers several built-in fonts such as Times-Roman and Courier can be used too.

(4)
A plotting device, and if possible a screen on which you can preview the plot. The programs work with Postscript-compatible laser printers, laser printers compatible with the PCL printer language of the Hewlett-Packard Laserjet series, IBM PC graphics screens, the PICT format for the MacDraw drawing program, the PCX file format for the PC Paintbrush painting program, the file format for the freeware X-windows drawing programs xfig and idraw, the X Bitmap format for X-windows, plotters including Hewlett-Packard models, dot matrix printers including models by Epson and Apple, graphics terminals from DEC and Tektronix, the input format for the freeware ray-tracing (3-dimensional rendering) programs POV and rayshade, and, strangest and most wonderful of all, the Virtual Reality Markup Language (VRML) which is a file format that is used by freely-available virtual reality programs like Cosmo Player. You can choose the plotting and previewing devices from a menu at run time, and these can be different. There are places in the source code for the program where you can insert code for a new plotter, should you want to do that.

Once you have all these, the programs should be fairly self explanatory, particular if you can preview your plots so that you can discover the meaning of the different options by trying them out.

Once you have a compiled version of the appropriate program, say DRAWGRAM, and a file called (say) treefile with the tree in it, and a font file (say font2 which you have copied as a file called fontfile), all you do is run the program DRAWGRAM. It should automatically read the font and tree files, and will allow you to change the graphics devices. Then it will let you see the options it has chosen, and ask you if you want to change these. Once you have modified those that you want to, you can tell it to accept those. The program will then allow you to preview the tree on your screen, if you have told it that you have an appropriate graphics screen. After previewing the tree, the program will want to know whether you are ready to plot the tree. In Windows you answer this using the File menu of the preview window. In X Windows and Macintosh systems you can close the preview window by clicking on its corner. Whether or not you close it, if you get back to the text window that had the menus, and it accepts typing in that window, you will be asked whether you want to accept the plot as is. If you say no, it will once again allow you to change options and will the allow you to preview the tree again, and so on as many times as you want. If you say yes, then it will write a file called (say) plotfile. If you then copy this file to your printer or plotter, it should result in a beautifully plotted tree. If the final plotting device is a Macintosh or PC graphics screen, it may not write a plot file but will plot directly on the screen.

Having read the above, you may be ready to run the program. Below you will find more information about representation of trees in the tree file, on the different kinds of graphics devices supported by this program, and on how to recompile these programs.

Trees

The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses, noticed in 1857 by the famous English mathematician Arthur Cayley. If we have this rooted tree:

                         A                 D
                          \         E     /
                           \   C   /     /
                            \  !  /     /
                             \ ! /     /
                        B     \!/     /
                         \     o     /
                          \    !    /
                           \   !   /
                            \  !  /
                             \ ! /
                              \!/
                               o
                               !
                               !

then in the tree file it is represented by the following sequence of printable characters, starting at the beginning of the file:

(B,(A,C,E),D);

The tree ends with a semicolon. Everything after the semicolon in the input file is ignored, including any other trees. The bottommost node in the tree is an interior node, not a tip. Interior nodes are represented by a pair of matched parentheses. Between them are representations of the nodes that are immediately descended from that node, separated by commas. In the above tree, the immediate descendants are B, another interior node, and D. The other interior node is represented by a pair of parentheses, enclosing representations of its immediate descendants, A, C, and E.

Tips are represented by their names. A name can be any string of printable characters except blanks, colons, semicolons, parentheses, and square brackets. In the programs a maximum of 20 characters are allowed for names: this limit can easily be increased by recompiling the program and changing the constant declaration for "MAXNCH" in phylip.h.

Because you may want to include a blank in a name, it is assumed that an underscore character ("_") stands for a blank; any of these in a name will be converted to a blank when it is read in. Any name may also be empty: a tree like

(,(,,),);

is allowed. Trees can be multifurcating at any level (while in many of the programs multifurcations of user-defined trees are not allowed, or are restricted to a trifurcation at the bottommost level, these programs do not make any such restriction).

Branch lengths can be incorporated into a tree by putting a real number, with or without decimal point, after a node and preceded by a colon. This represents the length of the branch immediately below that node. Thus the above tree might have lengths represented as:

(B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);

These programs will be able to make use of this information only if lengths exist for every branch, except the one at the bottom of the tree.

The tree starts on the first line of the file, and can continue to subsequent lines. It is best to proceed to a new line, if at all, immediately after a comma. Blanks can be inserted at any point except in the middle of a species name or a branch length.

The above description is of a subset of the Newick Standard. For example, interior nodes can have names in that standard, but if any are included the present programs will omit them.
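
If you want to read tree files of this kind into programs of your own, the grammar is simple enough that a small reader can be written by hand. Here is a minimal sketch in C (it is not code taken from the PHYLIP programs): it walks through a Newick string of the kind shown above and prints each tip name together with its branch length, if one is present. It ignores interior-node labels, skips blanks, and does little error checking.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  int main(void)
  {
    char tree[] = "(B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);";
    char *p = tree;
    char name[21];                      /* up to 20 characters, as in PHYLIP */
    int depth = 0;
    size_t len, keep;

    while (*p != '\0' && *p != ';') {
      if (*p == '(') {                  /* start of an interior node */
        depth++;
        p++;
      } else if (*p == ')') {           /* end of an interior node */
        depth--;
        p++;
        p += strcspn(p, ",():; \n");    /* skip any interior-node label */
        if (*p == ':')                  /* length of the branch below it */
          strtod(p + 1, &p);
      } else if (*p == ',' || *p == ' ' || *p == '\n') {
        p++;
      } else {                          /* a tip: a name, then maybe ":length" */
        len = strcspn(p, ",():; \n");
        keep = (len > 20) ? 20 : len;
        memcpy(name, p, keep);
        name[keep] = '\0';
        p += len;
        if (*p == ':')
          printf("%-10s  %f\n", name, strtod(p + 1, &p));
        else
          printf("%-10s  (no branch length)\n", name);
      }
    }
    if (depth != 0)
      fprintf(stderr, "warning: unbalanced parentheses\n");
    return 0;
  }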

To help you understand this tree representation, here are some trees in the above form:

((raccoon:19.19959,bear:6.80041):0.84600,((sea_lion:11.99700,
seal:12.00300):7.52973,((monkey:100.85930,cat:47.14069):20.59201,
weasel:18.87953):2.09460):3.87382,dog:25.46154);

(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460);

(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);

();

((A,B),(C,D));

(Alpha,Beta,Gamma,Delta,,Epsilon,,,);

The Newick Standard was adopted June 26, 1986 by an informal committee meeting during the Society for the Study of Evolution meetings in Durham, New Hampshire and consisting of James Archie, William H.E. Day, Wayne Maddison, Christopher Meacham, F. James Rohlf, David Swofford, and myself. A web page describing it will be found at http://evolution.gs.washington.edu/phylip/newicktree.html.

Plotter file formats

When the programs run they have a menu which allows you to set (on its option P) the final plotting device, and another menu which allows you to set the type of preview screen. The choices for previewing are a subset of those available for plotting, and they can be different (the most useful combination will usually be a previewing graphics screen with a hard-copy plotter or a drawing program graphics file format).

The plotting device menu looks like this:

   type:       to choose one compatible with:

        L         Postscript printer file format
        M         PICT format (for drawing programs)
        J         HP Laserjet PCL file format
        W         MS-Windows Bitmap
        F         FIG 2.0 drawing program format          
        A         Idraw drawing program format            
        Z         VRML Virtual Reality Markup Language file
        P         PCX file format (for drawing programs)
        K         TeKtronix 4010 graphics terminal
        X         X Bitmap format
        V         POVRAY 3D rendering program file
        R         Rayshade 3D rendering program file
        H         Hewlett-Packard pen plotter (HPGL file format)
        D         DEC ReGIS graphics (VT240 terminal)
        E         Epson MX-80 dot-matrix printer
        C         Prowriter/Imagewriter dot-matrix printer
        T         Toshiba 24-pin dot-matrix printer
        O         Okidata dot-matrix printer
        B         Houston Instruments plotter
        U         other: one you have inserted code for
 Choose one: 

Here are the choices, with some comments on each:

Postscript printer file format. This means that the program will generate a file containing Postscript commands as its plot file. This can be printed on any Postscript-compatible laser printer. The page size is assumed to be 8.5 by 11 inches, but as plotting is within this limit A4 metric paper should work well too. This is the best quality output option. For this printer the menu options in DRAWGRAM and DRAWTREE that allow you to select one of the built-in fonts will work. The programs default to Times-Roman when this plotting option is in effect. I have been able to use fonts Courier, Times-Roman, and Helvetica. The others have eluded me for some reason known only to those who really understand Postscript.

If your laser printer, supposedly Postscript-compatible, refuses to print the plot file, you might consider whether the first line of the plot file, which starts with "%!", needs to be altered somehow or eliminated. If your Laserwriter is hooked to a Macintosh it will be necessary to persuade it to print the plot file. In recent versions of the Macintosh operating systems this can supposedly be done by dragging the file icon onto the printer icon on the desktop. In earlier versions of the MacOS operating system you might have to use a utility called the Laserwriter Font Utility, which was distributed with the operating system.

PICT format (for drawing programs). This file format is read by many drawing programs (an early example was MacDraw). It has support for some fonts, though if fonts are used the species names can only be drawn horizontally or vertically, not at other angles in between. The control over line widths is a bit rough also, so that some lines at different angles may turn out to be different widths when you do not want them to be. If you are working on a Macintosh system and have not been able to persuade it to print a Postscript file, this option may be the best solution, as you could then read the file into a drawing program and then order it to print the resulting screen. The PICT file format has font support, and the default font for this plotting option is set to Times. You can also choose font attributes for the labels such as Bold, Italic, Outline, and Shadowed.

HP Laserjet PCL file format. Hewlett-Packard's extremely popular line of laser printers has been emulated by many other brands of laser printer, so that this format is compatible with more printers than any other. One limitation of the PCL4 command language for these printers is that it does not have primitive operations for drawing arbitrary diagonal lines. This means that they must be treated by these programs as if they were dot matrix printers with a great many dots. This makes output files large, and output can be slow. The user will be asked to choose the dot resolution (75, 150, or 300 dots per inch). The 300 dot per inch setting should not be used if the laser printer's memory is less than 512k bytes. The quality of output is also not as good as it might be so that the Postscript file format will usually produce better results even at the same resolution. I am grateful to Kevin Nixon for inadvertently pointing out that on Laserjets one does not have to dump the complete bitmap of a page to plot a tree.

MS-Windows Bitmap. This file format is used by most Windows drawing and paint programs, including Windows Paint which comes with the Windows operating system. It asks you to choose the height and width of the graphic image in pixels. For the moment, the image is set to be a monochrome image which can only be black or white. We hope to change that soon, but note that by pasting the image into a copy of Paint that is set to have a color image of the appropriate size, one can get a version whose color can be changed. Note also that Windows Bitmap files can be used as "wallpaper" images for the background of a desktop.

IBM PC graphics screens. The code for this is included in the precompiled PC executables, and is also compiled in if you compile the programs yourself in C. The graphics modes supported are CGA, EGA, VGA, Hercules, and AT&T (Olivetti). This option is also available for previewing plots, and in either previewing or final plotting it draws directly on the screen and does not make a plot file.

FIG 2.0 drawing program format. This is the file format of the free drawing program Xfig, available for X-windows systems on Unix or Linux systems. Xfig can be obtained from http://duke.usask.ca/~macphed/soft/fig/

You should also get transfig, which contains the fig2dev program which converts xfig output to the various printer languages. Transfig is on the same machine in

    /contrib/R5fixes/transfig-patches/transfig.2.1.6.tar.Z.

The present format does not write the species labels in fonts recognized by Xfig but draws them with lines. This often makes the names look rather bumpy. We hope to change this soon.

Idraw drawing program format. Idraw is a free drawing program for X windows systems (such as Unix and Linux systems). Its interface is loosely based on MacDraw, and I find it much more useable than Xfig. Though it was unsupported for a number of years, it has more recently been actively supported by Scott Johnston, of Vectaport, Inc. (http://www.vectaport.com). He has produced, in his ivtools package, a number of specialized versions of Idraw, and he also distributes the original Idraw as part of it. Linux executables for all these are available from Vectaport, or from various archive machines.

The Idraw file format that our programs produce can be read into Idraw, or can be imported into the other Ivtools programs. The file format saved from Idraw (or which can be exported from the other Ivtools programs) is Postscript, and if one does not print directly from Idraw one can simply send the file to the printer. But the format we produce is missing some of the header information and will not work directly as a Postscript file. However if you read it into Idraw and then save it (or import it into one of the other Ivtools programs and then export it) you will get a Postscript version that is fully useable.

DRAWGRAM and DRAWTREE have font support in their Idraw file format options. The default font is Times-Bold but you can also enter the name of any other font that is supported by your Postscript printer. Idraw labels can be rotated to any angle.

VRML Virtual Reality Markup Language file. This is by far the most interesting plotting file format. VRML files describe objects in 3-dimensional space with lighting on them. A number of freely available "virtual reality browsers" such as Cosmo Player can read VRML files. A list of available virtual reality browsers and browser plugins can be found at http://www.web3d.org/vrml/browpi.htm. These allow you to wander around looking at the tree from various angles, including from behind! At the moment our VRML output is primitive, with labels that always look the same no matter what angle you look at them from, and with a "sun" that is always behind you. The tree is made of three-dimensional tubes but is basically flat. We hope to change these soon. What's next? Trees whose branches stick out in three dimensions? Animated trees whose forks rotate slowly? A video game involving combat among schools of systematists?

PCX file format (for drawing programs). A bitmap format that was formerly much used on the PC platform, this has been largely superseded by the Windows Bitmap (BMP) format, but it is still useful. This file format is simple and is read by many other programs as well. The user must choose one of three resolutions for the file, 640x480, 800x600, or 1024x768. The file is a monochrome paint file. Our PCX format is correct but is not read correctly by versions of Microsoft Paint (PBrush) that are running on systems that have loaded Word97.

Tektronix 4010 graphics terminal. The plot file will contain commands for driving the Tektronix series of graphics terminals. Other graphics terminals were compatible with the Tektronix 4010 and its immediate descendants. The PCDOS version of the public domain communications program Kermit, versions 2.30 and later, can emulate a Tektronix graphics terminal if the command "set terminal tek" is given. Of course that assumes that you are communicating with another computer. There are also similar terminal emulation programs for Macintoshes that emulate Tektronix graphics. On workstations with X windows you can use one option of the "xterm" utility to create a Tektronix-compatible window. On Sun workstations there used to be a Tektronix emulator called "tektool" which could be used to view the trees. The Tektronix option is also available in our programs for previewing the plots, in which case the plotting commands will not be written into a file but will be sent directly to your terminal.

X Bitmap format. This produces an X-bitmap for the X Windows system on Unix or Linux systems, which can be displayed on X screens. You will be asked for the size of the bitmap (e.g., 16x16, or 256x256, etc.). This format cannot be printed out without further format conversion but is usable for backgrounds of windows ("wallpaper"). This can be a very bulky format if you choose a large bitmap. The bitmap is a structure that can actually be compiled into a C program (and thus built in to it), if you should have some reason for doing that.
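
To give an idea of what such a file contains: an X bitmap is actually a fragment of C source code declaring the width, the height, and an array of bytes in which each bit is one dot, the low-order bit of each byte being the leftmost dot of its group of eight. Here is a hypothetical 8 x 4 example (the names and the bit pattern are invented for illustration; a file written by DRAWGRAM or DRAWTREE will of course be much larger):

  #define plotfile_width 8
  #define plotfile_height 4
  static unsigned char plotfile_bits[] = {
     0x18, 0x3c, 0x7e, 0x18};    /* one byte per row at this width */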

POVRAY 3D rendering program file. This produces a file for the free ray-tracing program POVRay (Persistence of Vision Raytracer), which is available at http://www.povray.org/. It shows a tree floating above a flat landscape. The tree is flat but made out of tubes (as are the letters of the species names). It casts a realistic shadow across the landscape, lit from over the left shoulder of the viewer. You will be asked to confirm the colors of the tree branches, the species names, the background, and the bottom plane. These default to Blue, Yellow, White, and White respectively.

Rayshade 3D rendering program file. This is the input format for the free ray-tracing program "rayshade", which is available at http://www-graphics.stanford.edu/~cek/rayshade/rayshade.html for many kinds of systems. Rayshade takes files of this format and turns them into color scenes in "raw" raster format (also called "MTV" format after a raytracing program of that name). If you get the pbmplus package (available from http://sourceforge.net/projects/netpbm/) and compile it on your system, you can use the "mtvtoppm" and "ppmtogif" programs to convert this into the widely-used GIF raster format. (The pbmplus package will also allow you to convert into TIFF, PCX and many other formats.) The resultant image will show a tree floating above a landscape, rendered in a real-looking 3-dimensional scene with shadows and illumination. It is possible to use Rayshade to make two scenes that together are a stereo pair. When producing output for Rayshade you will be asked by DRAWGRAM or DRAWTREE whether you want to reset the values for the colors you want for the tree, the species names, the background, and the desired resolution.

Hewlett-Packard pen plotter (HPGL file format). This means that the program will generate as its plot file a file which uses the HPGL graphics language. Hewlett-Packard 7470, 7475, and many other plotters are compatible with this. The paper size is again assumed to be 8.5 by 11 inches (again, A4 should work well too). It is assumed that there are two pens, with a finer one used for drawing the names, and the HPGL commands will call for switching between these. The Hewlett-Packard Laserjet III printer can emulate an HP plotter, and this feature is included in its PCL5 command language (but not in the PCL4 command language of earlier Hewlett-Packard models). As plotters are now rare, the main use of HPGL will be when it is emulated by laser printers, but other file formats such as PCL and Postscript will be better choices in those cases.

DEC ReGIS graphics (VT240 terminal). The DEC ReGIS standard is used by the VT240 and VT340 series terminals by DEC (Digital Equipment Corporation). There used to be many graphics terminals that emulate the VT240 or VT340 as well. The DECTerm windows in many versions of Digital's (now Compaq's) DECWindows windowing system do so. This option is available in our programs for previewing trees as well. In preview mode it does not write a plot file but sends the commands directly to the screen; in final mode it writes a plot file. In DEC's version of Unix, Ultrix version 4.1 and later, the windowing system allows DEC ReGIS graphics as a default.

Epson MX-80 dot-matrix printer. This file format is for the dot-matrix printers by Epson (starting with the MX80 and continuing on to many other models), as well as the IBM Graphics printers. The code here plots in double-density graphics mode. Many of the later models are capable of higher-density graphics, but not with every dot printed. This density was chosen for reasonably wide compatibility. Many other dot-matrix printers on the market have graphics modes compatible with the Epson printers. I cannot guarantee that the plot files generated by these programs will be compatible with all of these, but they do work on Epsons. They have also worked, in our hands, on IBM Graphics Printers. There used to be many printers that claimed compatibility with these too, but I do not know whether it will work on all of them. If you have trouble with any of these you might consider trying, in the epson option of procedure initplotter, to put in an fprintf statement that writes to plotfile an escape sequence that changes the line spacing. As dot matrix printers are rare these days, I suspect this option will not get much testing.

Prowriter/Imagewriter dot-matrix printer. The trading firm C. Itoh distributed this line of dot-matrix printers, which was made by Tokyo Electric (TEC) and also was sold by NEC under the product number PC8023. These were 9-pin dot matrix printers. In a slightly modified form they were also the Imagewriter printer sold by Apple for their Macintosh line. The same escape codes seem to work on both machines, the Apple version being a serial interface version. They are not related to the IBM Proprinter, despite the name.

Toshiba 24-pin dot-matrix printer. The 24-pin printers from Toshiba were covered by this option. These included the P1340, P1350, P1351, P351, 321, and later models. For a 24-pin printer the plot file can get fairly large as it contains a bit map of the image and there are more bits with a 24-pin image. Printing was usually slow.

Okidata dot-matrix printer. The ML81, 82, 83 and ML181, 182, 183 line of dot-matrix printers from Okidata had their own graphics codes and those are dealt with by this option. The later Okidata ML190 series emulated IBM Graphics Printers so that you would not want to use this option for them but the option for that printer.

Houston Instruments plotter. The Houston Instruments line of plotters were also known as Bausch and Lomb plotters. The code in the programs for these has not been tested recently; I would appreciate anyone who tries it out telling me whether it works. I do not have access to such a plotter myself, and doubt most users will come across one.

Conversion from these formats to others is also possible. There is a free program by Jef Poskanzer called "PBMPLUS" that interconverts many bitmap formats (see above under Rayshade).

Drivers for Preview of Plots

Plots may be previewed in a number of formats which are chosen using a menu option. Previewing defaults to different drivers depending on which kind of system you are running the programs on. For Unix or Linux systems it defaults to X Windows, for Windows systems to Windows graphics, and for Macintosh systems to Macintosh graphics screens.

We have already mentioned (above) some of the options that are also used for previewing. These include:

MSDOS Graphics Screens. These were mentioned above as possible output images.

Macintosh graphics screens. Using the windowing features of Codewarrior C from Metrowerks, our Macintosh executables open a graphics window and draw preview trees in it. We have not provided this option for final plotting of the tree. The window is about 2/3 the height of the desktop screen and has the tree drawn in black on a white background. After the preview appears, you can dismiss the window by closing it using the usual little box in its corner, or by typing Command-Q.

X Windows display. Our Unix and Linux code tries to do previews in X Windows. We hope that the Unix/Linux Makefile will find the correct libraries to link. An X window appears with the preview of the tree in it. To dismiss this window one needs to put the mouse over the text window that had the menus in it (or click on it) and then type Y or N to plot the tree or return to the menu.

MS Windows display. The executables produced using the Cygwin Gnu C++ compiler should produce this graphics preview window. The preview window can be dismissed using its File menu. In its menu the Change Parameters option will lead you back to the text menu to make more changes, and the Plot option will cause the final plot file to be written. The Quit option will interrupt the program, causing no plot file to be produced. Normally you will not want to use that option.

Tektronix 4010 graphics terminal. This previewing option was described above as a final plot option.

DEC ReGIS graphics (VT240 terminal). This previewing option was described above as a final plot option.

Problems Copying Files to Printers

A problem may arise in how to get the plot files to the plotting device or printer. One has to copy them directly, but one should be careful not to let your serial or parallel port strip off the high-order bits in the bytes if you are using one of the options that generate nonprintable characters. This will be true for most of the dot matrix printers and for bitmaps dumped to an HP Laserjet-compatible printer. This can be a problem under Unix or MSDOS. If, for example, you have a dot-matrix printer connected to a parallel port under PCDOS, to copy the file PLOTFILE to the printer without losing the high-order bits, you must use the /B switch on the COPY command:

  COPY/B PLOTFILE PRN:

The VAX VMS Line Length Problem

A problem that may occur under some operating systems, particularly the VMS operating system for Digital VAXes, is having a plot file with lines that exceed some operating system limit such as 255 characters. This can happen if you are using the Tektronix option. You should set your terminal type with the command

   $ SET TERM/NOWRAP/ESCAPE

which will allow Tektronix and DEC ReGIS plots to successfully appear on your terminal. That way, if you have a terminal capable of plotting one of these kinds of plots, the operating system will not interfere with the process. It will not be possible to use files of Tektronix commands as final plot files, however, as the TYPE command usually used to get them to appear on the screen does not allow lines longer than 2048 bytes, and Tektronix plots are single lines longer than that.

Other problems and opportunities

Another problem is adding labels (such as vertical scales and branch lengths) to the plots produced by these programs. This may require you to use the BMP, PICT, Idraw, Xfig, PCX or Postscript file format and use a draw or paint program to add them.

I would like to add more fonts. The present fonts are recoded versions of the Hershey fonts. They are legally publicly distributable. Most other font families on the market are not public domain and I cannot afford to license them for distribution. Some people have noticed that the Hershey fonts, which are drawn by a series of straight lines, have noticeable angles in what are supposed to be curves, when they are printed on modern laser printers and looked at closely. This is less a problem than one might think since, fortunately, when scientific journals print a tree it is usually shrunk so small that these imperfections (and often the tree itself) are hard to see!

One more font that could be added from the Hershey font collection would be a Greek font. If Greek users would find that useful I could add it, but my impression is that they publish mostly in English anyway.

Writing Code for a new Plotter, Printer or File Format

The C version of these programs consists of two C programs, "drawgram.c" and "drawtree.c". Each of these has compiled into it common sections of code, "draw.c" and "draw2.c", and a common header file, "draw.h". In addition the Macintosh version requires two more files, "interface.c" and "interface.h". All of the graphics commands that are common to both programs will be found in "draw.c" and "draw2.c". The following instructions for writing your own code to drive a different kind of printer, plotter, or graphics file format require you only to make changes in "draw.c" and "draw2.c". The two programs can then be recompiled.

If you want to write code for other printers, plotters, or vector file formats, this is not too hard. The plotter option "U" is provided as a place for you to insert your own code. Chris Meacham's system was to draw everything, including the characters in the names and all curves, by drawing a series of straight lines. Thus you need only master your plotter's commands for drawing straight lines. In function "plotrparms" you must set up the values of variables "xunitspercm" and "yunitspercm", which are the number of units in the x and y directions per centimeter, as well as variables "xsize" and "ysize" which are the size of the plotting area in centimeters in the x direction and the y direction. A variable "penchange" of a user-defined type is set to "yes" or "no" depending on whether the commands to change the pen must be issued when switching between plotting lines and drawing characters. Even though dot-matrix printers do not have pens, penchange should be set to "yes" for them. In function "plot" you must issue commands to draw a line from the current position (which is at (xnow, ynow) in the plotter's units) to the position (xabs, yabs), under the convention that the lower-left corner of the plotting area is (0.0, 0.0). In functions "initplotter" and "finishplotter" you must issue commands to initialize the plotter and to finish plotting, respectively. If the pen is to be changed an appropriate piece of code must be inserted in function "penchange".
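
As an illustration only (this is not code distributed with PHYLIP), here is roughly what the two inserted pieces might look like for an imaginary pen plotter whose command language has "U x,y;" to move with the pen up and "D x,y;" to draw a line to (x,y), with 400 plotter units per centimeter. The variable and constant names (plotter, other, xunitspercm, xsize, penchange, penstatus, pendown, plotfile, xnow, ynow, xabs, yabs) are the ones described above and used in Appendix 1 below; everything about the plotter itself is invented for the example.

  /* in function plotrparms, for the plotter option reserved for your own code */
  case 'U':
    plotter = other;
    xunitspercm = 400.0;        /* invented: 400 plotter units per centimeter */
    yunitspercm = 400.0;
    xsize = 25.0;               /* plotting area, in centimeters */
    ysize = 25.0;
    penchange = no;             /* this imaginary plotter has only one pen */
    break;

  /* in function plot, inside the switch on the plotter type */
  case other:
    fprintf(plotfile, "%c %ld,%ld;\n",
            (penstatus == pendown) ? 'D' : 'U',        /* draw or move */
            (long)floor(xabs + 0.5), (long)floor(yabs + 0.5));
    xnow = xabs;
    ynow = yabs;
    break;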

For dot matrix printers and raster graphics, matters are a bit more complex. The functions "plotrparms", "initplotter", "finishplotter" and "plot" still respectively set up the parameters for the plotter, initialize it, finish a plot, and plot one line. But now the plotting consists of drawing dots into a two-dimensional array called "stripe". Once the plot is finished this array is printed out. In most cases the array is not as tall as a full plot: instead it is a rectangular strip across it. When the program has finished drawing in the strip, it prints it out and then moves down the plot to the next strip. For example, for Hewlett-Packard Laserjets we have defined the strip as 2550 dots wide and 20 dots deep. When the program goes to draw a line, it draws it into the strip and ignores any part of it that falls outside the strip. Thus the program does a complete plotting into the strip, then prints it, then moves down the diagram by (in this case) 20 dots, then does a complete plot into that strip, and so on.

To work with a new raster or dot matrix format, you will have to define the desired width of a strip ("strpwide"), the desired depth ("strpdeep"), and how many lines of bytes must be printed out to print a strip. For example, Toshiba P351 printers in graphics mode print strips of dots 1350 bits wide by 24 bits deep, each column of 24 bits printing out as four consecutive bytes with 6 bits each. In that case, one prints out a strip by printing up to 1350 groups of 4 bytes. "strpdiv" is 4, and "strpwide" is 1350, and "strpdeep" is 24. Procedure "striprint" is the one that prints out a strip, and has special-case code for the different printers and file formats. For file formats, all of which print out a single row of dots at a time, the variable "strpdiv" is not used. The variable "dotmatrix" is set to "true" or "false" in function "plotrparms" according to whether or not "strpdiv" is to be used. Procedure "plotdot" sets a single dot in the array "stripe" to 1 at position (xabs, yabs). The coordinates run from 1 at the top of the plot to larger numbers as we proceed down the page. Again, there is special-case code for different printers and file formats in that function. You will probably want to read the code for some of the dot matrix or file format options if you want to write code for one of them. Many of them have provision for printing only part of a line, ignoring parts of it that have no dots to print.
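
As a rough sketch of the bookkeeping just described (again, this is not the code actually in the programs, and the names used for the strip position are invented), a dot-setting routine for a format written one row of eight-dot bytes at a time might look like this. It assumes that the leftmost dot of each byte is its high-order bit, which is true of some formats and not of others:

  /* hypothetical globals for the sketch; the real program's variables differ */
  #define STRPDEEP 20                   /* depth of the strip, in dots */
  #define STRPWIDE 2550                 /* width of the strip, in dots */
  static unsigned char stripe[STRPDEEP][(STRPWIDE + 7) / 8];
  static long striptop = 0;             /* page row of the strip's first line */

  /* set the dot at column ix (1..STRPWIDE) and page row iy, where iy counts
     down from the top of the plot; dots outside the current strip are ignored */
  static void plotdot(long ix, long iy)
  {
    long row = iy - striptop;
    if (row < 0 || row >= STRPDEEP || ix < 1 || ix > STRPWIDE)
      return;
    stripe[row][(ix - 1) / 8] |= (unsigned char)(0x80 >> ((ix - 1) & 7));
  }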

I would be happy to obtain the resulting code from you to consider adding it to this listing so we can cover more kinds of plotters, printers, and file formats.


APPENDIX 1.

Code to drive some other graphics devices. These pieces of code are to be inserted in the places reserved for the "Y" plotter option. The variables necessary to run this have already been incorporated into the programs.

Calcomp plotters:

Calcomp's industrial-strength plotters are not as much a fixture of University computer centers as they once were, but just in case you need to use one, this code should work:

Global declarations needed near the front of drawtree.c (the array here has one extra element, and cchex points one element into it, so that the code below can legitimately use the index -1):

Char cchexstore[17];
Char *cchex = cchexstore + 1;   /* cchex[-1] through cchex[15] are valid */

Code to be inserted into function plotrparms:

  case 'Y':
    plotter = other;
    xunitspercm = 39.37;
    yunitspercm = 39.37;
    xsize = 25.0;
    ysize = 25.0;
    xposition = 12.5;
    yposition = 0.0;
    xoption = center;
    yoption = above;
    rotation = 0.0;
    break;

Code to be inserted into function plot:

Declare these variables at the beginning of the function:

long i, n, inc, xinc, yinc, xlast, ylast, xrel,
   yrel, xhigh, yhigh, xlow, ylow;
Char quadrant;

and insert this into the switch statement:

  case other:
    if (penstatus == pendown)
      putc('H', plotfile);
    else
      putc('D', plotfile);
    xrel = (long)floor(xabs + 0.5) - xnow;
    yrel = (long)floor(yabs + 0.5) - ynow;
    xnow = (long)floor(xabs + 0.5);
    ynow = (long)floor(yabs + 0.5);
    if (xrel > 0) {
      if (yrel > 0)
	quadrant = 'P';
      else
	quadrant = 'T';
    } else if (yrel > 0)
      quadrant = 'X';
    else
      quadrant = '1';
    xrel = labs(xrel);
    yrel = labs(yrel);
    if (xrel > yrel)
      n = xrel / 255 + 1;
    else
      n = yrel / 255 + 1;
    xinc = xrel / n;
    yinc = yrel / n;
    xlast = xrel % n;
    ylast = yrel % n;
    xhigh = xinc / 16;
    yhigh = yinc / 16;
    xlow = xinc & 15;
    ylow = yinc & 15;
    for (i = 1; i <= n; i++)
      fprintf(plotfile, "%c%c%c%c%c",
	      quadrant, cchex[xhigh - 1], cchex[xlow - 1], cchex[yhigh - 1],
	      cchex[ylow - 1]);
    if (xlast != 0 || ylast != 0)
      fprintf(plotfile, "%c%c%c%c%c",
	      quadrant, cchex[-1], cchex[xlast - 1], cchex[-1],
	      cchex[ylast - 1]);
    break;

Code to be inserted into function initplotter:

  case other:
    cchex[-1] = 'C';
    cchex[0] = 'D';
    cchex[1] = 'H';
    cchex[2] = 'L';
    cchex[3] = 'P';
    cchex[4] = 'T';
    cchex[5] = 'X';
    cchex[6] = '1';
    cchex[7] = '5';
    cchex[8] = '9';
    cchex[9] = '/';
    cchex[10] = '=';
    cchex[11] = '#';
    cchex[12] = '"';
    cchex[13] = '\'';
    cchex[14] = '^';
    xnow = 0.0;
    ynow = 0.0;
    fprintf(plotfile, "CCCCCCCCCC");
    break;

Code to be inserted into function finishplotter:

  case other:
    plot(penup, 0.0, yrange + 50.0);
    break;
protpars
version 3.6

PROTPARS -- Protein Sequence Parsimony Method

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program infers an unrooted phylogeny from protein sequences, using a new method intermediate between the approaches of Eck and Dayhoff (1966) and Fitch (1971). Eck and Dayhoff (1966) allowed any amino acid to change to any other, and counted the number of such changes needed to evolve the protein sequences on each given phylogeny. This has the problem that it allows replacements which are not consistent with the genetic code, counting them equally with replacements that are consistent. Fitch, on the other hand, counted the minimum number of nucleotide substitutions that would be needed to achieve the given protein sequences. This counts silent changes equally with those that change the amino acid.

The present method insists that any changes of amino acid be consistent with the genetic code so that, for example, lysine is allowed to change to methionine but not to proline. However, changes between two amino acids via a third are allowed and counted as two changes if each of the two replacements is individually allowed. This sometimes allows changes that at first sight you would think should be outlawed. Thus we can change from phenylalanine to glutamine via leucine in two steps total. Consulting the genetic code, you will find that there is a leucine codon one step away from a phenylalanine codon, and a leucine codon one step away from a glutamine codon. But they are not the same leucine codon. It actually takes three base substitutions to get from either of the phenylalanine codons TTT and TTC to either of the glutamine codons CAA or CAG. Why then does this program count only two? The answer is that recent DNA sequence comparisons seem to show that synonymous changes are considerably faster and easier than ones that change the amino acid. We are assuming that, in effect, synonymous changes occur so much more readily that they need not be counted. Thus, in the chain of changes TTT (Phe) --> CTT (Leu) --> CTA (Leu) --> CAA (Gln), the middle one is not counted because it does not change the amino acid (leucine).
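
To make the counting rule concrete, here is a small self-contained sketch in C (it is not the bookkeeping actually used in PROTPARS, and it ignores the program's special treatment of serine and of deletions). It computes, for the universal code, the minimum number of amino-acid-changing base substitutions needed to get from one amino acid to another when synonymous substitutions are treated as free. Run as written, it reports 2 steps for phenylalanine to glutamine, as in the example above.

  #include <stdio.h>

  /* universal code: codons in the order TTT, TTC, TTA, TTG, TCT, ...
     (bases ordered T, C, A, G), with '*' marking the stop codons */
  static const char *aa =
    "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";

  /* minimum number of amino-acid-changing substitutions needed to get from
     amino acid 'from' to amino acid 'to', counting synonymous ones as free */
  static int aasteps(char from, char to)
  {
    int cost[64], i, j, pos, shift, nb, step, changed, best;

    for (i = 0; i < 64; i++)                 /* start from any codon of 'from' */
      cost[i] = (aa[i] == from) ? 0 : 1000;
    do {                                     /* relax until nothing improves */
      changed = 0;
      for (i = 0; i < 64; i++)
        for (pos = 0; pos < 3; pos++) {      /* neighbors: change one base */
          shift = (pos == 0) ? 16 : (pos == 1) ? 4 : 1;
          for (j = 0; j < 4; j++) {
            nb = i - ((i / shift) % 4) * shift + j * shift;
            step = (aa[nb] == aa[i]) ? 0 : 1;     /* synonymous change is free */
            if (cost[i] + step < cost[nb]) {
              cost[nb] = cost[i] + step;
              changed = 1;
            }
          }
        }
    } while (changed);
    best = 1000;
    for (i = 0; i < 64; i++)
      if (aa[i] == to && cost[i] < best)
        best = cost[i];
    return best;
  }

  int main(void)
  {
    printf("Phe -> Gln: %d step(s)\n", aasteps('F', 'Q'));   /* prints 2 */
    printf("Lys -> Met: %d step(s)\n", aasteps('K', 'M'));   /* prints 1 */
    printf("Lys -> Pro: %d step(s)\n", aasteps('K', 'P'));   /* prints 2 */
    return 0;
  }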

To maintain consistency with the genetic code, it is necessary for the program internally to treat serine as two separate states (ser1 and ser2) since the two groups of serine codons are not adjacent in the code. Changes to the state "deletion" are counted as three steps to prevent the algorithm from assuming unnecessary deletions. The state "unknown" is simply taken to mean that the amino acid, which has not been determined, will, in each part of a tree that is evaluated, be assumed to be whichever one causes the fewest steps.

The assumptions of this method (which has not been described in the literature) are thus something like this:

  1. Change in different sites is independent.
  2. Change in different lineages is independent.
  3. The probability of a base substitution that changes the amino acid sequence is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.
  6. The probability of a base change that is synonymous is much higher than the probability of a change that is not synonymous.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the works by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

The input for the program is fairly standard. The first line contains the number of species and the number of amino acid positions (counting any stop codons that you want to include).

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The protein sequences are given by the one-letter code described in the Molecular Sequence Programs documentation file. Note that if two polypeptide chains are being used that are of different length owing to one terminating before the other, they should be coded as (say)

             HIINMA*????
             HIPNMGVWABT

since after the stop codon we do not definitely know that there has been a deletion, and do not know what amino acid would have been there. If DNA studies tell us that there is DNA sequence in that region, then we could use "X" rather than "?". Note that "X" means an unknown amino acid, but definitely an amino acid, while "?" could mean either that or a deletion. The distinction is often significant in regions where there are deletions: one may want to encode a deletion of six amino acid positions as "-?????" since that way the program will only count one deletion, not six deletion events, when the deletion arises. However, if there are overlapping deletions it may not be so easy to know what coding is correct.

One will usually want to use "?" after a stop codon, if one does not know what amino acid is there. If the DNA sequence has been observed there, one probably ought to resist putting in the amino acids that this DNA would code for, and one should use "X" instead, because under the assumptions implicit in this parsimony method, changes to any noncoding sequence are much easier than changes in a coding region that change the amino acid, so that they shouldn't be counted anyway!

The form of this information is the standard one described in the main documentation file. For the U option the tree provided must be a rooted bifurcating tree, with the root placed anywhere you want, since that root placement does not affect anything.

The options are selected using an interactive menu. The menu looks like this:

Protein parsimony algorithm, version 3.6

Setting for this run:
  U                 Search for best tree?  Yes
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  C               Use which genetic code?  Universal
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, VT52, ANSI)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4          Print out steps in each site  No
  5  Print sequences at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, J, O, T, W, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs. Option C allows the user to select among various nuclear and mitochondrial genetic codes. There is no provision for coping with data where different genetic codes have been used in different organisms.

In the U (User tree) option, the trees should not be preceded by a line with the number of trees on it.

Output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 4 is toggled on) a table of the number of changes of state required in each position. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across positions. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual positions, and a conclusion as to whether that tree is or is not significantly worse than the best one.
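
As a rough illustration of the arithmetic involved (this is not the program's code; the best tree's per-position step counts are the ones shown in the example output below, and the competing tree's counts are invented), the statistic can be computed from the per-position step differences roughly like this:

  #include <math.h>
  #include <stdio.h>

  int main(void)
  {
    /* steps at each position: best tree (from the example output below) and
       one competing user tree (these numbers are invented for illustration) */
    int best[]  = {3, 1, 5, 3, 2, 0, 0, 2, 0, 0};
    int other[] = {3, 2, 5, 3, 2, 0, 1, 2, 0, 0};
    int n = 10, i;
    double d, sum = 0.0, sumsq = 0.0, meandiff, vardiff, sd;

    for (i = 0; i < n; i++) {
      d = other[i] - best[i];            /* step difference at this position */
      sum += d;
      sumsq += d * d;
    }
    meandiff = sum / n;
    vardiff = (sumsq - n * meandiff * meandiff) / (n - 1);
    sd = sqrt(n * vardiff);              /* s.d. of the total step difference */
    printf("total step difference %.0f, s.d. %.4f\n", sum, sd);
    printf("the competing tree is %s than the best tree\n",
           (sum > 1.96 * sd) ? "significantly worse" : "not significantly worse");
    return 0;
  }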

The program is derived from MIX but has had some rather elaborate bookkeeping using sets of bits installed. It is not a very fast program but is speeded up substantially over version 3.2.


TEST DATA SET

     5    10
Alpha     ABCDEFGHIK
Beta      AB--EFGHIK
Gamma     ?BCDSFG*??
Delta     CIKDEFGHIK
Epsilon   DIKDEFGHIK


CONTENTS OF OUTPUT FILE (with all numerical options on)


Protein parsimony algorithm, version 3.6



     3 trees in all found




     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
  1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000

steps in each position:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       3   1   5   3   2   0   0   2   0
   10!   0                                    

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)


         1                ANCDEFGHIK 
  1      2         no     .......... 
  2   Gamma        yes    ?B..S..*?? 
  2      3         yes    ..?....... 
  3      4         yes    ?IK....... 
  4   Epsilon     maybe   D......... 
  4   Delta        yes    C......... 
  3   Beta         yes    .B--...... 
  1   Alpha       maybe   .B........ 





           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000

steps in each position:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       3   1   5   3   2   0   0   2   0
   10!   0                                    

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)


         1                ANCDEFGHIK 
  1      2         no     .......... 
  2      3        maybe   ?......... 
  3      4         yes    .IK....... 
  4   Epsilon     maybe   D......... 
  4   Delta        yes    C......... 
  3   Gamma        yes    ?B..S..*?? 
  2   Beta         yes    .B--...... 
  1   Alpha       maybe   .B........ 





           +--Epsilon   
     +-----4  
     !     +--Delta     
  +--3  
  !  !     +--Gamma     
  1  +-----2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000

steps in each position:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       3   1   5   3   2   0   0   2   0
   10!   0                                    

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)


         1                ANCDEFGHIK 
  1      3         no     .......... 
  3      4         yes    ?IK....... 
  4   Epsilon     maybe   D......... 
  4   Delta        yes    C......... 
  3      2         no     .......... 
  2   Gamma        yes    ?B..S..*?? 
  2   Beta         yes    .B--...... 
  1   Alpha       maybe   .B........ 


dolmove

version 3.6

DOLMOVE -- Interactive Dollo and Polymorphism Parsimony

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DOLMOVE is an interactive parsimony program which uses the Dollo and Polymorphism parsimony criteria. It is inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. DOLMOVE reads in a data set which is prepared in almost the same format as one for the Dollo and polymorphism parsimony program DOLLOP. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology.

This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to PCDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed.

The input data file is set up almost identically to the data files for DOLLOP.

The user interaction starts with the program presenting a menu. The menu looks like this:


Interactive Dollo or polymorphism parsimony, version 3.6a3

Settings for this run:
  P                        Parsimony method?  Dollo
  A                     Use ancestral states?  No
  F                  Use factors information?  No
  W                           Sites weighted?  No
  T                 Use Threshold parsimony?  No, use ordinary parsimony
  A      Use ancestral states in input file?  No
  U Initial tree (arbitrary, user, specify)?  Arbitrary
  0      Graphics type (IBM PC, ANSI, none)?  (none)
  L               Number of lines on screen?  24
  S                Width of terminal screen?  80


Are these settings correct? (type Y or the letter for one to change)

The P (Parsimony Method) option is the one that toggles between polymorphism parsimony and Dollo parsimony. The program defaults to Dollo parsimony.

The T (Threshold), F (Factors), A (Ancestors), and 0 (Graphics type) options are the usual ones and are described in the main documentation page and in the Discrete Characters Program documentation page. (Note: at present DOLMOVE actually does not use the A (Ancestral states) information). The F (Factors) option is used to inform the program which groups of characters are to be counted together in computing the number of characters compatible with the tree. Thus if three binary characters are all factors of the same multistate character, the multistate character will be counted as compatible with the tree only if all three factors are compatible with it.

The L option allows the program to take advantage of larger screens if available. The X (Mixed Methods) option is not available in DOLMOVE. The U (initial tree) option allows the user to choose whether the initial tree is to be arbitrary, interactively specified by the user, or read from a tree file. Typing U causes the program to change among the three possibilities in turn. I would recommend that for a first run, you allow the tree to be set up arbitrarily (the default), as the "specify" choice is difficult to use and the "user tree" choice requires that you have available a tree file with the tree topology of the initial tree. Its default name is intree. The program will ask you for its name if it looks for the input tree file and does not find one of this name. If you wish to set up some particular tree you can also do that by the rearrangement commands specified below. The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data! Note that the usual W (Weights) option is not available in MOVE. We hope to add it soon.

After the initial menu is displayed and the choices are made, the program then sets up an initial tree and displays it. Below it will be a one-line menu of possible commands, which looks like this:

NEXT? (Options: R # + - S . T U W O F C H ? X Q) (H or ? for Help)

If you type H or ? you will get a single screen showing a description of each of these commands in a few words. Here are slightly more detailed descriptions:

R
("Rearrange"). This command asks for the number of a node which is to be removed from the tree. It and everything to the right of it on the tree is to be removed (by breaking the branch immediately below it). The command also asks for the number of a node below which that group is to be inserted. If an impossible number is given, the program refuses to carry out the rearrangement and asks for a new command. The rearranged tree is displayed: it will often have a different number of steps than the original. If you wish to undo a rearrangement, use the Undo command, for which see below.

#
This command, and the +, - and S commands described below, determine which character has its states displayed on the branches of the trees. The initial tree displayed by the program does not show the states of characters. When # is typed, the program does not ask the user which character is to be shown but automatically shows the states of the next binary character that is not compatible with the tree (the next character that does not perfectly fit the current tree). The search for this character "wraps around" so that if it reaches the last character without finding one that is not compatible with the tree, the search continues at the first character; if no incompatible character is found the current character is shown, and if no current character is shown then the first character is shown. If the last character has been reached, using + again causes the first character to be shown. The display takes the form of different symbols or textures on the branches of the tree. The state of each branch is actually the state of the node above it. A key of the symbols or shadings used for states 0, 1 and ? is shown next to the tree. State ? means that either state 0 or state 1 could exist at that point on the tree, and that the user may want to consider the different possibilities, which are usually apparent by inspection.

+
This command is the same as # except that it goes forward one character, showing the states of the next character. If no character has been shown, using + will cause the first character to be shown. Once the last character has been reached, using + again will show the first character.

-
This command is the same as + except that it goes backwards, showing the states of the previous character. If no character has been shown, using - will cause the last character to be shown. Once character number 1 has been reached, using - again will show the last character.

S
("Show"). This command is the same as + and - except that it causes the program to ask you for the number of a character. That character is the one whose states will be displayed. If you give the character number as 0, the program will go back to not showing the states of the characters.

. (dot)
This command simply causes the current tree to be redisplayed. It is of use when the tree has partly disappeared off of the top of the screen owing to too many responses to commands being printed out at the bottom of the screen.

T
("Try rearrangements"). This command asks for the name of a node. The part of the tree at and above that node is removed from the tree. The program tries to re-insert it in each possible location on the tree (this may take some time, and the program reminds you to wait). Then it prints out a summary. For each possible location the program prints out the number of the node to the right of the place of insertion and the number of steps required in each case. These are divided into those that are better, tied, or worse than the current tree. Once this summary is printed out, the group that was removed is inserted into its original position. It is up to you to use the R command to actually carry out any the arrangements that have been tried.

U
("Undo"). This command reverses the effect of the most recent rearrangement, outgroup re-rooting, or flipping of branches. It returns to the previous tree topology. It will be of great use when rearranging the tree and when a rearrangement proves worse than the preceding one -- it permits you to abandon the new one and return to the previous one without remembering its topology in detail.

W
("Write"). This command writes out the current tree onto a tree output file. If the file already has been written to by this run of DOLMOVE, it will ask you whether you want to replace the contents of the file, add the tree to the end of the file, or not write out the tree to the file. The tree is written in the standard format used by PHYLIP (a subset of the Newick standard). It is in the proper format to serve as the User-Defined Tree for setting up the initial tree in a subsequent run of the program.

O
("Outgroup"). This asks for the number of a node which is to be the outgroup. The tree will be redisplayed with that node as the left descendant of the bottom fork. The number of steps required on the tree may change on re-rooting. Note that it is possible to use this to make a multi-species group the outgroup (i.e., you can give the number of an interior node of the tree as the outgroup, and the program will re-root the tree properly with that on the left of the bottom fork).

F
("Flip"). This asks for a node number and then flips the two branches at that, so that the left-right order of branches at that node is changed. This does not actually change the tree topology (or the number of steps on that tree) but it does change the appearance of the tree.

C
("Clade"). When the data consist of more than 12 species (or more than half the number of lines on the screen if this is not 24), it may be difficult to display the tree on one screen. In that case the tree will be squeezed down to one line per species. This is too small to see all the interior states of the tree. The C command instructs the program to print out only that part of the tree (the "clade") from a certain node on up. The program will prompt you for the number of this node. Remember that thereafter you are not looking at the whole tree. To go back to looking at the whole tree give the C command again and enter "0" for the node number when asked. Most users will not want to use this option unless forced to.

H
("Help"). Prints a one-screen summary of what the commands do, a few words for each command.

?
("huh?"). A synonym for H. Same as Help command.

X
("Exit"). Exit from program. If the current tree has not yet been saved into a file, the program will ask you whether it should be saved.

Q
("Quit"). A synonym for X. Same as the eXit command.

OUTPUT

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether ancestral state 0 or 1 will give the fewest changes (according to the criterion in use). If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and many of these will be shown as "?". If the A option is not used, then the program will assume 0 as the ancestral state.

When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.

When DOLMOVE calculates the number of characters compatible with the tree, it will take the F option into account and count the multistate characters as units, counting a character as compatible with the tree only when all of the binary characters corresponding to it are compatible with the tree.

ADAPTING THE PROGRAM TO YOUR COMPUTER AND TO YOUR TERMINAL

As we have seen, the initial menu of the program allows you to choose among three screen types (PC, ANSI, and none). If you want to avoid having to make this choice every time, you can change some of the constants in the file phylip.h to have the terminal type initialize itself in the proper way, and recompile. The constants that need attention are ANSICRT and IBMCRT. Currently these are both set to "false" on Macintosh and on Unix/Linux systems, and IBMCRT is set to "true" on Windows systems. If your system has an ANSI compatible terminal, you might want to find the definition of ANSICRT in phylip.h and set it to "true", and IBMCRT to "false".
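
For example (the exact form and placement of these lines differ between systems and releases, so treat this as a sketch rather than a literal patch), on a Unix/Linux machine with an ANSI-compatible terminal the relevant definitions in phylip.h would end up reading:

     #define ANSICRT true
     #define IBMCRT  false

After editing the header, recompile the programs so that the new defaults take effect.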

MORE ABOUT THE PARSIMONY CRITERION

DOLMOVE uses as its numerical criterion the Dollo and polymorphism parsimony methods. The program defaults to carrying out Dollo parsimony.

The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary.

The assumptions of this method are in effect:

  1. We know which state is the ancestral one (state 0).
  2. The characters are evolving independently.
  3. Different lineages evolve independently.
  4. The probability of a forward change (0-->1) is small over the evolutionary times involved.
  5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change.
  6. Retention of polymorphism for both states (0 and 1) is highly improbable.
  7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment.

One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred.

The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained.

The assumptions of the polymorphism parsimony method are in effect:

  1. The ancestral state (state 0) is known in each character.
  2. The characters are evolving independently of each other.
  3. Different lineages are evolving independently.
  4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group.
  5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change.
  6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism.
  7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Below is a test data set, but we cannot show the output it generates because of the interactive nature of the program.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

version 3.6

Distance matrix programs

© Copyright 1986-2000 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that:

  1. Each distance is measured independently from the others: no item of data contributes to more than one distance.
  2. The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation.

These assumptions can be traced in the least squares methods of programs FITCH and KITSCH, but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumption is less obvious.

THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b).

For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better.

Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIP by the program RESTDIST.

For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyze the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true).

FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time.

The input format for distance data is straightforward. The first line of the input file contains the number of species. There follows species data, starting, as with all other programs, with a species name. The species name is ten characters long, and must be padded out with blanks if shorter. For each species there then follows a set of distances to all the other species (options selected in the programs' menus allow the distance matrix to be upper or lower triangular or square). The distances can continue to a new line after any of them. If the matrix is lower-triangular, the diagonal entries (the distances from a species to itself) will not be read by the programs. If they are included anyway, they will be ignored by the programs, except for the case where one of them starts a new line, in which case the program will mistake it for a species name and get very confused.

For example, here is a sample input matrix, with a square matrix:

     5
Alpha      0.000 1.000 2.000 3.000 3.000
Beta       1.000 0.000 2.000 3.000 3.000
Gamma      2.000 2.000 0.000 3.000 3.000
Delta      3.000 3.000 3.000 0.000 1.000
Epsilon    3.000 3.000 3.000 1.000 0.000

and here is a sample lower-triangular input matrix with distances continuing to new lines as needed:

   14
Mouse     
Bovine      1.7043
Lemur       2.0235  1.1901
Tarsier     2.1378  1.3287  1.2905
Squir Monk  1.5232  1.2423  1.3199  1.7878
Jpn Macaq   1.8261  1.2508  1.3887  1.3137  1.0642
Rhesus Mac  1.9182  1.2536  1.4658  1.3788  1.1124  0.1022
Crab-E.Mac  2.0039  1.3066  1.4826  1.3826  0.9832  0.2061  0.2681
BarbMacaq   1.9431  1.2827  1.4502  1.4543  1.0629  0.3895  0.3930  0.3665
Gibbon      1.9663  1.3296  1.8708  1.6683  0.9228  0.8035  0.7109  0.8132
  0.7858
Orang       2.0593  1.2005  1.5356  1.6606  1.0681  0.7239  0.7290  0.7894
  0.7140  0.7095
Gorilla     1.6664  1.3460  1.4577  1.5935  0.9127  0.7278  0.7412  0.8763
  0.7966  0.5959  0.4604
Chimp       1.7320  1.3757  1.7803  1.7119  1.0635  0.7899  0.8742  0.8868
  0.8288  0.6213  0.5065  0.3502
Human       1.7101  1.3956  1.6661  1.7599  1.0557  0.6933  0.7118  0.7589
  0.8542  0.5612  0.4700  0.3097  0.2712

Note that the name "Mouse" in this matrix must be padded out by blanks to the full length of 10 characters.

In general the distances are assumed to all be present: at the moment there is only one way to have missing entries in the distance matrix. If the S option (which allows the user to specify the degree of replication of each distance) is invoked with some of the entries having degree of replication zero, if the U (User Tree) option is in effect, and if the tree being examined is such that every branch length can be estimated from the data, then it will be possible to solve for the branch lengths and sum of squares despite the missing data. You may not get away with this if the U option is not in effect, as a tree may be tried on which the program will calculate a branch length by dividing zero by zero, and get upset.

The present version of NEIGHBOR does allow the Subreplication option to be used and the number of replicates to be in the input file, but it actually does nothing with this information except read it in. It makes use of the average distances in the cells of the input data matrix. This means that you cannot use the S option to treat zero cells. We hope to modify NEIGHBOR in the future to allow Subreplication. Of course the U (User tree) option is not available in NEIGHBOR in any case.

The present versions of FITCH and KITSCH will do much better on missing values than did previous versions, but you will still have to be careful about them. Nevertheless you might (just) be able to explore relevant alternative tree topologies one at a time using the U option when there is missing data.

Alternatively, if the missing values in one cell always correspond to a cell with non-missing values on the opposite side of the main diagonal (i.e., if D(i,j) missing implies that D(j,i) is not missing), then use of the S option will always be sufficient to cope with missing values. When it is used, the missing distances should be entered as if present (any number can be used) and the degree of replication for them should be given as 0.
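
For example (an invented line, in the Subreplication format described under the S option below), if the second distance in this row were the missing one, it could be entered with a placeholder value of 0.00 and a replicate count of 0:

Delta      3.00 5  0.00 0  1.84 9

Only the 0 in the replicate-count position matters; the placeholder distance itself is ignored.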

Note that the algorithm for searching among topologies in FITCH and KITSCH is the same one used in other programs, so that it is necessary to try different orders of species in the input data. The J (Jumble) menu option may be sufficient for most purposes.

The programs FITCH and KITSCH carry out the method of Fitch and Margoliash (1967) for fitting trees to distance matrices. They also are able to carry out the least squares method of Cavalli-Sforza and Edwards (1967), plus a variety of other methods of the same family (see the discussion of the P option below). They can also be set to use the Minimum Evolution method (Nei and Rzhetsky, 1993; Kidd and Sgaramella-Zonta, 1971).

The objective of these methods is to find that tree which minimizes

                      __  __
                      \   \    n_ij ( D_ij - d_ij )^2
  Sum of squares  =   /_  /_  -------------------------
                       i   j            D_ij^P

(the symbol made up of \, / and _ characters is of course a summation sign) where D is the observed distance between species i and j and d is the expected distance, computed as the sum of the lengths (amounts of evolution) of the segments of the tree from species i to species j. The quantity n is the number of times each distance has been replicated. In simple cases this is taken to be one, but the user can, as an option, specify the degree of replication for each distance. The distance is then assumed to be a mean of those replicates. The power P is what distinguishes the various methods. For the Fitch-Margoliash method, which is the default method with this program, P is 2.0. For the Cavalli-Sforza and Edwards least squares method it should be set to 0 (so that the denominator is always 1). An intermediate method is also available in which P is 1.0, and any other value of P, such as 4.0 or -2.3, can also be used. This generates a whole family of methods.
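
As a small worked illustration (the numbers are invented), suppose one cell of the matrix has observed distance D = 0.30, expected distance d = 0.27 on the current tree, and n = 1. Its contribution to the sum of squares is then

     P = 0 (Cavalli-Sforza and Edwards):   (0.30 - 0.27)^2 / 1       =  0.0009
     P = 2 (Fitch-Margoliash):             (0.30 - 0.27)^2 / 0.30^2  =  0.01

so under P = 2 the same absolute error is weighted much more heavily when the distance itself is small.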

The P (Power) option is not available in the Neighbor-Joining program NEIGHBOR. Implicitly, in this program P is 0.0 (though it is hard to prove this). The UPGMA option of NEIGHBOR will assign the same branch lengths to the particular tree topology that it finds as will KITSCH when given the same tree and Power = 0.0.

All these methods make the assumptions of additivity and independent errors. The difference between the methods is how they weight departures of observed from expected. In effect, these methods differ in how they assume that the variance of measurement of a distance will rise as a function of the expected value of the distance.

These methods assume that the variance of the measurement error is proportional to the P-th power of the expectation (hence the standard deviation will be proportional to the P/2-th power of the expectation). If you have reason to think that the measurement error of a distance is the same for small distances as it is for large, then you should set P=0 and use the least squares method, but if you have reason to think that the relative (percentage) error is more nearly constant than the absolute error, you should use P=2, the Fitch-Margoliash method. In between, P=1 would be appropriate if the sizes of the errors were proportional to the square roots of the expected distance.

One question which arises frequently is what the units of branch length are in the resulting trees. In general, they are not time but units of distance. Thus if two species have a distance 0.3 between them, they will tend to be separated by branches whose total length is about 0.3. In the case of DNA distances, for example, the unit of branch length will be substitutions per base. (In the case of protein distances, it will be amino acid substitutions per amino acid position.)

OPTIONS

Here are the options available in all three programs. They are selected using the menu of options.

U
the User tree option. The trees in FITCH are regarded as unrooted, and are specified with a trifurcation (three-way split) at their base, e.g.:

((A,B),C,(D,E));

while in KITSCH they are to be regarded as rooted and have a bifurcation at the base:

((A,B),(C,(D,E)));

Be careful not to move User trees from FITCH to KITSCH without changing their form appropriately (you can use RETREE to do this). User trees are not available in NEIGHBOR. In FITCH if you specify the branch lengths on one or more branches, you can select the L (use branch Lengths) option to avoid having those branches iterated, so that the tree is evaluated with their lengths fixed.
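
For example (the names and the length 0.05 are invented), a user tree in which only the branch to A has been given a length would, with the branch-lengths menu option selected, be evaluated with that branch held at 0.05 while all of the other branch lengths are iterated:

((A:0.05,B),C,(D,E));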

P
indicates that you are going to set the Power (P in the above formula). The default value is 2 (the Fitch-Margoliash method). The power, a real number such as 1.0, is prompted for by the programs. This option is not available in NEIGHBOR.

-
indicates that negative segment lengths are to be allowed in the tree (default is to require that all branch lengths be nonnegative). This option is not available in NEIGHBOR.

O
is the usual Outgroup option, available in FITCH and NEIGHBOR but not in KITSCH, nor when the UPGMA option of NEIGHBOR is used.

L
indicates that the distance matrix is input in Lower-triangular form (the lower-left half of the distance matrix only, without the zero diagonal elements).

R
indicates that the distance matrix is input in uppeR-triangular form (the upper-right half of the distance matrix only, without the zero diagonal elements).

S
is the Subreplication option. It informs the program that after each distance an integer will be provided, indicating that the distance is a mean of that many replicates. There is no auxiliary information, but the presence of the S option indicates that the data will be in a different form. Each distance must be followed by an integer indicating the number of replicates, so that a line of data looks like this:

Delta      3.00 5  3.21 3  1.84 9

the 5, 3, and 9 being the number of times the measurement was replicated. When the number of replicates is zero, a distance value must still be provided, although its value will not affect the result. This option is not available in NEIGHBOR.

G
is the usual Global branch-swapping option. It is available in FITCH and KITSCH but is not relevant to NEIGHBOR.

J
indicates the usual J (Jumble) option for entering species in a random order. In FITCH and KITSCH if you do multiple jumbles in one run the program will print out the best tree found overall.

M
is the usual Multiple data sets option, available in all of these programs. It allows us (when the output tree file is analyzed in CONSENSE) to do a bootstrap (or delete-half-jackknife) analysis with the distance matrix programs.

The numerical options are the usual ones and should be clear from the menu.

Note that when the options L or R are used one of the species, the first or last one, will have its name on an otherwise empty line. Even so, the name should be padded out to full length with blanks. Here is a sample lower-triangular data set.

     5
Alpha      
Beta       1.00
Gamma      3.00 3.00
Delta      3.00 3.00 2.00
Epsilon    3.00 3.00 2.00 1.00
<--- note: five blanks should follow the name "Alpha"



Be careful if you are using lower- or upper-triangular distance matrices to make the corresponding selection from the menu (L or R), as the program may get horribly confused otherwise; it will still give a result, but that result will be meaningless. With the menu option selected all should be well.

version 3.6

ProML -- Protein Maximum Likelihood program

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the maximum likelihood method for protein amino acid sequences. It uses either the Jones-Taylor-Thornton or the Dayhoff probability model of change between amino acids. The assumptions of these models are:

  1. Each position in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. The probabilities of change between amino acids are given by the model of Jones, Taylor, and Thornton (1992) or by the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et. al., 1979).

Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length is of a patch of positions all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.

The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions.

This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at a relative rate of 0.2 compared to 1.0 at other positions. If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.
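
For instance, under this multiplicative scheme a position in a user-assigned category with relative rate 0.2 that falls in an HMM regional rate class of 2.4 would be treated as changing at rate 0.2 x 2.4 = 0.48 (these particular numbers are only illustrative).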

INPUT FORMAT AND OPTIONS

Subject to these assumptions, the program is a correct maximum likelihood method. The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of amino acid positions.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter amino acid code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:

Amino acid sequence Maximum Likelihood method, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  P   JTT or PAM amino acid change model?  Jones-Taylor-Thornton model
  C                One category of sites?  Yes
  R           Rate variation among sites?  constant rate of change
  W                       Sites weighted?  No
  S        Speedier but rougher analysis?  Yes
  G                Global rearrangements?  No
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes
  5   Reconstruct hypothetical sequences?  No

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, W, J, O, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The P option toggles between two models of amino acid change. One is the Jones-Taylor-Thornton model, the other the Dayhoff PAM matrix model. These are both based on Margaret Dayhoff's (Dayhoff and Eck, 1968; Dayhoff et. al., 1979) method of empirical tabulation of changes of amino acid sequences, and conversion of these to a probability model of amino acid change which is used to make a transition probability matrix which allows prediction of the probability of changing from any one amino acid to any other, and also predicts equilibrium amino acid composition.

The default method is that of Jones, Taylor, and Thornton (1992). This is similar to the Dayhoff PAM model, except that it is based on a recounting of the number of observed changes in amino acids, using a much larger sample of protein sequences than did Dayhoff. Because its sample is so much larger this model is to be preferred over the original Dayhoff PAM model. The Dayhoff model uses Dayhoff's PAM 001 matrix from Dayhoff et. al. (1979), page 348.

The R (Hidden Markov Model rates) option allows the user to approximate a Gamma distribution of rates among positions, or a Gamma distribution plus a class of invariant positions, or to specify how many categories of substitution rates there will be in a Hidden Markov Model of rate variation, and what are the rates and probabilities for each. By repeatedly selecting the R option one toggles among no rate variation, the Gamma, Gamma+I, and general HMM possibilities.

If you choose Gamma or Gamma+I the program will ask how many rate categories you want. If you have chosen Gamma+I, keep in mind that one rate category will be set aside for the invariant class and only the remaining ones used to approximate the Gamma distribution. For the approximation we do not use the quantile method of Yang (1995) but instead use a quadrature method using generalized Laguerre polynomials. This should give a good approximation to the Gamma distribution with as few as 5 or 6 categories.

In the Gamma and Gamma+I cases, the user will be asked to supply the coefficient of variation of the rate of substitution among positions. This is different from the parameters used by Nei and Jin (1990) but related to them: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

     CV = 1 / a^(1/2)

or

     a = 1 / (CV)^2

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
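
To give a numerical feeling for this relationship (the values are only illustrative):

     CV = 1.0   corresponds to   a = 1 / (1.0)^2 = 1.0
     CV = 0.5   corresponds to   a = 1 / (0.5)^2 = 4.0

so halving the coefficient of variation quadruples alpha.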

If the user instead chooses the general Hidden Markov Model option, they are first asked how many HMM rate categories there will be (for the moment there is an upper limit of 9, which should not be restrictive). Then the program asks for the rates for each category. These rates are only meaningful relative to each other, so that rates 1.0, 2.0, and 2.4 have the exact same effect as rates 2.0, 4.0, and 4.8. Note that an HMM rate category can have rate of change 0, so that this allows us to take into account that there may be a category of amino acid positions that are invariant. Note that the run time of the program will be proportional to the number of HMM rate categories: twice as many categories means twice as long a run. Finally the program will ask for the probabilities of a random amino acid position falling into each of these regional rate categories. These probabilities must be nonnegative and sum to 1. Default for the program is one category, with rate 1.0 and probability 1.0 (actually the rate does not matter in that case).

If more than one HMM rate category is specified, then another option, A, becomes visible in the menu. This allows us to specify that we want to assume that positions that have the same HMM rate category are expected to be clustered so that there is autocorrelation of rates. The program asks for the value of the average patch length. This is an expected length of patches that have the same rate. If it is 1, the rates of successive positions will be independent. If it is, say, 10.25, then the chance of change to a new rate will be 1/10.25 after every position. However the "new rate" is randomly drawn from the mix of rates, and hence could even be the same. So the actual observed length of patches with the same rate will be a bit larger than 10.25. Note below that if you choose multiple patches, there will be an estimate in the output file as to which combination of rate categories contributed most to the likelihood.

Note that the autocorrelation scheme we use is somewhat different from Yang's (1995) autocorrelated Gamma distribution. I am unsure whether this difference is of any importance -- our scheme is chosen for the ease with which it can be implemented.

The C option allows user-defined rate categories. The user is prompted for the number of user-defined rates, and for the rates themselves, which cannot be negative but can be zero. These numbers, which must be nonnegative (some could be 0), are defined relative to each other, so that if rates for three categories are set to 1 : 3 : 2.5 this would have the same meaning as setting them to 2 : 6 : 5. The assignment of rates to amino acid positions is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444

With the current options R, A, and C the program has a good ability to infer different rates at different positions and estimate phylogenies under a more realistic model. Note that Likelihood Ratio Tests can be used to test whether one combination of rates is significantly better than another, provided one rate scheme represents a restriction of another with fewer parameters. The number of parameters needed for rate variation is the number of regional rate categories, plus the number of user-defined rate categories less 2, plus one if the regional rate categories have a nonzero autocorrelation.
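
For example, reading that rule as (regional categories) + (user-defined categories) - 2, plus 1 for autocorrelation, a run with 3 HMM rate categories, 2 user-defined categories, and a nonzero autocorrelation of rates would involve 3 + 2 - 2 + 1 = 4 parameters for rate variation.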

The G (global search) option causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program.

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating. They must be preceded in the file by a line giving the number of trees in the file.

If the U (user tree) option is chosen another option appears in the menu, the L option. If it is selected, it signals the program that it should take any branch lengths that are in the user tree and simply evaluate the likelihood of that tree, without further altering those branch lengths. This means that if some branches have lengths and others do not, the program will estimate the lengths of those that do not have lengths given in the user tree. Note that the program RETREE can be used to add and remove lengths from a tree.

The U option can read a multifurcating tree. This allows us to test the hypothesis that a certain branch has zero length (we can also do this by using RETREE to set the length of that branch to 0.0 when it is present in the tree). By doing a series of runs with different specified lengths for a branch we can plot a likelihood curve for its branch length while allowing all other branches to adjust their lengths to it. If all branches have lengths specified, none of them will be iterated. This is useful to allow a tree produced by another method to have its likelihood evaluated. The L option has no effect and does not appear in the menu if the U option is not used.
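
As a sketch of how such a run might be set up (the fixed length 0.02 is invented; the species names are those of the test data set below), the input tree file would contain the number of trees followed by a tree in which only the branch of interest has a length, so that with the L option that branch is held fixed while the others are estimated:

1
((Alpha,Beta):0.02,Gamma,(Delta,Epsilon));

Repeating the run with a series of different fixed values, and recording the log likelihood each time, traces out the likelihood curve for that branch.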

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of positions to be analyzed, ignoring the others. The positions selected are those with weight 1. If the W option is not invoked, all positions are analyzed. The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file.

The M (multiple data sets) option will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets from the input file. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights. Note also that when we use multiple weights for bootstrapping we can also then maintain different rate categories for different positions in a meaningful way. If you use the multiple data sets option without using multiple weights, you should not at the same time use the user-defined rate categories option (option C).

The algorithm used for searching among trees uses a technique invented by David Swofford and J. S. Rogers. This involves not iterating most branch lengths on most trees when searching among tree topologies. This is of necessity a "quick-and-dirty" search but it saves much time. There is a menu option (option S) which can turn off this search and revert to the earlier search method which iterated branch lengths in all topologies. This will be substantially slower but will also be a bit more likely to find the tree topology of highest likelihood. If the Swofford/Rogers search finds the best tree topology, the branch lengths inferred will be almost precisely the same as they would be with the more thorough search, as the maximization of likelihood with respect to branch lengths for the final tree is not different in the two kinds of search.

OUTPUT FORMAT

The output starts by giving the number of species and the number of amino acid positions.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them, with the sequences printed in groups of ten amino acids. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. The unit of branch length is the expected fraction of amino acids changed (so that 1.0 is 100 PAMs).

A table is printed showing the length of each tree segment (in units of expected amino acid substitutions per position), as well as (very) rough confidence limits on their lengths. If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow.

In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This is done by comparing the likelihoods changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above in the discussion of the L option.

One should also realize that if you are looking not at a previously-chosen branch but at all branches, that you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that).

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of amino acid substitutions, scaled so that the average rate of change, averaged over all the positions analyzed, is set to 1.0 if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol printed in capital letters (W rather than w). One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.

PROGRAM CONSTANTS

The constants defined at the beginning of the program include "maxtrees", the maximum number of user trees that can be processed. It is small (100) at present to save some further memory but the cost of increasing it is not very great. Other constants include "maxcategories", the maximum number of position categories, "namelength", the length of species names in characters, and three others, "smoothings", "iterations", and "epsilon", that help "tune" the algorithm and define the compromise between execution speed and the quality of the branch lengths found by iteratively maximizing the likelihood. Reducing iterations and smoothings, and increasing epsilon, will result in faster execution but a worse result. These values will not usually have to be changed.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring for each pattern, lends itself readily to parallel processing.

PAST AND FUTURE OF THE PROGRAM

This program is derived in version 3.6 by Lucas Mix from DNAML, with which it shares many of its data structures and much of its strategy.


TEST DATA SET

(Note that although these may look like DNA sequences, they are being treated as protein sequences consisting entirely of alanine, cysteine, glycine, and threonine).

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on)

(It was run with HMM rates having gamma-distributed rates approximated by 5 rate categories, with coefficient of variation of rates 1.0, and with patch length parameter = 1.5. Two user-defined rate categories were used, one for the first 6 positions, the other for the last 7, with rates 1.0 : 2.0. Weights were used, with sites 1 and 13 given weight 0, and all others weight 1.)


Amino acid sequence Maximum Likelihood method, version 3.6a3

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             0111111111 111

Jones-Taylor-Thornton model of amino acid change


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         ..G..C.... ..C
Gamma        C.TT.C.T.. C.A
Delta        GGTA.TT.GG CC.
Epsilon      GGGA.CT.GG CCC



Discrete approximation to gamma distributed rates
 Coefficient of variation of rates = 1.000000  (alpha = 1.000000)

States in HMM   Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023



Site category   Rate of change

        1           1.000
        2           2.000



  +Beta      
  |  
  |                                       +Epsilon   
  |         +-----------------------------3  
  1---------2                             +-------------------Delta     
  |         |  
  |         +--------------------------Gamma     
  |  
  +-----------------Alpha     


remember: this is an unrooted tree!

Ln Likelihood =  -121.49044

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     1          Alpha            60.18362     (     zero,   135.65380) **
     1          Beta              0.00010     (     zero,    infinity)
     1             2             32.56292     (     zero,    96.08019) *
     2             3            141.85557     (     zero,   304.10906) **
     3          Epsilon           0.00010     (     zero,    infinity)
     3          Delta            68.68682     (     zero,   151.95402) **
     2          Gamma            89.79037     (     zero,   198.93830) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01

Combination of categories that contributes the most to the likelihood:

             1122121111 112

Most probable category at each site if > 0.95 probability ("." otherwise)

             ....1..... ...

Probable sequences at interior nodes:

  node       Reconstructed sequence (caps if > 0.95)

    1        .AGGTCGCCA AAC
 Beta        AAGGTCGCCA AAC
    2        .AggTCGCCA CAC
    3        .GGATCTCGG CCC
 Epsilon     GGGATCTCGG CCC
 Delta       GGTATTTCGG CCT
 Gamma       CATTTCGTCA CAA
 Alpha       AACGTGGCCA AAT


version 3.6

FITCH -- Fitch-Margoliash and Least-Squares Distance Methods

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program carries out Fitch-Margoliash, Least Squares, and a number of similar methods as described in the documentation file for distance methods.

The options for FITCH are selected through the menu, which looks like this:


Fitch-Margoliash method version 3.6a3

Settings for this run:
  D      Method (F-M, Minimum Evolution)?  Fitch-Margoliash
  U                 Search for best tree?  Yes
  P                                Power?  2.00000
  -      Negative branch lengths allowed?  No
  O                        Outgroup root?  No, use as outgroup species  1
  L         Lower-triangular data matrix?  No
  R         Upper-triangular data matrix?  No
  S                        Subreplicates?  No
  G                Global rearrangements?  No
  J     Randomize input order of species?  No. Use input order
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

Most of the input options (U, P, -, O, L, R, S, J, and M) are as given in the documentation page for distance matrix programs, and their input format is the same as given there. The U (User Tree) option has one additional feature when the N (Lengths) option is used. This menu option will appear only if the U (User Tree) option is selected. If N (Lengths) is set to "Yes" then if any branch in the user tree has a branch length, that branch will not have its length iterated. Thus you can prevent all branches from having their lengths changed by giving them all lengths in the user tree, or hold only one length unchanged by giving only that branch a length (such as, for example, 0.00). You may find program RETREE useful for adding and removing branch lengths from a tree. This option can also be used to compute the Average Percent Standard Deviation for a tree obtained from NEIGHBOR, for comparison with trees obtained by FITCH or KITSCH.

The D (methods) option allows choice between the Fitch-Margoliash criterion and the Minimum Evolution method (Kidd and Sgaramella-Zonta, 1971; Rzhetsky and Nei, 1993). Minimum Evolution (not to be confused with parsimony) uses the Fitch-Margoliash criterion to fit branch lengths to each topology, but then chooses topologies based on their total branch length (rather than the goodness of fit sum of squares). There is no constraint on negative branch lengths in the Minimum Evolution method; it sometimes gives rather strange results, as it can favor solutions that have large negative branch lengths, as these reduce the total sum of branch lengths!

Another input option available in FITCH that is not available in KITSCH or NEIGHBOR is the G (Global) option. G is the Global search option. This causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program. It is not an option in KITSCH because it is the default and is always in force there. The O (Outgroup) option is described in the main documentation file of this package. The O option has no effect if the tree is a user-defined tree (if the U option is in effect). The U (User Tree) option requires an unrooted tree; that is, it requires that the tree have a trifurcation at its base:

     ((A,B),C,(D,E));

The output consists of an unrooted tree and the lengths of the interior segments. The sum of squares is printed out, and if P = 2.0 Fitch and Margoliash's "average percent standard deviation" is also computed and printed out. This is the sum of squares, divided by N-2, and then square-rooted and then multiplied by 100 (n is the number of species on the tree):

     APSD = ( SSQ / (N-2) )^(1/2) x 100

where N is the total number of off-diagonal distance measurements that are in the (square) distance matrix. If the S (subreplication) option is in force it is instead the sum of the numbers of replicates in all the non-diagonal cells of the distance matrix. But if the L or R option is also in effect, so that the distance matrix read in is lower- or upper-triangular, then the sum of replicates is only over those cells actually read in. If S is not in force, the number of replicates in each cell is assumed to be 1, so that N is n(n-1), where n is the number of species. The APSD gives an indication of the average percentage error. The number of trees examined is also printed out.
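
As a worked check of the formula, here is a small sketch (mine, not part of FITCH) that reproduces the Average Percent Standard Deviation from the printed sum of squares; N is as defined above.

import math

def average_percent_standard_deviation(ssq, n_measurements):
    # APSD = sqrt( SSQ / (N - 2) ) * 100
    return math.sqrt(ssq / (n_measurements - 2)) * 100.0

# For the 7-species test data set below, the full square matrix gives
# N = 7 * 6 = 42, and sqrt(0.01375 / 40) * 100 is about 1.854, which agrees
# with the example output.
print(average_percent_standard_deviation(0.01375, 42))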

The constants available for modification at the beginning of the program are: "smoothings", which gives the number of passes through the algorithm which adjusts the lengths of the segments of the tree so as to minimize the sum of squares, "delta", which controls the size of improvement in sum of squares that is used to control the number of iterations improving branch lengths, and "epsilonf", which defines a small quantity needed in some of the calculations. There is no feature for saving multiple trees tied for best, partly because we do not expect exact ties except in cases where the branch lengths make the nature of the tie obvious, as when a branch is of zero length.

The algorithm can be slow. As the number of species rises, so does the number of distances from each species to the others. The run time of this algorithm will thus rise as the fourth power of the number of species, rather than as the third power as it does for most of the others. Hence it is expected to get very slow as the number of species is made larger.


TEST DATA SET

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000


OUTPUT FROM TEST DATA SET (with all numerical options on)


   7 Populations

Fitch-Margoliash method version 3.6a3

                  __ __             2
                  \  \   (Obs - Exp)
Sum of squares =  /_ /_  ------------
                                2
                   i  j      Obs

Negative branch lengths not allowed


Name                       Distances
----                       ---------

Bovine        0.00000   1.68660   1.71980   1.66060   1.52430   1.60430
              1.59050
Mouse         1.68660   0.00000   1.52320   1.48410   1.44650   1.43890
              1.46290
Gibbon        1.71980   1.52320   0.00000   0.71150   0.59580   0.61790
              0.55830
Orang         1.66060   1.48410   0.71150   0.00000   0.46310   0.50610
              0.47100
Gorilla       1.52430   1.44650   0.59580   0.46310   0.00000   0.34840
              0.30830
Chimp         1.60430   1.43890   0.61790   0.50610   0.34840   0.00000
              0.26920
Human         1.59050   1.46290   0.55830   0.47100   0.30830   0.26920
              0.00000


  +---------------------------------------------Mouse     
  ! 
  !                                +------Human     
  !                             +--5 
  !                           +-4  +--------Chimp     
  !                           ! ! 
  !                        +--3 +---------Gorilla   
  !                        !  ! 
  1------------------------2  +-----------------Orang     
  !                        ! 
  !                        +---------------------Gibbon    
  ! 
  +------------------------------------------------------Bovine    


remember: this is an unrooted tree!

Sum of squares =     0.01375

Average percent standard deviation =     1.85418

Between        And            Length
-------        ---            ------
   1          Mouse             0.76985
   1             2              0.41983
   2             3              0.04986
   3             4              0.02121
   4             5              0.03695
   5          Human             0.11449
   5          Chimp             0.15471
   4          Gorilla           0.15680
   3          Orang             0.29209
   2          Gibbon            0.35537
   1          Bovine            0.91675


PHYLIPNEW-3.69.650/doc/discrete.html

version 3.6

DOCUMENTATION FOR (0,1) DISCRETE CHARACTER PROGRAMS

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.
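
The following short sketch (my own illustration, not the FACTOR program) carries out this recoding for a rooted character-state tree given as a mapping from each state to its parent; each arrow of the tree becomes one binary character, scored 1 in every state that has that arrow in its ancestry.

def recode(parent):
    # parent maps each state to its ancestral state; the root maps to None.
    edges = [(s, p) for s, p in parent.items() if p is not None]
    def arrows_in_ancestry(state):
        found = set()
        while parent[state] is not None:
            found.add((state, parent[state]))
            state = parent[state]
        return found
    return {s: "".join("1" if e in arrows_in_ancestry(s) else "0" for e in edges)
            for s in parent}

# The character-state tree shown above: 1 ancestral; 0, 2 and 3 derived.
print(recode({"1": None, "0": "1", "2": "0", "3": "0"}))
# {'1': '000', '0': '100', '2': '110', '3': '101'} -- the columns come out in
# a different order than in the table above, but each state gets one 1 per
# arrow in its ancestry, which is all that matters for counting changes.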

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

COMPARISON OF METHODS

The methods used in these programs make different assumptions about evolutionary rates, probabilities of different kinds of events, and our knowledge about the characters or about the character state trees. Basic references on these assumptions are my 1979, 1981b and 1983b papers, particularly the latter. The assumptions of each method are briefly described in the documentation file for the corresponding program. In most cases my assertions about what are the assumptions of these methods are challenged by others, whose papers I also cite at that point. Personally, I believe that they are wrong and I am right. I must emphasize the importance of understanding the assumptions underlying the methods you are using. No matter how fancy the algorithms, how maximum the likelihood or how minimum the number of steps, your results can only be as good as the correspondence between biological reality and your assumptions!

INPUT FORMAT

The input format is as described in the general documentation file. The input starts with a line containing the number of species and the number of characters.

In PARS, each character can have up to 8 states plus a "?" state. In any character, the first 8 symbols encountered will be taken to represent these states. Any of the digits 0-9, letters A-Z and a-z, and even symbols such as + and -, can be used (and in fact which 8 symbols are used can be different in different characters).

In the other discrete characters programs the allowable states are 0, 1, P, B, and ?. Blanks may be included between the states (i.e. you can have a species whose data is DISCOGLOSS0 1 1 0 1 1 1). It is possible for extraneous information to follow the end of the character state data on the same line. For example, if there were 7 characters in the data set, a line of species data could read "DISCOGLOSS0110111 Hello there".

The discrete character data can continue to a new line whenever needed. The characters are not in the "aligned" or "interleaved" format used by the molecular sequence programs: they have the name and entire set of characters for one species, then the name and entire set of characters for the next one, and so on. This is known as the sequential format. Be particularly careful when you use restriction sites data, which can be in either the aligned or the sequential format for use in RESTML but must be in the sequential format for these discrete character programs.

For PARS the discrete character data can be in either Sequential or Interleaved format; the latter is the default.

Errors in the input data will often be detected by the programs, and this will cause them to issue an error message such as 'BAD OUTGROUP NUMBER: ' together with information as to which species, character, or in this case outgroup number is the incorrect one. The program will then terminate; you will have to look at the data and figure out what went wrong and fix it. Often an error in the data causes a lack of synchronization between what is in the data file and what the program thinks is to be there. Thus a missing character may cause the program to read part of the next species name as a character and complain about its value. In this type of case you should look for the error earlier in the data file than the point about which the program is complaining.

OPTIONS GENERALLY AVAILABLE

Specific information on options will be given in the documentation file associated with each program. However, some options occur in many programs. Options are selected from the menu in each program, but the Old Style programs CLIQUE and FACTOR require information to be put into the beginning of the input file (Particularly the Ancestors, Factors, Weights, and Mixtures options). The options information described here is for the other programs. See the documentation page for CLIQUE and FACTOR to find out how they get their options information.

  • The A (Ancestral states) option. This indicates that we are specifying the ancestral states for each character. In the menu the ancestors (A) option must be selected. An ancestral states input file is read, whose default name is ancestors. It contains a line or lines giving the ancestral states for each character. These may be 0, 1 or ?, the latter indicating that the ancestral state is unknown.

    An example is:

    001??11

    The ancestor information can be continued to a new line and can have blanks between any of the characters in the same way that species character data can. In the program CLIQUE the ancestor is instead to be included as a regular species and no A option is available.

  • The F (Factors) option. This is used in programs MOVE, DOLMOVE, and FACTOR. It specifies which binary characters correspond to which multistate characters. To use the F option you choose the F option in the program menu. After that the program will read a factors file (default name factors), which consists of a line or lines containing a symbol for each binary character. The symbol can be anything, provided that it is the same for binary characters that correspond to the same multistate character, and changes between multistate characters. A good practice is to make it the lower-order digit of the number of the multistate character.

    For example, if there were 20 binary characters that had been generated by nine multistate characters having respectively 4, 3, 3, 2, 1, 2, 2, 2, and 1 binary factors you would make the factors file be:

    11112223334456677889

    although it could equivalently be:

    aaaabbbaaabbabbaabba

    All that is important is that the symbol for each binary character change only when adjacent binary characters correspond to different multistate characters. The factors file contents can continue to a new line at any time except during the initial characters filling out the length of a species name.

    In programs CLIQUE and FACTOR the factors information is given in the Old Style system of putting that information into the input data file. The method for doing so is described in the documentation files for these programs. We hope to change this in the next release to use an input factors file.

  • The J (Jumble) option. This causes the species to be entered into the tree in a random order rather than in their order in the input file. The program prompts you for a random number seed. This option is described in the main documentation file.

  • The M (Multiple data sets) option. This has also been described in the main documentation file. It is not to be confused with the M option specified in the input file, which is the Mixture of methods option (yes, I know this is confusing).

  • The O (outgroup) option. This has also already been discussed in the general documentation file. It specifies the number of the particular species which will be used as the outgroup in rerooting the final tree when it is printed out. It will not have any effect if the tree is already rooted or is a user-defined tree. This option is not available in DOLLOP, DOLMOVE, or DOLPENNY, which always infer a rooted tree, or CLIQUE, which requires you to work out the rerooting by hand. The menu selection will cause you to be prompted for the number of the outgroup.

  • The T (threshold) option. This sets a threshold such that if the number of steps counted in a character is higher than the threshold, it will be taken to be the threshold value rather than the actual number of steps. This option has already been described in the main documentation file. The user is prompted for the threshold value. My 1981 paper (Felsenstein, 1981b) explains the logic behind the Threshold option, which is an attractive alternative to successive weighting of characters.

  • The U (User tree) option. This has already been described in the main documentation file. For all of these programs user trees are to be specified as bifurcating trees, even in the cases where the tree that is inferred by the programs is to be regarded as unrooted.

  • The W (Weights) option. This allows us to specify weights on the characters, including the possibility of omitting characters from the analysis. It has already been described in the main documentation file. If the Weights option is used there must be a W on the first line of the input file.

  • The X (miXture) option. In the programs MIX, MOVE, and PENNY the user can specify for each character which parsimony method is in effect. This is done by selecting menu option X (not M) and having an input mixture file, whose default name is mixture. It contains a line or lines with one letter for each character. These letters are C or S if the character is to be reconstructed according to Camin-Sokal parsimony, or W or ? if the character is to be reconstructed according to Wagner parsimony. So if there are 10 characters the line giving the mixture might look like this:

    WWWCC WWCWC
    

    Note that blanks in the sequence of characters (after the first ones that are as long as the species names) will be ignored, and the information can go on to a new line at any point. So this could equally well have been specified by

    WWW
    CCWWCWC
    

[fragment of an output table giving the number of steps in each character; the rows are labeled 0, 10, 20, 30 and 40, and the columns 0 through 9]

The numbers across the top and down the side indicate which character is being referred to. Thus character 23 is column "3" of row "20" and has 2 steps in this case.

I cannot emphasize too strongly that just because the tree diagram which the program prints out contains a particular branch DOES NOT MEAN THAT WE HAVE EVIDENCE THAT THE BRANCH IS OF NONZERO LENGTH. In program PARS the branches have lengths estimated and there can be trifurcations, but in all other discrete characters programs the procedure which prints out the tree cannot cope with a trifurcation, nor can the internal data structures used in my programs. Therefore, even when we have no resolution and a multifurcation, successive bifurcations will be printed out, although some of the branches shown will in fact actually be of zero length. To find out which, you will have to work out character by character where the placements of the changes on the tree are, under all possible ways that the changes can be placed on that tree.

In PARS the trees are truly multifurcating, and the search is over both bifurcating and multifurcating trees. A branch is retained in a tree only if there is at least one character, under at least one possible most parsimonious reconstruction of the placement of changes, that has a change in that branch. This means that two branches can both be present which are, however, not both in existence at the same time (in that there is no most parsimonious reconstruction of changes in the characters that has changes in both these branches at the same time).

In PARS, MIX, PENNY, DOLLOP, and DOLPENNY the trees will be (if the user selects the option to see them) accompanied by tables showing the reconstructed states of the characters in the hypothetical ancestral nodes in the interior of the tree. This will enable you to reconstruct where the changes were in each of the characters. In some cases the state shown in an interior node will be "?", which means that either 0 or 1 would be possible at that point. In such cases you have to work out the ambiguity by hand. A unique assignment of locations of changes is often not possible in the case of the Wagner parsimony method. There may be multiple ways of assigning changes to segments of the tree with that method. Printing only one would be misleading, as it might imply that certain segments of the tree had no change, when another equally valid assignment would put changes there. It must be emphasized that all these multiple assignments have exactly equal numbers of total changes, so that none is preferred over any other.

I have followed the convention of having a "." printed out in the table of character states of the hypothetical ancestral nodes whenever a state is 0 or 1 and its immediate ancestor is the same. This has the effect of highlighting the places where changes might have occurred and making it easy for the user to reconstruct all the alternative patterns of the characters states in the hypothetical ancestral nodes. In PARS you can, using the menu, turn off this dot-differencing convention and see all states at all hypothetical ancestral nodes of the tree.

On the line in that table corresponding to each branch of the tree will also be printed "yes", "no" or "maybe" as an answer to the question of whether this branch is of nonzero length. If there is no evidence that any character has changed in that branch, then "no" will be printed. If there is definite evidence that one has changed, then "yes" will be printed. If the matter is ambiguous, then "maybe" will be printed. You should keep in mind that all of these conclusions assume that we are only interested in the assignment of states that requires the least amount of change. In reality, the confidence limit on tree topology usually includes many different topologies, and presumably also then the confidence limits on amounts of change in branches are also very broad.

In addition to the table showing numbers of events, a table may be printed out showing which ancestral state causes the fewest events for each character. This will not always be done, but only when the tree is rooted and some ancestral states are unknown. This can be used to infer states of ancestors. For example, if you use the O (Outgroup) and A (Ancestral states) options together, with at least some of the ancestral states being given as "?", then inferences will be made for those characters, as the outgroup makes the tree rooted if it was not already.

In programs MIX and PENNY, if you are using the Camin-Sokal parsimony option with ancestral state "?" and it turns out that the program cannot decide between ancestral states 0 and 1, it will fail to even attempt reconstruction of states of the hypothetical ancestors, printing them all out as "." for those characters. This is done for internal bookkeeping reasons -- to reconstruct their changes would require a fair amount of additional code and additional data structures. It is not too hard to reconstruct the internal states by hand, trying the two possible ancestral states one after the other. A similar comment applies to the use of ancestral state "?" in the Dollo or Polymorphism parsimony methods (programs DOLLOP and DOLPENNY) which also can result in a similar hesitancy to print the estimate of the states of the hypothetical ancestors. In all of these cases the program will print "?" rather than "no" when it describes whether there are any changes in a branch, since there might or might not be changes in those characters which are not reconstructed.

For further information see the documentation files for the individual programs.

PHYLIPNEW-3.69.650/doc/treedist.html

version 3.6

TREEDIST -- distances between trees

© Copyright 2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program computes distances between trees. The distance that is computed is the Symmetric Distance of Robinson and Foulds (1981). This does not use branch length information, only the tree topologies. It must also be borne in mind that the distance does not have any immediate statistical interpretation -- we cannot say whether a larger distance is significantly larger than a smaller one.

The Symmetric Distance is computed by considering each of the branches of the two trees. Each branch divides the set of species into two groups -- the ones connected to one end of the branch and the ones connected to the other. This makes a partition of the full set of species. For example, the tree (in Newick notation)

  ((A,C),(D,(B,E))) 
has two internal branches. One induces the partition {A, C  |  B, D, E} and the other induces the partition {A, C, D  |  B, E}. A different tree with the same set of species,
  (((A,D),C),(B,E)) 
has internal branches that correspond to the two partitions {A, C, D  |  B, E} and {A, D  |  B, C, E}. Note that the other branches, all of which are external branches, induce partitions that separate one species from all the others. Thus there are 5 partitions like this: {C  |  A, B, D, E} on each of these trees. These are always present on all trees, provided that each tree has each species at the end of its own branch.

The Symmetric Distance is simply a count of how many partitions there are, among the two trees, that are on one tree and not on the other. In the example above there are two partitions, {A, C  |  B, D, E} and {A, D  |  B, C, E}, each of which is present on only one of the two trees. The Symmetric Distance between the two trees is therefore 2. When the two trees are fully resolved bifurcating trees, their symmetric distance must be an even number; it can range from 0 to twice the number of internal branches, which for n species is 2n-6.

We have assumed that nothing is lost if the trees are treated as unrooted trees. It is easy to define a counterpart to the Symmetric Distance for rooted trees. Each branch then defines a set of species, namely the clade defined by that branch. Thus if the first of the two trees above were considered as a rooted tree it would define the three clades {A, C}, {B, D, E}, and {B, E}. The symmetric distance between two rooted trees is simply the count of the number of clades that are defined by one but not by the other. For the second tree the clades would be {A, D}, {A, C, D}, and {B, E}. The Symmetric Distance between these two rooted trees would then be 4.

Although the examples we have discussed have involved fully bifurcating trees, the input trees can have multifurcations. This can lead to distances that are odd numbers.
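
A minimal sketch of the whole calculation, using set operations and helper names of my own (this is an illustration, not TREEDIST's code): represent each tree by the set of partitions its internal branches induce, and count the partitions found in only one of the two sets.

def bipartition(group, all_species):
    side = frozenset(group)
    return frozenset([side, frozenset(all_species) - side])

def symmetric_distance(partitions1, partitions2):
    # partitions present in exactly one of the two trees
    return len(partitions1 ^ partitions2)

species = "ABCDE"
tree1 = {bipartition("AC", species), bipartition("BE", species)}  # ((A,C),(D,(B,E)))
tree2 = {bipartition("AD", species), bipartition("BE", species)}  # (((A,D),C),(B,E))
print(symmetric_distance(tree1, tree2))   # prints 2, as in the example above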

INPUT AND OPTIONS

The program reads one or two input tree files. If there is one input tree file, its default name is intree. If there are two their default names are intree and intree2. The tree files may either have the number of trees on their first line, or not. If the number of trees is given, it is actually ignored and all trees in the tree file are considered, even if there are more trees than indicated by the number. (This is a bug and it will be fixed in the future).

The options are selected from a menu, which looks like this:


Tree distance program, version 3.6a3

Settings for this run:
 O                         Outgroup root:  No, use as outgroup species  1
 R         Trees to be treated as Rooted:  No
 T    Terminal type (IBM PC, ANSI, none):  (none)
 1  Print indications of progress of run:  Yes
 2                 Tree distance submenu:  Distance between adjacent pairs

Are these settings correct? (type Y or the letter for one to change)

The O option allows you to root the trees using an outgroup. It is specified by giving its number, where the species are numbered in the order they appear in the first tree. Outgroup-rooting all the trees does not affect the unrooted Symmetric Distance, and if it is done and trees are treated as rooted, the distances turn out to be the same as the unrooted ones. Thus it is unlikely that you will find this option of interest.

The R option controls whether the Symmetric Distance that is computed is to treat the trees as unrooted or rooted. Unrooted is the default.

The terminal type (T) and progress (1) options do not need description here.

Option 2 controls how many tree files are read in, which trees are to be compared, and how the output is to be presented. It causes another menu to appear:

Tree Pairing Submenu:
 A     Distances between adjacent pairs in tree file.
 P     Distances between all possible pairs in tree file.
 C     Distances between corresponding pairs in one tree file and another.
 L     Distances between all pairs in one tree file and another.

Option A computes the distances between successive pairs of trees in the tree input file -- between trees 1 and 2, trees 3 and 4, trees 5 and 6, and so on. If there are an odd number of trees in the input tree file the last tree will be ignored and a warning message printed to remind the user that nothing was done with it.

Option P computes distances between all pairs of trees in the input tree file. Thus with 10 trees 10 x 10 = 100 distances will be computed, including distances between each tree and itself.

Option C takes input from two tree files and computes distances between corresponding members of the two tree files. Thus distances will be computed between tree 1 of the first tree file and tree 1 of the second one, between tree 2 of the first file and tree 2 of the second one, and so on. If the number of trees in the two files differs, the extra trees in the file that has more of them are ignored and a warning is printed out.

Option L computes distances between all pairs of trees, where one tree is taken from one tree file and the other from the other tree file. Thus if the first tree file has 7 trees and the second has 5 trees, 7 x 5 = 35 different distances will be computed. Note -- this option seems not to work at the moment. We hope to fix this soon.

If option 2 is not selected, the program defaults to looking at one tree file and computing distances of adjacent pairs (so that option A is the default).

OUTPUT

The results of the analysis are written onto an output file whose default file name is outfile.

If any of the four types of analysis are selected, the program asks the user how they want the results presented. Here is that menu for options P or L:


Distances output options:
 F     Full matrix.
 V     One pair per line, verbose.
 S     One pair per line, sparse.

 Choose one: (F,V,S)

The Full matrix (choice F) is a table showing all distances. It is written onto the output file. The table is presented as groups of 10 columns. Here is the Full matrix for the 12 trees in the input tree file which is given as an example at the end of this page.


Tree distance program, version 3.6

Symmetric differences between all pairs of trees in tree file:

          1     2     3     4     5     6     7     8     9    10    
      \------------------------------------------------------------
    1 |   0     4     2    10    10    10    10    10    10    10  
    2 |   4     0     2    10     8    10     8    10     8    10  
    3 |   2     2     0    10    10    10    10    10    10    10  
    4 |  10    10    10     0     2     2     4     2     4     0  
    5 |  10     8    10     2     0     4     2     4     2     2  
    6 |  10    10    10     2     4     0     2     2     4     2  
    7 |  10     8    10     4     2     2     0     4     2     4  
    8 |  10    10    10     2     4     2     4     0     2     2  
    9 |  10     8    10     4     2     4     2     2     0     4  
   10 |  10    10    10     0     2     2     4     2     4     0  
   11 |   2     2     0    10    10    10    10    10    10    10  
   12 |  10    10    10     2     4     2     4     0     2     2  


         11    12    
      \------------
    1 |   2    10  
    2 |   2    10  
    3 |   0    10  
    4 |  10     2  
    5 |  10     4  
    6 |  10     2  
    7 |  10     4  
    8 |  10     0  
    9 |  10     2  
   10 |  10     2  
   11 |   0    10  
   12 |  10     0  


The Full matrix is only available for analyses P and L (not for A or C).

Option V (Verbose) writes one distance per line. The Verbose output is the default. Here it is for the example data set given below:


Tree distance program, version 3.6a3

Symmetric differences between adjacent pairs of trees:

Trees 1 and 2:    4
Trees 3 and 4:    10
Trees 5 and 6:    4
Trees 7 and 8:    4
Trees 9 and 10:    4
Trees 11 and 12:    10

Option S (Sparse or terse) is similar except that all that is given on each line are the numbers of the two trees and the distance, separated by blanks. This may be a convenient format if you want to write a program to read these numbers in, and you want to spare yourself the effort of having the program wade through the words on each line in the Verbose output. The first four lines of the Sparse output are titles that your program would want to skip past. Here is the Sparse output for the example trees.


Tree distance program, version 3.6

Symmetric differences between adjacent pairs of trees:

1 2 4
3 4 10
5 6 4
7 8 4
9 10 4
11 12 10

CREDITS AND FUTURE

TREEDIST was written by Dan Fineman. In the future we hope to expand it to consider a distance based on branch lengths as well as tree topologies. The Branch Score distance defined by Kuhner and Felsenstein (1994) is the one we have in mind (the Branch Score defined by them is actually the square of the distance). We also hope to compute a distance based on quartets shared and not shared by trees (implicit in the work of Estabrook, McMorris, and Meacham, 1985).


TEST DATA SET

(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));
(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(E,(G,((F,I),(((J,H),D),C))))));
(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));
(A,(B,(E,((F,I),(G,(((J,H),D),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));
(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));

The output from default settings for this test set is given above (it is the Verbose output example).

PHYLIPNEW-3.69.650/doc/factor.html

version 3.6

FACTOR - Program to factor multistate characters.

© Copyright 1986-2002 by The University of Washington. Written by Christopher Meacham and Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

Note: Factor is an Old Style program. This means that it takes some of its options information, notably the Ancestral states and Factors options from the input file rather than from separate files of their own as the New Style programs in this version of PHYLIP do.

Programmed by C. Meacham, Botany, Univ. of Georgia, Athens, Georgia (current address: University of California, Berkeley, California 94720); additional code and documentation by Joe Felsenstein.

This program factors a data set that contains multistate characters, creating a data set consisting entirely of binary (0,1) characters that, in turn, can be used as input to any of the other discrete character programs in this package, except for PARS. Besides this primary function, FACTOR also provides an easy way of deleting characters from a data set. The input format for FACTOR is very similar to the input format for the other discrete character programs except for the addition of character-state tree descriptions.

Note that this program has no way of converting an unordered multistate character into binary characters. This is a weakness of the Old Style discrete characters programs in this package. Fortunately, PARS has joined the package, and it enables unordered multistate characters, in which any state can change to any other in one step, to be analyzed with parsimony.

FACTOR is really for a different case, that in which there are multiple states related on a "character state tree", which specifies for each state which other states it can change to. That graph of states is assumed to be a tree, with no loops in it.

The first line of the input file should contain the number of species and the number of multistate characters. This first line is followed by the lines describing the character-state trees, one description per line. The species information constitutes the last part of the file. Any number of lines may be used for a single species.

FIRST LINE

The first line is free format with the number of species first, separated by at least one blank (space) from the number of multistate characters, which in turn is separated by at least one blank from the options, if present.

OPTIONS

The options are selected from a menu that looks like this:


Factor -- multistate to binary recoding program, version 3.6a3

Settings for this run:
  A      put ancestral states in output file?  No
  F   put factors information in output file?  No
  0       Terminal type (IBM PC, ANSI, none)?  (none)
  1      Print indications of progress of run  Yes

Are these settings correct? (type Y or the letter for one to change)

The options particular to this program are:

A
Choosing the A (Ancestors) option toggles on and off the setting that causes a line to be written in the output that describes the states of the ancestor as indicated by the character-state tree descriptions (see below). If the ancestral state is not specified by a particular character-state tree, a "?" signifying an unknown character state will be written. The multistate characters are factored in such a way that the ancestral state in the factored data set will always be "0". The ancestor line does not get counted as a species.

F
Choosing the F (Factors) option toggles on and off a setting that will cause a "FACTORS" line to be written in the output. This line will indicate to other programs which factors came from the same multistate character. Of the programs currently in the package only SEQBOOT, MOVE, and DOLMOVE use this information.

CHARACTER-STATE TREE DESCRIPTIONS

The character-state trees are described in free format. The character number of the multistate character is given first followed by the description of the tree itself. Each description must be completed on a single line. Each character that is to be factored must have a description, and the characters must be described in the order that they occur in the input, that is, in numerical order.

The tree is described by listing the pairs of character states that are adjacent to each other in the character-state tree. The two character states in each adjacent pair are separated by a colon (":"). If character fifteen has this character state tree for possible states "A", "B", "C", and "D":

                         A ---- B ---- C
                                |
                                |
                                |
                                D

then the character-state tree description would be

                        15  A:B B:C D:B

Note that either symbol may appear first. The ancestral state is identified, if desired, by putting it "adjacent" to a period. If we wanted to root character fifteen at state C:

                         A <--- B <--- C
                                |
                                |
                                V
                                D

we could write

                      15  B:D A:B C:B .:C

Both the order in which the pairs are listed and the order of the symbols in each pair are arbitrary. However, each pair may only appear once in the list. Any symbols may be used for a character state in the input except the character that signals the connection between two states (in the distribution copy this is set to ":"), ".", and, of course, a blank. Blanks are ignored completely in the tree description so that even B:DA:BC:B.:C or B : DA : BC : B. : C would be equivalent to the above example. However, at least one blank must separate the character number from the tree description.
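
As an illustration of the format, here is a simplified sketch (with names of my own, not FACTOR's code, and assuming the pairs are separated by blanks rather than run together) of how such a description line can be read:

def parse_state_tree(line, sep=":"):
    fields = line.split()
    character_number = int(fields[0])
    pairs, ancestor = [], None
    for token in fields[1:]:
        a, b = token.split(sep)
        if a == ".":
            ancestor = b          # ".:C" marks C as the ancestral state
        elif b == ".":
            ancestor = a
        else:
            pairs.append((a, b))  # an adjacent pair of states
    return character_number, pairs, ancestor

print(parse_state_tree("15  B:D A:B C:B .:C"))
# (15, [('B', 'D'), ('A', 'B'), ('C', 'B')], 'C')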

DELETING CHARACTERS FROM A DATA SET

If no description line appears in the input for a particular character, then that character will be omitted from the output. If the character number is given on the line, but no character-state tree is provided, then the symbol for the character in the input will be copied directly to the output without change. This is useful for characters that are already coded "0" and "1". Characters can be deleted from a data set simply by listing only those that are to appear in the output.

TERMINATING THE LIST OF TREE DESCRIPTIONS

The last character-state tree description should be followed by a line containing the number "999". This terminates processing of the trees and indicates the beginning of the species information.

SPECIES INFORMATION

The format for the species information is basically identical to the other discrete character programs. The first ten character positions are allotted to the species name (this value may be changed by altering the value of the constant nmlngth at the beginning of the program). The character states follow and may be continued to as many lines as desired. There is no current method for indicating polymorphisms. It is possible to either put blanks between characters or not.

There is a method for indicating uncertainty about states. There is one character value that stands for "unknown". If this appears in the input data then "?" is written out in all the corresponding positions in the output file. The character value that designates "unknown" is given in the constant unkchar at the beginning of the program, and can be changed by changing that constant. It is set to "?" in the distribution copy.

OUTPUT

The first line of output will contain the number of species and the number of binary characters in the factored data set followed by the letter "A" if the A option was specified in the input. If option F was specified, the next line will begin "FACTORS". If option A was specified, the line describing the ancestor will follow next. Finally, the factored characters will be written for each species in the format required for input by the other discrete programs in the package. The maximum length of the output lines is 80 characters, but this maximum length can be changed prior to compilation.

In fact, the format of the output file for the A and F options is not correct for the current release of PHYLIP. We need to change their output to write a factors file and an ancestors file instead of putting the Factors and Ancestors information into the data file.

ERRORS

The output should be checked for error messages. Errors will occur in the character-state tree descriptions if the format is incorrect (colons in the wrong place, etc.), if more than one root is specified, if the tree contains loops (and hence is not a tree), and if the tree is not connected, e.g.

                             A:B B:C D:E

describes

                  A ---- B ---- C          D ---- E

This "tree" is in two unconnected pieces. An error will also occur if a symbol appears in the data set that is not in the tree description for that character. Blanks at the end of lines when the species information is continued to a new line will cause this kind of error.

CONSTANTS AVAILABLE TO BE CHANGED

At the beginning of the program a number of constants are available to be changed to accommodate larger data sets. These are "maxstates", "maxoutput", "sizearray", "factchar" and "unkchar". The constant "maxstates" gives the maximum number of states per character (set at 20 in the distribution copy). The constant "maxoutput" gives the maximum width of a line in the output file (80 in the distribution copy). The sum of squares of the numbers of states in the characters must not exceed the constant "sizearray". It is initially set to 2000, so that although 20 states are allowed (at the initial setting of maxstates) per character, there cannot be 20 states in all of 100 characters.

Particularly important constants are "factchar" and "unkchar", which are not numerical values but characters. Initially set to the colon ":", "factchar" is the character that will be used to separate states in the input of character state trees. It can be changed by changing this constant. (We could have used a hyphen ("-") but didn't because that would make the minus-sign ("-") unavailable as a character state in +/- characters). The constant "unkchar" is the character value in the input data that indicates that the state is unknown. It is set to "?" in the distribution copy. If your computer is one that lacks the colon ":" in its character set or uses a nonstandard character code such as EBCDIC, you will want to change the constant "factchar".

INPUT AND OUTPUT FILES

The input file for the program has the default file name "infile" and the output file, the one that has the binary character state data, has the name "outfile".

----SAMPLE INPUT----- -----Comments (not part of input file) -----
 
   4   6  A
1 A:B B:C        
2 A:B B:.        
4                
5 0:1 1:2 .:0    
6 .:# #:$ #:%    
999              
Alpha     CAW00# 
Beta      BBX01%
Gamma     ABY12#
Epsilon   CAZ01$


     4 species; 6 characters; A option on
     A ---- B ---- C
     B ---> A
     Character 3 deleted; 4 unchanged
     0 ---> 1 ---> 2
     % <--- # ---> $
     Signals end of trees
     Species information begins

     
    
---SAMPLE OUTPUT----- -----Comments (not part of output file) -----
    5    8    A
ANCESTOR  ??0?0000
Alpha     11100000
Beta      10001001
Gamma     00011100
Epsilon   11101010
 
     5 species (incl. anc.); 8 factors
     Chars. 1 and 2 come from old number 1
     Char. 3 comes from old number 2
     Char. 4 is old number 4
     Chars. 5 and 6 come from old number 5
     Chars. 7 and 8 come from old number 6
PHYLIPNEW-3.69.650/doc/dnacomp.html

version 3.6

DNACOMP -- DNA Compatibility Program

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the compatibility method for DNA sequence data. For a four-state character without a character-state tree, as in DNA sequences, the usual clique theorems cannot be applied. The approach taken in this program is to directly evaluate each tree topology by counting how many substitutions are needed in each site, comparing this to the minimum number that might be needed (one less than the number of bases observed at that site), and then evaluating the number of sites which achieve the minimum number. This is the evaluation of the tree (the number of compatible sites), and the topology is chosen so as to maximize that number.
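
The criterion can be sketched compactly (this is an illustration with assumed data structures, not DNACOMP's own code): count the substitutions a site needs on a given tree with the Fitch (1971) algorithm, and call the site compatible when that count equals one less than the number of distinct bases observed at the site.

def fitch_steps(tree, state_sets):
    # tree: nested 2-tuples of species names; state_sets: name -> set of bases
    steps = 0
    def postorder(node):
        nonlocal steps
        if isinstance(node, str):
            return state_sets[node]
        left, right = postorder(node[0]), postorder(node[1])
        shared = left & right
        if shared:
            return shared
        steps += 1                 # the two subtrees force a substitution
        return left | right
    postorder(tree)
    return steps

def compatible_sites(tree, alignment):
    n_sites = len(next(iter(alignment.values())))
    count = 0
    for j in range(n_sites):
        column = {name: {seq[j]} for name, seq in alignment.items()}
        observed = {seq[j] for seq in alignment.values()}
        if fitch_steps(tree, column) == len(observed) - 1:
            count += 1
    return count

tree = (((("Epsilon", "Delta"), "Gamma"), "Beta"), "Alpha")
alignment = {"Alpha":   "AACGUGGCCAAAU", "Beta":    "AAGGUCGCCAAAC",
             "Gamma":   "CAUUUCGUCACAA", "Delta":   "GGUAUUUCGGCCU",
             "Epsilon": "GGGAUCUCGGCCC"}
print(compatible_sites(tree, alignment))   # 11, as in the example output below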

Compatibility methods originated with Le Quesne's (1969) suggestion that one ought to look for trees supported by the largest number of perfectly fitting (compatible) characters. Fitch (1975) showed by counterexample that one could not use the pairwise compatibility methods used in CLIQUE to discover the largest clique of jointly compatible characters.

The assumptions of this method are similar to those of CLIQUE. In a paper in the Biological Journal of the Linnean Society (1981b) I discuss this matter extensively. In effect, the assumptions are that:

  1. Each character evolves independently.
  2. Different lineages evolve independently.
  3. The ancestral base at each site is unknown.
  4. The rates of change in most sites over the time spans involved in the divergence of the group are very small.
  5. A few of the sites have very high rates of change.
  6. We do not know in advance which are the high and which the low rate sites.

That these are the assumptions of compatibility methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that arguments such as mine are invalid and that parsimony (and perhaps compatibility) methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

There is, however, some reason to believe that the present criterion is not the proper way to correct for the presence of some sites with high rates of change in nucleotide sequence data. It can be argued that sites showing more than two nucleotide states, even if those are compatible with the other sites, are also candidates for sites with high rates of change. It might then be more proper to use DNAPARS with the Threshold option with a threshold value of 2.

Change from an occupied site to a gap is counted as one change. Reversion from a gap to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a gap N bases long is N separate events. This may be an overcorrection. When we have nonoverlapping gaps, we could instead code a gap as a single event by changing all but the first "-" in the gap into "?" characters. In this way only the first base of the gap causes the program to infer a change.
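
The suggested recoding is mechanical enough to automate; here is a small sketch (not part of DNACOMP) that keeps the first "-" of each gap and turns the rest into "?":

import re

def collapse_gaps(sequence):
    # keep one "-" per gap so each gap costs a single inferred change
    return re.sub(r"-+", lambda gap: "-" + "?" * (len(gap.group(0)) - 1), sequence)

print(collapse_gaps("ACGT----ACGT--A"))   # ACGT-???ACGT-?A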

The input data is standard. The first line of the input file contains the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


DNA compatibility algorithm, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4  Print steps & compatibility at sites  No
  5  Print sequences at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, J, O, W, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The O (outgroup) option has no effect if the U (user-defined tree) option is in effect. The user-defined trees (option U) fed in must be strictly bifurcating, with a two-way split at their base.

The interpretation of weights (option W) in the case of a compatibility method is that they count how many times the character (in this case the site) is counted in the analysis. Thus a character can be dropped from the analysis by assigning it zero weight. On the other hand, giving it a weight of 5 means that in any clique it is in, it is counted as 5 characters when the size of the clique is evaluated. Generally, weights other than 0 or 1 do not have much meaning when dealing with DNA sequences.

Output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 4 is toggled on) a table of the number of changes of state required in each character. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a gap may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest compatibility. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of weighted compatibility differences between trees, taken across sites. If the two trees' compatibilities are more than 1.96 standard deviations different then the trees are declared significantly different.
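
The two-tree test can be sketched as follows (my own summary of the procedure just described, not the program's code): take the per-site compatibility scores of the two trees, and ask whether the total difference exceeds 1.96 standard deviations, with the variance of the total estimated from the site-to-site variance of the differences.

import math

def kht_significant(site_scores_tree1, site_scores_tree2):
    diffs = [a - b for a, b in zip(site_scores_tree1, site_scores_tree2)]
    n = len(diffs)
    mean = sum(diffs) / n
    variance = sum((d - mean) ** 2 for d in diffs) / (n - 1)
    sd_of_total = math.sqrt(n * variance)   # s.d. of the summed difference
    return abs(sum(diffs)) > 1.96 * sd_of_total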

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of weighted compatibilities of sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected compatibility, compatibilities for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest compatibility exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the compatibility of each tree, the differences of each from the highest one, the variance of that quantity as determined by the compatibility differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

The algorithm is a straightforward modification of DNAPARS, but with some extra machinery added to calculate, as each species is added, the minimum number of base changes that could be required at each site. The program runs fairly quickly.

The constants which can be changed at the beginning of the program are: "nmlngth", the name length; "maxtrees", the maximum number of trees which the program will store for output; and "maxuser", the maximum number of user trees that can be used in the paired sites test.


TEST DATA SET

    5   13
Alpha     AACGUGGCCAAAU
Beta      AAGGUCGCCAAAC
Gamma     CAUUUCGUCACAA
Delta     GGUAUUUCGGCCU
Epsilon   GGGAUCUCGGCCC

CONTENTS OF OUTPUT FILE (if all numerical options are turned on)


DNA compatibility algorithm, version 3.6a3

 5 species,  13  sites

Name            Sequences
----            ---------

Alpha        AACGUGGCCA AAU
Beta         AAGGUCGCCA AAC
Gamma        CAUUUCGUCA CAA
Delta        GGUAUUUCGG CCU
Epsilon      GGGAUCUCGG CCC



One most parsimonious tree found:




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


total number of compatible sites is       11.0

steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       2   1   3   2   0   2   1   1   1
   10|   1   1   1   3                        

 compatibility (Y or N) of each site with this tree:

      0123456789
     *----------
   0 ! YYNYYYYYY
  10 !YYYN      

From    To     Any Steps?    State at upper node
                            
          1                AABGTSGCCA AAY
   1      2        maybe   AABGTCGCCA AAY
   2      3         yes    VAKDTCGCCA CAY
   3      4         yes    GGKATCTCGG CCY
   4   Epsilon     maybe   GGGATCTCGG CCC
   4   Delta        yes    GGTATTTCGG CCT
   3   Gamma        yes    CATTTCGTCA CAA
   2   Beta        maybe   AAGGTCGCCA AAC
   1   Alpha       maybe   AACGTGGCCA AAT



version 3.6

SEQBOOT -- Bootstrap, Jackknife, or Permutation Resampling
of Molecular Sequence, Restriction Site,
Gene Frequency or Character Data

© Copyright 1991-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into NEXUS and a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping on many of the methods in this package. The same steps are necessary with all of them. Doing things this way, some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are three:

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.

  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. (A minimal sketch of this kind of resampling appears after this list.)

  • Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups.

  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just a pair of sibling species).
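
As a rough illustration of the bootstrap and block-bootstrap sampling described in the first two items above (and not the code actually used in SEQBOOT), here is a minimal C sketch that draws blocks of B site indices with replacement, wrapping around at the end of the sequence; setting B to 1 gives the ordinary bootstrap. The seed and block size shown are arbitrary.

/* Minimal sketch of block-bootstrap resampling of site indices, with
   wrap-around at the end of the sequence as described above.  B = 1
   gives the ordinary bootstrap.  Illustration only, not the PHYLIP
   implementation. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int n = 13;          /* number of sites in the original data           */
    int B = 3;           /* block size (3 keeps codon positions balanced)  */
    srand(4333);         /* arbitrary seed; PHYLIP uses its own generator  */

    int drawn = 0;
    printf("resampled site indices:");
    while (drawn < n) {
        int start = rand() % n;             /* random block start          */
        for (int j = 0; j < B && drawn < n; j++, drawn++)
            printf(" %d", (start + j) % n); /* wrap past the last site     */
    }
    printf("\n");
    return 0;
}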

The data input file is of standard form for molecular sequences (either in interleaved or sequential form), restriction sites, gene frequencies, or binary morphological characters.

When the program runs it first asks you for a random number seed. This should be an integer greater than zero (and probably less than 32767) that is of the form 4n+1, that is, it leaves a remainder of 1 when divided by 4. This can be judged by looking at the last two digits of the integer (for instance 7651 is not of form 4n+1 as 51, when divided by 4, leaves the remainder 3). The random number seed is used to start the random number generator. If the random number seed is not odd, the program will request it again. Any odd number can be used, but may result in a random number sequence that repeats itself after less than the full one billion numbers. Usually this is not a problem. As the random numbers appear to be unpredictable, there is no such thing as a "good" seed -- the numbers produced from one seed are indistinguishable from those produced by another, and it is not true that the numbers produced from one seed (say 4533) are similar to those produced from a nearby seed (say 4537).
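
As a small illustration of this requirement (not code from the program), here is a minimal C sketch that checks whether a candidate seed is of the form 4n+1; the seed 7651 from the example above fails the check.

/* Minimal sketch of the seed check described above: a usable random
   number seed leaves remainder 1 when divided by 4.  Illustration only. */
#include <stdio.h>

int main(void)
{
    long seed = 7651;                      /* candidate seed */
    if (seed > 0 && seed % 4 == 1)
        printf("%ld is of the form 4n+1\n", seed);
    else
        printf("%ld is not of the form 4n+1 (remainder %ld)\n",
               seed, seed % 4);
    return 0;
}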

Then the program shows you a menu to allow you to choose options. The menu looks like this:


Bootstrapping algorithm, version 3.6a3

Settings for this run:
  D      Sequence, Morph, Rest., Gene Freqs?  Molecular sequences
  J  Bootstrap, Jackknife, Permute, Rewrite?  Bootstrap
  B      Block size for block-bootstrapping?  1 (regular bootstrap)
  R                     How many replicates?  100
  W              Read weights of characters?  No
  C                Read categories of sites?  No
  F     Write out data sets or just weights?  Data sets
  I             Input sequences interleaved?  Yes
  0      Terminal type (IBM PC, ANSI, none)?  (none)
  1       Print out the data at start of run  No
  2     Print indications of progress of run  Yes

  Y to accept these or type the letter for one to change

The user selects options by typing one of the letters in the left column, and continues to do so until all options are correctly set. Then the program can be run by typing Y.

It is important to select the correct data type (the D selection). Each time D is typed the program will change data type, proceeding successively through Molecular Sequences, Discrete Morphological Characters, Restriction Sites, and Gene Frequencies. Some of these will cause additional entries to appear in the menu. If the Molecular Sequences or Restriction Sites setting is chosen, the I (Interleaved) option appears in the menu (and as Molecular Sequences is also the default, it therefore appears in the first menu). It is the usual I option discussed in the Molecular Sequences document file and in the main documentation files for the package, and is on by default.

If the Restriction Sites option is chosen the menu option E appears, which asks whether the input file contains a third number on the first line of the file, for the number of restriction enzymes used to detect these sites. This is necessary because data sets for RESTML need this third number, but other programs do not, and SEQBOOT needs to know what to expect.

If the Gene Frequencies option is chosen a menu option A appears which allows the user to specify that all alleles at each locus are in the input file. The default setting is that one allele is absent at each locus.

The J option allows the user to select Bootstrapping, Delete-Half-Jackknifing, or the Archie-Faith permutation of species within characters. It changes successively among these three each time J is typed.

The B option selects the Block Bootstrap. When you select option B the program will ask you to enter the block length. When the block length is 1, this means that we are doing regular bootstrapping rather than block-bootstrapping.

The R option allows the user to set the number of replicate data sets. This defaults to 100. Most statisticians would be happiest with 1000 to 10,000 replicates in a bootstrap, but 100 gives a rough picture. You will have to decide this based on how long a running time you are willing to tolerate.

The W (Weights) option allows weights to be read from a file whose default name is "weights". The weights follow the format described in the main documentation file. Weights can only be 0 or 1, and act to select the characters (or sites) that will be used in the resampling, the others being ignored and always omitted from the output data sets. Note: At present, if you use W together with the F (just weights) option, you write a file of weights, but with only weights for the sites that had input weights of 1, the others being omitted. Thus if you had 100 characters, and gave 60 of them weights of 1, when you produce the output weights these will only have 60 weights, not 100. Thus they could only be used together with a data file that had been edited to remove the sites that you gave 0 weights to. This is clumsy and we need to correct it.

The C (Categories) option can be used with molecular sequence programs to allow assignment of sites or amino acid positions to user-defined rate categories. The assignment of rates to sites is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444

The only use of the Categories information in SEQBOOT is that they are sampled along with the sites (or amino acid positions) and are written out onto a file whose default name is "outcategories", which has one set of categories information for each bootstrap or jackknife replicate.

The F option is a particularly important one. It is used to choose whether to produce multiple output data sets or multiple sets of weights. If your data set is large, a file with (say) 1000 such data sets can be very large and may use up too much space on your system. If you choose the F option, the program will instead produce a weights file with multiple sets of weights. The default name of this file is "outweights". Except for some programs that cannot handle multiple sets of weights, the programs have an M (multiple data sets) option that asks the user whether to use multiple data sets or multiple sets of weights. If the latter is selected when running those programs, they read one data set, but analyze it multiple times, each time reading a new set of weights. As both bootstrapping and jackknifing can be thought of as reweighting the characters, this accomplishes the same thing (the multiple weights option is not available for Archie/Faith permutation). As the file with multiple sets of weights is much smaller than a file with multiple data sets, this can be an attractive way to save file space. When multiple sets of weights are chosen, they reflect the sampling as well as any set of weights that was read in, so that you can use SEQBOOT's W option as well.
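
To illustrate the idea that a bootstrap replicate can be recorded as a set of per-site weights rather than as a whole resampled data set (this is not SEQBOOT's own code), here is a minimal C sketch in which each site's weight is simply the number of times that site was drawn; the counts sum to the number of sites.

/* Minimal sketch of recording a bootstrap replicate as per-site weights
   (counts) rather than as a rewritten data set, the idea behind the F
   (just weights) choice described above.  Illustration only. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int n = 13;                 /* number of sites                 */
    int weight[13] = {0};       /* how often each site is drawn    */
    srand(4333);                /* arbitrary seed                  */

    for (int i = 0; i < n; i++)
        weight[rand() % n]++;   /* sample sites with replacement   */

    /* these counts sum to n and can stand in for a resampled data set */
    printf("weights:");
    for (int i = 0; i < n; i++)
        printf(" %d", weight[i]);
    printf("\n");
    return 0;
}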

The 0 (Terminal type) option is the usual one.

Input File

The data files read by SEQBOOT are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs.

The only option that can be present in the input file is F (Factors), the latter only in the case of binary (0,1) characters. The Factors option allows us to specify that groups of binary characters represent one multistate character. When sampling is done they will be sampled or omitted together, and when permutations of species are done they will all have the same permutation, as would happen if they really were just one column in the data matrix. For further description of the F (Factors) option see the Discrete Characters Programs documentation file.

Output

The output file will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters.

The order of species in the data sets in the output file will vary randomly. This is a precaution to prevent any result that is sensitive to the input order of species from showing up repeatedly in the programs that analyze these data, and thus appearing to have evidence in its favor.

The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.

Size and Speed

The program runs moderately quickly, though more slowly when the Permutation resampling method is used than with the others.

Future

I hope in the future to include code to pass on the Ancestors option from the input file (for use in programs MIX and DOLLOP) to the output file, a serious omission in the current version.


TEST DATA SET

    5    6
Alpha     AACAAC
Beta      AACCCC
Gamma     ACCAAC
Delta     CCACCA
Epsilon   CCAAAC


CONTENTS OF OUTPUT FILE

(If Replicates are set to 10 and seed to 4333)

    5     6
Alpha     ACAAAC
Beta      ACCCCC
Gamma     ACAAAC
Delta     CACCCA
Epsilon   CAAAAC
    5     6
Alpha     AAAACC
Beta      AACCCC
Gamma     CCAACC
Delta     CCCCAA
Epsilon   CCAACC
    5     6
Alpha     ACAAAC
Beta      ACCCCC
Gamma     CCAAAC
Delta     CACCCA
Epsilon   CAAAAC
    5     6
Alpha     ACCAAA
Beta      ACCCCC
Gamma     ACCAAA
Delta     CAACCC
Epsilon   CAAAAA
    5     6
Alpha     ACAAAC
Beta      ACCCCC
Gamma     ACAAAC
Delta     CACCCA
Epsilon   CAAAAC
    5     6
Alpha     AAAACA
Beta      AAAACC
Gamma     AAACCA
Delta     CCCCAC
Epsilon   CCCCAA
    5     6
Alpha     AAACCC
Beta      CCCCCC
Gamma     AAACCC
Delta     CCCAAA
Epsilon   AAACCC
    5     6
Alpha     AAAACC
Beta      AACCCC
Gamma     AAAACC
Delta     CCCCAA
Epsilon   CCAACC
    5     6
Alpha     AAAAAC
Beta      AACCCC
Gamma     CCAAAC
Delta     CCCCCA
Epsilon   CCAAAC
    5     6
Alpha     AACCAC
Beta      AACCCC
Gamma     AACCAC
Delta     CCAACA
Epsilon   CCAAAC


version 3.6

DnaMLK -- DNA Maximum Likelihood program
with molecular clock

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the maximum likelihood method for DNA sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to DNAML. Details of the algorithm are not yet published, but many aspects of it are similar to DNAML, and these are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites.

The assumptions of the model are:

  1. Each site in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. There is a molecular clock.
  4. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  5. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  6. A substitution consists of one of two sorts of events:
    (a)
    The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition.
    (b)
    The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to no change, to a transition, or to a transversion.

    The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines.

    The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21.

    • First kind of event:

      1. Determine whether the existing base is a purine or a pyrimidine.
      2. Draw from the proper pool:

              Purine pool:                Pyrimidine pool:
        
             |               |            |               |
             |   0.4706 A    |            |   0.5714 C    |
             |   0.5294 G    |            |   0.4286 T    |
             | (ratio is     |            | (ratio is     |
             |  0.24 : 0.27) |            |  0.28 : 0.21) |
             |_______________|            |_______________|
        

    • Second kind of event:

      Draw from the overall pool:

      
                    |                  |
                    |      0.24 A      |
                    |      0.28 C      |
                    |      0.27 G      |
                    |      0.21 T      |
                    |__________________|
      

    Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier. A closely similar, but not precisely identical, model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996). (A small simulation sketch of this two-event process appears just after this list of assumptions.)
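
As an illustration of the two kinds of events described in assumption 6 (and not the code used in the program), here is a minimal C sketch that draws one replacement base under this model, using the example frequencies 0.24, 0.28, 0.27, and 0.21; the probability of the first kind of event is a hypothetical stand-in for what the program derives from the transition/transversion ratio.

/* Minimal sketch of one substitution event under the two-event model
   described above, with the example base frequencies 0.24/0.28/0.27/0.21.
   The probability of a first-kind event is a hypothetical parameter here;
   in the program it follows from the transition/transversion ratio. */
#include <stdio.h>
#include <stdlib.h>

static const char bases[4] = {'A', 'C', 'G', 'T'};
static const double freq[4] = {0.24, 0.28, 0.27, 0.21};

static char draw(const double *p, const char *set, int k)
{
    double u = (double) rand() / RAND_MAX, cum = 0.0;
    for (int i = 0; i < k; i++) {
        cum += p[i];
        if (u <= cum) return set[i];
    }
    return set[k - 1];
}

int main(void)
{
    srand(4333);
    char old = 'A';
    double p_first = 0.7;       /* hypothetical: chance of a type-1 event */

    char new_base;
    if ((double) rand() / RAND_MAX < p_first) {
        /* first kind: draw from the purine or pyrimidine pool, with the
           pool frequencies in the same ratio as in the overall pool */
        if (old == 'A' || old == 'G') {
            double p[2] = {0.24 / (0.24 + 0.27), 0.27 / (0.24 + 0.27)};
            new_base = draw(p, "AG", 2);
        } else {
            double p[2] = {0.28 / (0.28 + 0.21), 0.21 / (0.28 + 0.21)};
            new_base = draw(p, "CT", 2);
        }
    } else {
        /* second kind: draw from the overall pool, ignoring the old base */
        new_base = draw(freq, bases, 4);
    }
    printf("%c -> %c\n", old, new_base);
    return 0;
}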

Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length is of a patch of sites all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate.

The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites.

This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.

INPUT FORMAT AND OPTIONS

Subject to these assumptions, the program is a correct maximum likelihood method. The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


Nucleic acid sequence
   Maximum Likelihood method with molecular clock, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  T        Transition/transversion ratio:  2.0
  F       Use empirical base frequencies?  Yes
  C   One category of substitution rates?  Yes
  R           Rate variation among sites?  constant rate
  G                Global rearrangements?  No
  W                       Sites weighted?  No
  J   Randomize input order of sequences?  No. Use input order
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes
  5   Reconstruct hypothetical sequences?  No

Are these settings correct? (type Y or the letter for one to change)
  

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, W, J, O, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The T option in this program does not stand for Threshold, but instead is the Transition/transversion option. The user is prompted for a real number greater than 0.0, as the expected ratio of transitions to transversions. Note that this is not the ratio of the first to the second kinds of events, but the resulting expected ratio of transitions to transversions. The exact relationship between these two quantities depends on the frequencies in the base pools. The default value of the T parameter if you do not use the T option is 2.0.

The F (Frequencies) option is one which may save users much time. If you want to use the empirical frequencies of the bases, observed in the input sequences, as the base frequencies, you simply use the default setting of the F option. These empirical frequencies are not really the maximum likelihood estimates of the base frequencies, but they will often be close to those values (what they are is maximum likelihood estimates under a "star" or "explosion" phylogeny). If you change the setting of the F option you will be prompted for the frequencies of the four bases. These must add to 1 and are to be typed on one line separated by blanks, not commas.

The R (Hidden Markov Model rates) option allows the user to approximate a Gamma distribution of rates among sites, or a Gamma distribution plus a class of invariant sites, or to specify how many categories of substitution rates there will be in a Hidden Markov Model of rate variation, and what are the rates and probabilities for each. By repeatedly selecting the R option one toggles among no rate variation, the Gamma, Gamma+I, and general HMM possibilities.

If you choose Gamma or Gamma+I the program will ask how many rate categories you want. If you have chosen Gamma+I, keep in mind that one rate category will be set aside for the invariant class and only the remaining ones used to approximate the Gamma distribution. For the approximation we do not use the quantile method of Yang (1995) but instead use a quadrature method using generalized Laguerre polynomials. This should give a good approximation to the Gamma distribution with as few as 5 or 6 categories.

In the Gamma and Gamma+I cases, the user will be asked to supply the coefficient of variation of the rate of substitution among sites. This is different from the parameters used by Nei and Jin (1990) but related to them: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

     CV = 1 / a^(1/2)

or

     a = 1 / (CV)^2

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
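
A minimal C sketch of this conversion between the coefficient of variation (what the program asks for) and the Gamma shape parameter alpha follows; illustration only.

/* Minimal sketch of the relation stated above between the coefficient of
   variation of rates and the Gamma shape parameter alpha. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    double cv = 1.0;                 /* coefficient of variation */
    double alpha = 1.0 / (cv * cv);  /* a = 1 / CV^2              */
    printf("CV = %.3f     ->  alpha = %.3f\n", cv, alpha);
    printf("alpha = %.3f  ->  CV = %.3f\n", alpha, 1.0 / sqrt(alpha));
    return 0;
}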

If the user instead chooses the general Hidden Markov Model option, they are first asked how many HMM rate categories there will be (for the moment there is an upper limit of 9, which should not be restrictive). Then the program asks for the rates for each category. These rates are only meaningful relative to each other, so that rates 1.0, 2.0, and 2.4 have the exact same effect as rates 2.0, 4.0, and 4.8. Note that an HMM rate category can have rate of change 0, so that this allows us to take into account that there may be a category of sites that are invariant. Note that the run time of the program will be proportional to the number of HMM rate categories: twice as many categories means twice as long a run. Finally the program will ask for the probabilities of a random site falling into each of these regional rate categories. These probabilities must be nonnegative and sum to 1. Default for the program is one category, with rate 1.0 and probability 1.0 (actually the rate does not matter in that case).

If more than one HMM rate category is specified, then another option, A, becomes visible in the menu. This allows us to specify that we want to assume that sites that have the same HMM rate category are expected to be clustered so that there is autocorrelation of rates. The program asks for the value of the average patch length. This is an expected length of patches that have the same rate. If it is 1, the rates of successive sites will be independent. If it is, say, 10.25, then the chance of change to a new rate will be 1/10.25 after every site. However the "new rate" is randomly drawn from the mix of rates, and hence could even be the same. So the actual observed length of patches with the same rate will be a bit larger than 10.25. Note below that if you choose multiple patches, there will be an estimate in the output file as to which combination of rate categories contributed most to the likelihood.
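
To illustrate how this patch-length scheme behaves (this is not the HMM likelihood computation itself), here is a minimal C sketch that generates a string of rate categories along a sequence, redrawing the category from the prior mix with probability 1/(patch length) after each site; the rates, priors, and patch length are taken from the three-category example given earlier in this document.

/* Minimal sketch of the autocorrelation scheme described above: after
   each site there is a 1/patch_length chance of redrawing the category
   from the prior mix (and the redrawn category may equal the old one).
   Illustration only. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    double rate[3]  = {1.0, 2.4, 0.0};   /* HMM rate categories       */
    double prior[3] = {0.4, 0.3, 0.3};   /* their prior probabilities */
    double patch = 2.0;                  /* average patch length      */
    int nsites = 30;
    srand(4333);

    /* draw the first site's category from the prior */
    int cat = 0;
    double u = (double) rand() / RAND_MAX, cum = 0.0;
    for (int k = 0; k < 3; k++) { cum += prior[k]; if (u <= cum) { cat = k; break; } }

    for (int i = 0; i < nsites; i++) {
        printf("%d", cat + 1);
        if ((double) rand() / RAND_MAX < 1.0 / patch) {   /* redraw? */
            u = (double) rand() / RAND_MAX; cum = 0.0;
            for (int k = 0; k < 3; k++) { cum += prior[k]; if (u <= cum) { cat = k; break; } }
        }
    }
    printf("\n(categories 1..3 have rates %.1f, %.1f, %.1f)\n",
           rate[0], rate[1], rate[2]);
    return 0;
}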

Note that the autocorrelation scheme we use is somewhat different from Yang's (1995) autocorrelated Gamma distribution. I am unsure whether this difference is of any importance -- our scheme is chosen for the ease with which it can be implemented.

The C option allows user-defined rate categories. The user is prompted for the number of user-defined rates, and for the rates themselves, which cannot be negative but can be zero. These numbers, which must be nonnegative (some could be 0), are defined relative to each other, so that if rates for three categories are set to 1 : 3 : 2.5 this would have the same meaning as setting them to 2 : 6 : 5. The assignment of rates to sites is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444

With the current options R, A, and C the program has gained greatly in its ability to infer different rates at different sites and estimate phylogenies under a more realistic model. Note that Likelihood Ratio Tests can be used to test whether one combination of rates is significantly better than another, provided one rate scheme represents a restriction of another with fewer parameters. The number of parameters needed for rate variation is the number of regional rate categories, plus the number of user-defined rate categories less 2, plus one if the regional rate categories have a nonzero autocorrelation.

The G (global search) option causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program.

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating. This allows us to test the hypothesis that a given branch has zero length.

If the U (user tree) option is chosen another option appears in the menu, the L option. If it is selected, it signals the program that it should take any branch lengths that are in the user tree and simply evaluate the likelihood of that tree, without further altering those branch lengths. In the case of a clock, if some branches have lengths and others do not, the program does not estimate the lengths of those that do not have lengths given in the user tree. If any of the branches do not have lengths, the program re-estimates the lengths of all of them. This is done because estimating some and not others is hard in the case of a clock.

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of sites to be analyzed, ignoring the others. The sites selected are those with weight 1. If the W option is not invoked, all sites are analyzed. The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file.

The M (multiple data sets) option will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets from the input file. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights. Note also that when we use multiple weights for bootstrapping we can also then maintain different rate categories for different sites in a meaningful way. If you use the multiple data sets option rather than multiple sets of weights, you should not at the same time use the user-defined rate categories option (option C).

The algorithm used for searching among trees is faster than it was in version 3.5, thanks to using a technique invented by David Swofford and J. S. Rogers. This involves not iterating most branch lengths on most trees when searching among tree topologies. This is of necessity a "quick-and-dirty" search but it saves much time.

OUTPUT FORMAT

The output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen.

A table is printed showing the length of each tree segment, and the time (in units of expected nucleotide substitutions per site) of each fork in the tree, measured from the root of the tree. I have not attempted to include code for approximate confidence limits on branch points, as I have done for branch lengths in DNAML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated.

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run DNAML and DNAMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In DNAML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In DNAMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2. To perform the test take the difference in log likelihoods between the two runs (DNAML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected.
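
A minimal C sketch of the arithmetic of this test follows; the two log likelihoods are hypothetical numbers (the clock value echoes the sample output below), and the chi-square lookup itself is left to a table or statistics package.

/* Minimal sketch of the likelihood ratio test of the molecular clock
   described above: twice the difference in log likelihood between the
   DNAML and DNAMLK runs, compared with a chi-square distribution having
   n - 2 degrees of freedom.  The log likelihoods below are hypothetical. */
#include <stdio.h>

int main(void)
{
    int n = 5;                         /* number of tip species         */
    double lnL_dnaml  = -67.42;        /* hypothetical, no clock        */
    double lnL_dnamlk = -68.25;        /* hypothetical, with the clock  */

    double stat = 2.0 * (lnL_dnaml - lnL_dnamlk);
    int df = (2 * n - 3) - (n - 1);    /* = n - 2                       */

    printf("LRT statistic = %.3f with %d degrees of freedom\n", stat, df);
    printf("compare with a chi-square table to decide whether to reject the clock\n");
    return 0;
}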

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like.

A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more than half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.
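
As an illustration of this reporting rule for a single site (and not the program's own code), here is a minimal C sketch that accumulates bases in decreasing order of their hypothetical shares of the likelihood until more than half is accounted for, prints the corresponding IUB ambiguity code, and capitalizes it only when the best base alone exceeds 95%.

/* Minimal sketch of the reporting rule described above for one
   reconstructed site.  The probability shares are hypothetical. */
#include <ctype.h>
#include <stdio.h>

int main(void)
{
    double p[4] = {0.46, 0.38, 0.10, 0.06};   /* hypothetical shares for A,C,G,T */

    /* IUB codes indexed by a 4-bit set: bit 0 = A, 1 = C, 2 = G, 3 = T */
    const char *iub = "-ACMGRSVTWYHKDBN";

    int set = 0, used[4] = {0, 0, 0, 0};
    double cum = 0.0, best = 0.0;
    while (cum <= 0.5) {                      /* add the next best base */
        int arg = -1;
        for (int i = 0; i < 4; i++)
            if (!used[i] && (arg < 0 || p[i] > p[arg])) arg = i;
        used[arg] = 1;
        set |= 1 << arg;
        if (p[arg] > best) best = p[arg];
        cum += p[arg];
    }
    char code = iub[set];                     /* e.g. A+C gives M        */
    printf("%c\n", best > 0.95 ? toupper(code) : tolower(code));
    return 0;
}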

PROGRAM CONSTANTS

The constants defined at the beginning of the program include "maxtrees", the maximum number of user trees that can be processed. It is small (100) at present to save some further memory but the cost of increasing it is not very great. Other constants include "maxcategories", the maximum number of site categories, "namelength", the length of species names in characters, and three others, "smoothings", "iterations", and "epsilon", that help "tune" the algorithm and define the compromise between execution speed and the quality of the branch lengths found by iteratively maximizing the likelihood. Reducing iterations and smoothings, and increasing epsilon, will result in faster execution but a worse result. These values will not usually have to be changed.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring for each pattern, lends itself readily to parallel processing.

PAST AND FUTURE OF THE PROGRAM

This program was developed in 1989 by combining code from DNAPARS and from DNAML. It was speeded up by two major developments, the use of aliasing of nucleotide sites (version 3.1) and pretabulation of some exponentials (added by Akiko Fuseki in version 3.4). In version 3.5 the Hidden Markov Model code was added and the method of iterating branch lengths was changed from an EM algorithm to direct search. The Hidden Markov Model code slows things down, especially if there is autocorrelation between sites, so this version is slower than version 3.4. Nevertheless we hope that the sacrifice is worth it.

One change that is needed in the future is to put in some way of allowing for base composition of nucleotide sequences in different parts of the phylogeny.


TEST DATA SET

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on)

(It was run with HMM rates having gamma-distributed rates approximated by 5 rate categories, with coefficient of variation of rates 1.0, and with patch length parameter = 1.5. Two user-defined rate categories were used, one for the first 6 sites, the other for the last 7, with rates 1.0 : 2.0. Weights were used, with sites 1 and 13 given weight 0, and all others weight 1.)


Nucleic acid sequence
   Maximum Likelihood method with molecular clock, version 3.6a3

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             0111111111 111


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



Empirical Base Frequencies:

   A       0.23333
   C       0.30000
   G       0.23333
  T(U)     0.23333

Transition/transversion ratio =   2.000000


Discrete approximation to gamma distributed rates
 Coefficient of variation of rates = 1.000000  (alpha = 1.000000)

State in HMM    Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023

Expected length of a patch of sites having the same rate =    1.500


Site category   Rate of change

        1           1.000
        2           2.000






                                                   +-----Epsilon   
  +------------------------------------------------4  
  !                                                +-----Delta     
--3  
  !                                           +----------Gamma     
  +-------------------------------------------2  
                                              !       +--Beta      
                                              +-------1  
                                                      +--Alpha     


Ln Likelihood =   -68.25148

 Ancestor      Node      Node Height     Length
 --------      ----      ---- ------     ------
 root            3      
   3             4          4.37769      4.37769
   4          Epsilon       4.92983      0.55214
   4          Delta         4.92983      0.55214
   3             2          3.97954      3.97954
   2          Gamma         4.92983      0.95029
   2             1          4.64910      0.66957
   1          Beta          4.92983      0.28073
   1          Alpha         4.92983      0.28073

Combination of categories that contributes the most to the likelihood:

             1122121111 112

Most probable category at each site if > 0.95 probability ("." otherwise)

             .......... ...


Probable sequences at interior nodes:

  node       Reconstructed sequence (caps if > 0.95)

    3        .rymtyscsr ymy
    4        .GkaTcTCGG CCy
 Epsilon     GGGATCTCGG CCC
 Delta       GGTATTTCGG CCT
    2        .AykTcGcCA mAy
 Gamma       CATTTCGTCA CAA
    1        .AcGTcGCCA AAy
 Beta        AAGGTCGCCA AAC
 Alpha       AACGTGGCCA AAT


version 3.6

DNAPARS -- DNA Parsimony Program

© Copyright 1986-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are analogous to those of MIX:

  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events.

Dnapars can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch.

It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier versions of Dnapars.

The input data is standard. The first line of the input file contains the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


DNA parsimony algorithm, version 3.6a3

Setting for this run:
  U                 Search for best tree?  Yes
  S                        Search option?  More thorough search
  V              Number of trees to save?  100
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  N           Use Transversion parsimony?  No, count all steps
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4          Print out steps in each site  No
  5  Print sequences at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The N option allows you to choose transversion parsimony, which counts only transversions (changes between one of the purines A or G and one of the pyrimidines C or T). This setting is turned off by default.
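
As a concrete illustration of what "counting only transversions" means, the following small sketch (illustrative Python, not PHYLIP source) charges a step only when a substitution crosses between the purine and pyrimidine classes:

# Under transversion parsimony only purine <-> pyrimidine changes are counted;
# transitions (A<->G, C<->T) contribute nothing.  Illustrative only.
PURINES, PYRIMIDINES = {"A", "G"}, {"C", "T"}

def step_cost(base_from, base_to):
    if base_from == base_to:
        return 0
    crosses = (base_from in PURINES) != (base_to in PURINES)
    return 1 if crosses else 0        # a transversion counts, a transition does not

print(step_cost("A", "G"))   # 0, a transition
print(step_cost("A", "C"))   # 1, a transversion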

The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file, with integer weights from 0 to 35 allowed by using the characters 0, 1, 2, ..., 9 and A, B, ... Z.
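
The mapping of weight characters onto integers can be sketched in a couple of lines (illustrative Python, not PHYLIP source):

# "0".."9" stand for weights 0..9 and "A".."Z" for weights 10..35.
def decode_weight(ch):
    if ch.isdigit():
        return int(ch)
    return 10 + ord(ch.upper()) - ord("A")

print([decode_weight(c) for c in "019AZ"])   # [0, 1, 9, 10, 35]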

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating. They must be preceded in the file by a line giving the number of trees in the file.

The options J, O, T, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The M (multiple data sets option) will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights.

The O (outgroup) option will have no effect if the U (user-defined tree) option is in effect. The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 1.0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data!
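
As I read the description of threshold parsimony in the main documentation file, each site contributes its number of changes but never more than the threshold T. A minimal sketch of that per-site scoring (illustrative Python; the reading above is my assumption, not a quotation of the program's code):

# Per-site scoring under threshold parsimony, as I understand it: each site
# contributes min(changes, T).  Illustrative only.
def threshold_score(steps_per_site, T):
    return sum(min(steps, T) for steps in steps_per_site)

# Using the per-site step counts from the sample output below:
steps = [2, 1, 3, 2, 0, 2, 1, 1, 1, 1, 1, 1, 3]
print(threshold_score(steps, T=2.0))   # 17.0 (ordinary parsimony would give 19)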

Output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison, who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other.
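
Because the lengths are expressed per site, a quick consistency check on the sample output below (a verification sketch, not part of the program) is that the branch lengths sum to the total number of changes divided by the number of sites:

# The seven branch lengths reported in the sample output below should sum to
# (total steps) / (number of sites) = 19 / 13.
lengths = [0.217949, 0.487179, 0.096154, 0.134615, 0.275641, 0.076923, 0.173077]
print(sum(lengths))   # about 1.461538
print(19 / 13)        # 1.4615384615...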

If option 2 is toggled on a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item D appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. If there are two user trees, this is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of the differences in the number of steps between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different, then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.
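
For the two-tree case the arithmetic can be sketched as follows (illustrative Python; the variable names and the exact variance bookkeeping are mine and may differ in detail from the program's internals):

from math import sqrt

# KHT-style test on per-site step counts for two trees.  steps_a[i] and
# steps_b[i] are the steps required at site i on trees A and B.
def kht_like_test(steps_a, steps_b):
    d = [a - b for a, b in zip(steps_a, steps_b)]
    n = len(d)
    mean = sum(d) / n
    var = sum((x - mean) ** 2 for x in d) / (n - 1)
    sd_of_total = sqrt(n * var)       # standard deviation of the summed difference
    total = sum(d)                    # difference in total steps between the trees
    significant = abs(total) > 1.96 * sd_of_total
    return total, sd_of_total, significant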

Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

The program is a straightforward relative of MIX and runs reasonably quickly, especially with many sites and few species.


TEST DATA SET

 
   5   13
Alpha     AACGUGGCCAAAU
Beta      AAGGUCGCCAAAC
Gamma     CAUUUCGUCACAA
Delta     GGUAUUUCGGCCU
Epsilon   GGGAUCUCGGCCC


CONTENTS OF OUTPUT FILE (if all numerical options are on)


DNA parsimony algorithm, version 3.6a3

 5 species,  13  sites


Name            Sequences
----            ---------

Alpha        AACGUGGCCA AAU
Beta         ..G..C.... ..C
Gamma        C.UU.C.U.. C.A
Delta        GGUA.UU.GG CC.
Epsilon      GGGA.CU.GG CCC



One most parsimonious tree found:


                                            +-----Epsilon   
               +----------------------------3  
  +------------2                            +-------Delta     
  |            |  
  |            +----------------Gamma     
  |  
  1----Beta      
  |  
  +---------Alpha     


requires a total of     19.000

  between      and       length
  -------      ---       ------
     1           2       0.217949
     2           3       0.487179
     3      Epsilon      0.096154
     3      Delta        0.134615
     2      Gamma        0.275641
     1      Beta         0.076923
     1      Alpha        0.173077

steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       2   1   3   2   0   2   1   1   1
   10|   1   1   1   3                        

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

          1                AABGTCGCCA AAY
   1      2         yes    V.KD...... C..
   2      3         yes    GG.A..T.GG .C.
   3   Epsilon     maybe   ..G....... ..C
   3   Delta        yes    ..T..T.... ..T
   2   Gamma        yes    C.TT...T.. ..A
   1   Beta        maybe   ..G....... ..C
   1   Alpha        yes    ..C..G.... ..T


PHYLIPNEW-3.69.650/doc/drawgram.html

version 3.6

DRAWGRAM

© Copyright 1990-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DRAWGRAM interactively plots a cladogram- or phenogram-like rooted tree diagram, with many options including orientation of tree and branches, style of tree, label sizes and angles, tree depth, margin sizes, stem lengths, and placement of nodes in the tree. Particularly if you can use your computer to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want.

To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation.

As with DRAWTREE, to run DRAWGRAM you need a compiled copy of the program, a font file, and a tree file. The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed.

Once these choices have been made you will see the central menu of the program, which looks like this:


Rooted tree plotting program version 3.6a3

Here are the settings: 
 0  Screen type (IBM PC, ANSI):  (none)
 P       Final plotting device:  Postscript printer
 V           Previewing device:  X Windows display
 H                  Tree grows:  Horizontally
 S                  Tree style:  Phenogram
 B          Use branch lengths:  Yes
 L             Angle of labels:  90.0
 R      Scale of branch length:  Automatically rescaled
 D       Depth/Breadth of tree:  0.53
 T      Stem-length/tree-depth:  0.05
 C    Character ht / tip space:  0.3333
 A             Ancestral nodes:  Weighted
 F                        Font:  Times-Roman
 M          Horizontal margins:  1.65 cm
 M            Vertical margins:  2.16 cm
 #              Pages per tree:  one page per tree

 Y to accept these or type the letter for one to change

These are the settings that control the appearance of the tree, which has already been read in. You can either accept these as is, in which case you would answer Y to the question and press the Return or Enter key, or you can answer N if you want to change one, or simply type the character corresponding to the one you want to change (if you answer N it will just immediately ask you for that number anyway).

For a first run, particularly if previewing is available, you might accept these default values and see what the result looks like. The program will then tell you it is about to preview the tree and ask you to press Return or Enter when you are ready to see this (you will probably have to press it twice). If you are on a Windows system (and have its graphics selected as your previewing option), on a Unix or Linux system and are using X windows for previewing, or are on a Macintosh system, a new window will open with the preview in it. If you are using the Tektronix preview option the preview will appear in the window where the menu was.

On X Windows, Macintosh, and Windows you can resize the preview window, though for some of these you may have to ask the system to redraw the preview to see it at the new window size.

Once you are finished looking at the preview, you will want to specify whether the program should make the final plot or change some of the settings. This is done differently on the different previews:

  • In X Windows you should make the menu window the active window. You may need to move the mouse over it, or click in it, or click on its top bar. You do not need to try to close the preview window yourself, and usually if you do this will cause trouble.
  • In Windows use the File menu in the preview window and choose either the Change Parameters menu item, or if you are ready to make the final plot, choose the Plot menu item.
  • On a Macintosh system, you can simply use the little box in the corner of the preview window to close it. The text window for the menu will then be active.
  • In PC graphics press on the Enter key. The screen with the preview should disappear and the settings menu reappear.
  • With a Tektronix preview, you may need to change your screen from a Tektronix-compatible mode to see the menu again.

Except with the Macintosh preview, the program will now ask you if the tree is now ready to be plotted. If you answer Y (for Yes) (or choose this option in the File menu of the preview window in the case of Windows) the program will usually write a plot file (with some plot options it will draw the tree on the screen). Then it will terminate.

But if you do not say that you are ready to plot the tree, it will go back to the above menu, allow you to change more options, and go through the whole process again. The easiest way to learn the meaning of the options is to try them, particularly if previewing is available. Below I will describe them one by one; you may prefer to skip reading this unless you are puzzled about one of them.

THE OPTIONS

0
This is an option that allows you to change the menu window to be an ANSI terminal or an IBM PC terminal. Generally you will not want to change this.

P
This allows you to choose the Plotting device or file format. We have discussed the possible choices in the draw programs documentation web page.

V
This allows you to change the type of preView window (or even turn off previewing). We have discussed the different possible choices in the draw programs documentation web page.

H
Whether the tree grows Horizontally or vertically. The horizontal growth will be from left to right. This option is self explanatory. The other options are designed so that when we switch this direction of growth the tree still looks the same, except for orientation and overall size. This option is toggled, that is, when it is chosen the orientation changes, going back and forth between Vertical and Horizontal. The default orientation is Horizontal.

S
The Style of the tree. There are six styles possible: Cladogram, Phenogram, Curvogram, Eurogram, Swoopogram, and Circular Tree. These are chosen by the letters C, P, V, E, S and O. These take a little explaining.

In spite of the words "cladogram" and "phenogram", there is no implication of the extent to which you consider these diagrams as being genealogies or phenetic clustering diagrams. The names refer to pictorial style, not your own intended final use for the diagram. The six styles can be described as follows (assuming a vertically growing tree):

Cladogram
nodes are connected to other nodes and to tips by straight lines going directly from one to the other. This gives a V-shaped appearance. The default settings if there are no branch lengths are designed to yield a V-shaped tree with a 90-degree angle at the base.

Phenogram
nodes are connected to other nodes and to other tips by a horizontal and then a vertical line. This gives a particularly precise idea of horizontal levels.

Curvogram
nodes are connected to other nodes and to tips by a curve which is one fourth of an ellipse, starting out horizontally and then curving upwards to become vertical. This pattern was suggested by Joan Rudd.

Eurogram
so-called because it is a version of the cladogram diagram popular in Europe. Nodes are connected to other nodes and to tips by a diagonal line that goes outwards and goes at most one-third of the way up to the next node, then turns sharply straight upwards and is vertical. Unfortunately it is nearly impossible to guarantee, when branch lengths are used, that the angles of divergence of lines are the same.

Swoopogram
this option connects two nodes or a node and a tip using two curves that are actually each one-quarter of an ellipse. The first part starts out vertical and then bends over to become horizontal. The second part, which is at least two-thirds of the total, starts out horizontal and then bends up to become vertical. The effect is that two lineages split apart gradually, then more rapidly, then both turn upwards.

Circular Tree
This is a style introduced by David Swofford in PAUP*. The tree grows outward from a central point, being essentially a Phenogram style tree in polar coordinates. The tips form a 360-degree circle. The "vertical" lines run outward radially from the center, and the "horizontal" lines are arcs of circles centered on it.

You should experiment with these and decide which you want -- it depends very much on the effect you want.

B
Whether the tree has Branch lengths that are being used in the diagram. If the tree that was read in had a full set of branch lengths, it will be assumed as a default that you want to use them in the diagram, but you can specify that they are not to be used. If the tree does not have a full set of branch lengths then this will be indicated, and if you try to use branch lengths the program will refuse to allow you to do so. Note that when you change option B, the node position option A may change as well.

L
The angle of the Labels. The angle is always calculated relative to a vertical tree; whether the tree is horizontal or vertical, if the labels are at an angle of 90 degrees they run parallel to the direction of tree growth. The default value is 90 degrees. The option allows you to choose any angle from 0 to 90 degrees.

R
How the branch lengths will be translated into distances on the output device. Note that when branch lengths have not been provided, there are implicit branch lengths specified by the type of tree being drawn. This option will toggle back and forth between automatic adjustment of branch lengths so that the diagram will just fit into the margins, and you specifying how many centimeters there will be per unit branch length. This is included so that you can plot different trees to a common scale, showing which ones have longer or shorter branches than others. Note that if you choose too large a value for centimeters per unit branch length, the tree will be so big it will overrun the plotting area and may cause failure of the diagram to display properly. Too small a value will cause the tree to be a nearly invisible dot.

D
The ratio between the Depth and the breadth of the tree. It is initially set near 0.5, to approximate a V-shaped tree, but you may want to try a larger value to get a longer and narrower tree. Depth and breadth are described as if the tree grew vertically, so that depth is always measured from the root to the tips (not including the length of the labels).

T
The length of the sTem of the tree as a fraction of the depth of the tree. You may want to either lengthen the stem or remove it entirely by giving a value of zero.

C
The Character height, measured as a fraction of the tip spacing. If the labels are rotated to a shallow angle, the character height will be automatically adjusted in hopes of avoiding collision of labels at different tips. This option allows you to change the size of the labels yourself. On output devices where line thicknesses can be varied, the thickness of the tree lines will automatically be adjusted to be proportional to the character height, which is an additional reason you may want to change character height.

A
Controls the positions of the Ancestral (interior) nodes. This can greatly affect the appearance of the tree. The vertical positions (these descriptions assume a tree growing vertically) are not under your control except insofar as you specify the use or non-use of branch lengths. If you choose to change this option you will see the menu:

Should interior node positions:
 be Intermediate between their immediate descendants,
    Weighted average of tip positions
    Centered among their ultimate descendants
    iNnermost of immediate descendants
 or so that tree is V-shaped
 (type I, W, C, N or V):

The five methods (Intermediate, Weighted, Centered, Innermost, and V-shaped) are different horizontal positionings of the interior nodes. It will be helpful to you to try these out and see which you like best. Intermediate places the node halfway between its immediate descendants (horizontally), Weighted places it closer to that descendant who is closer vertically as well, and Centered centers the node below the horizontal positions of the tips that are descended from that node. You may want to choose that option that prevents lines from crossing each other.

V-shaped is another option, one designed, if there are no branch lengths being used, to yield a v-shaped tree of regular appearance. With branch lengths it will not necessarily do so. "Innermost" is the most unusual option: it chooses a center for the tree, and always places interior nodes below the innermost of their immediate descendants. This leads to a tree that has vertical lines in the center, like a tree with a trunk.

If the tree you are plotting has a full set of lengths, then when it is read in the node position option is automatically set to "intermediate", which is the setting with the least likelihood of lines in the tree crossing. If it does not have lengths the option is set to "V-shaped". If you change option B which tells the program whether to try to use the branch lengths, then the node position option will automatically be reset to the appropriate one of these defaults. This may be confusing if you do not realise that it is happening.

F
Allows you to select the name of the Font that you will use for the species names. This is allowed for some of the plotter drivers (this menu item does not appear for the others). You can select the name of any font that is available for your plotter, for example "Courier-Bold" or "Helvetica". The label will then be printed using that font rather than being drawn line-by-line as it is in the default Hershey font. In the preview of the tree, the Hershey font is always used (which means that it may look different from the final font). The size of the characters in the species names is scaled according to the label heights you have selected in the menu, whether plotter fonts or the Hershey font are used. Note that for some plotter drivers (particularly Xfig and PICT) fonts can be used only if the species labels are horizontal or vertical (at angles of 0 degrees or 90 degrees).

M
The horizontal and vertical Margins in centimeters. You can enter new margins (you must enter new values for both horizontal and vertical margins, though these need not be different from the old values). For the moment I do not allow you to specify left and right margins separately, or top and bottom margins separately. In a future release I hope to do so.

#
The number of pages per tree. Defaults to one, but if you need a physically large tree you may want to choose a larger number. For example, to make a big tree for a poster, choose a larger number of pages horizontally and vertically (the program will ask you for these numbers), get out your scissors and paste or tape, and go to work.

I recommend that you try all of these options (particularly if you can preview the trees). It is of particular use to try combinations of the style of tree (option S) with the different methods of placing interior nodes (option A). You will find that a wide variety of effects can be achieved.

I would appreciate suggestions for improvements in DRAWGRAM, but please be aware that the source code is already very large and I may not be able to implement all suggestions.

PHYLIPNEW-3.69.650/doc/phylip.gif (binary GIF image; contents omitted)

PHYLIPNEW-3.69.650/doc/dolpenny.html

version 3.6

DOLPENNY - Branch and bound
to find all most parsimonious trees
for Dollo, polymorphism parsimony criteria

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DOLPENNY is a program that will find all of the most parsimonious trees implied by your data when the Dollo or polymorphism parsimony criteria are employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by DOLPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of losses. It adds the next species to each of these, again in all possible places. If this process were continued it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated.

Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species     (A,B)
          Add C in first place     ((A,B),C)
               Add D in first place     (((A,D),B),C)
               Add D in second place     ((A,(B,D)),C)
               Add D in third place     (((A,B),D),C)
               Add D in fourth place     ((A,B),(C,D))
               Add D in fifth place     (((A,B),C),D)
          Add C in second place: ((A,C),B)
               Add D in first place     (((A,D),C),B)
               Add D in second place     ((A,(C,D)),B)
               Add D in third place     (((A,C),D),B)
               Add D in fourth place     ((A,C),(B,D))
               Add D in fifth place     (((A,C),B),D)
          Add C in third place     (A,(B,C))
               Add D in first place     ((A,D),(B,C))
               Add D in second place     (A,((B,D),C))
               Add D in third place     (A,(B,(C,D)))
               Add D in fourth place     (A,((B,C),D))
               Add D in fifth place     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of losses (or retentions of polymorphism) is evaluated.

The point of this is that if a previously-found tree such as ((A,B),(C,D)) required fewer losses, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.
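
A compact sketch of the branch-and-bound idea follows (illustrative Python; the tree representation, the scoring function, and all names are mine, not DOLPENNY's). It relies on the fact that the criteria used here can only stay the same or get worse as species are added, so a partial tree already worse than the best complete tree seen so far can be abandoned:

# Illustrative branch-and-bound over rooted trees built by adding one species
# at a time.  "score" and "add_in_all_places" are placeholders supplied by the
# caller; the bound works because the score never decreases as species are added.
def branch_and_bound(species, score, add_in_all_places):
    best = {"value": float("inf"), "trees": []}

    def extend(partial_tree, remaining):
        s = score(partial_tree)
        if s > best["value"]:
            return                                   # bound: prune this whole branch
        if not remaining:
            if s < best["value"]:
                best["value"], best["trees"] = s, [partial_tree]   # new best; clear the list
            else:
                best["trees"].append(partial_tree)                 # tied tree; keep it
            return
        nxt, rest = remaining[0], remaining[1:]
        for bigger_tree in add_in_all_places(partial_tree, nxt):
            extend(bigger_tree, rest)

    extend((species[0], species[1]), list(species[2:]))
    return best["value"], best["trees"]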

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer losses than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.

Controlling Run Times

Among the quantities available to be set at the beginning of a run of DOLPENNY, two (howoften and howmany) are of particular importance. As DOLPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DOLPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.

When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has found so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften X howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees.

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, "maxtrees" is set to 100 in the distribution copy.

Methods and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DOLLOP, and thus the remainder of this document will be nearly identical to the DOLLOP document. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary.
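
A small sketch of the Dollo counting on a rooted bifurcating tree with known ancestral state 0 may help (illustrative Python, not DOLLOP or DOLPENNY source; it ignores polymorphic and "?" states). The single 0-->1 origination is placed at the last common ancestor of all tips showing state 1, and each maximal all-0 subtree below that point then costs one reversion:

# Illustrative Dollo counting for one two-state character on a rooted binary
# tree given as nested tuples of tip names; state_of maps tip name -> 0 or 1.
def dollo_reversions(tree, state_of):
    def tips(t):
        return [t] if isinstance(t, str) else tips(t[0]) + tips(t[1])

    def has_one(t):
        return any(state_of[tip] == 1 for tip in tips(t))

    def mrca_of_ones(t):
        # smallest subtree containing every tip that shows state 1
        if isinstance(t, str):
            return t
        if has_one(t[0]) and not has_one(t[1]):
            return mrca_of_ones(t[0])
        if has_one(t[1]) and not has_one(t[0]):
            return mrca_of_ones(t[1])
        return t

    def losses(t):
        # each maximal all-0 subtree inside the "state 1" region costs one reversion
        if not has_one(t):
            return 1
        return 0 if isinstance(t, str) else losses(t[0]) + losses(t[1])

    return losses(mrca_of_ones(tree)) if has_one(tree) else 0

# Character 3 of the test data below, on the first tree in the sample output:
tree = ("Delta", ("Epsilon", ("Gamma1", ("Alpha2", (("Beta2", "Beta1"), "Alpha1")))))
char3 = {"Alpha1": 0, "Alpha2": 0, "Beta1": 0, "Beta2": 0,
         "Gamma1": 0, "Delta": 1, "Epsilon": 1}
print(dollo_reversions(tree, char3))   # 1, matching the table in the sample output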

The assumptions of this method are in effect:

  1. We know which state is the ancestral one (state 0).
  2. The characters are evolving independently.
  3. Different lineages evolve independently.
  4. The probability of a forward change (0-->1) is small over the evolutionary times involved.
  5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change.
  6. Retention of polymorphism for both states (0 and 1) is highly improbable.
  7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment.

That these are the assumptions is established in several of my papers (1973a, 1978b, 1979, 1981b, 1983). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred.

The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained.

The assumptions of the polymorphism parsimony method are in effect:

  1. The ancestral state (state 0) is known in each character.
  2. The characters are evolving independently of each other.
  3. Different lineages are evolving independently.
  4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group.
  5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change.
  6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism.
  7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

The input format is the standard one, with "?", "P", "B" states allowed. Most of the options are selected using a menu:


Penny algorithm for Dollo or polymorphism parsimony, version 3.6a3
 branch-and-bound to find all most parsimonious trees

Settings for this run:
  P                     Parsimony method?  Dollo
  H        How many groups of  100 trees:  1000
  F        How often to report, in trees:  100
  S           Branch and bound is simple?  Yes
  T              Use Threshold parsimony?  No, use ordinary parsimony
  A                 Use ancestral states?  No
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4     Print out steps in each character  No
  5     Print states at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The P option toggles between the Polymorphism parsimony method and the default Dollo parsimony method.

The options T, A, and M are the usual Threshold, Ancestral States, and Multiple Data Sets options. They are described in the Main documentation file and in the Discrete Characters Programs documentation file.

Options F and H reset the variables howoften (F) and howmany (H). The user is prompted for the new values. By setting these larger the program will report its progress less often (howoften) and will run longer (howmany times howoften). These values default to 100 and 1000 which guarantees a search of 100,000 trees, but these can be changed. Note that option F in this program is not the Factors option available in some of the other programs in this section of the package.

The use of the A option allows implementation of the unordered Dollo parsimony and unordered polymorphism parsimony methods which I have described elsewhere (1984b). When the A option is used the ancestor is not to be counted as one of the species. The O (outgroup) option is not available since the tree produced is already rooted.

Setting the threshold value T at or below 1.0, but above 0, causes the criterion to become a rooted (or, if the A option is used with ancestral states of "?", unrooted) compatibility criterion rather than Dollo or polymorphism parsimony, although there is no particular advantage to using this program for a compatibility method instead of PENNY or MIX. Higher threshold values are of course meaningful and provide intermediates between the Dollo or polymorphism parsimony methods and the compatibility method, so that there is some rationale for doing that.

The S (Simple) option alters a step in DOLPENNY which reconsiders the order in which species are added to the tree. Normally the decision as to what species to add to the tree next is made as the first tree is being constructed; that ordering of species is not altered subsequently. Turning the S option off causes the ordering to be continually reconsidered. This will probably result in a substantial increase in run time, but on some data sets of intermediate messiness it may help. It is included in case it might prove of use on some data sets.

The Factors option is not available in this program, as it would have no effect on the result even if that information were provided in the input file.

The output format is also standard. It includes a rooted tree and, if the user selects option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.

A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?") whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in DOLPENNY gives up more easily on displaying these states.

At the beginning of the program are a series of constants, which can be changed to help adapt the program to different computer systems. Two are the initial values of howmany and howoften, constants "often" and "many". Constant "maxtrees" is the maximum number of tied trees that will be stored.


TEST DATA SET

    7    6
Alpha1    110110
Alpha2    110110
Beta1     110000
Beta2     110000
Gamma1    100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options turned on)


Penny algorithm for Dollo or polymorphism parsimony, version 3.6a3
 branch-and-bound to find all most parsimonious trees

 7 species,   6 characters
Dollo parsimony method


Name         Characters
----         ----------

Alpha1       11011 0
Alpha2       11011 0
Beta1        11000 0
Beta2        11000 0
Gamma1       10011 0
Delta        00100 1
Epsilon      00111 0



requires a total of              3.000

    3 trees in all found




  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     +--6  +--------Alpha2    
        !  !  
        +--1     +--Beta2     
           !  +--5  
           +--4  +--Beta1     
              !  
              +-----Alpha1    


 reversions in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       0   0   1   1   1   0            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

root      2         yes    ..1.. .
  2    Delta        yes    ..... 1
  2       3         yes    ...11 .
  3    Epsilon      no     ..... .
  3       6         yes    1.0.. .
  6    Gamma1       no     ..... .
  6       1         yes    .1... .
  1    Alpha2       no     ..... .
  1       4         no     ..... .
  4       5         yes    ...00 .
  5    Beta2        no     ..... .
  5    Beta1        no     ..... .
  4    Alpha1       no     ..... .





  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     +--6        +--Beta2     
        !  +-----5  
        !  !     +--Beta1     
        +--4  
           !     +--Alpha2    
           +-----1  
                 +--Alpha1    


 reversions in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       0   0   1   1   1   0            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

root      2         yes    ..1.. .
  2    Delta        yes    ..... 1
  2       3         yes    ...11 .
  3    Epsilon      no     ..... .
  3       6         yes    1.0.. .
  6    Gamma1       no     ..... .
  6       4         yes    .1... .
  4       5         yes    ...00 .
  5    Beta2        no     ..... .
  5    Beta1        no     ..... .
  4       1         no     ..... .
  1    Alpha2       no     ..... .
  1    Alpha1       no     ..... .





  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     !  !        +--Beta2     
     +--6     +--5  
        !  +--4  +--Beta1     
        !  !  !  
        +--1  +-----Alpha2    
           !  
           +--------Alpha1    


 reversions in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       0   0   1   1   1   0            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

root      2         yes    ..1.. .
  2    Delta        yes    ..... 1
  2       3         yes    ...11 .
  3    Epsilon      no     ..... .
  3       6         yes    1.0.. .
  6    Gamma1       no     ..... .
  6       1         yes    .1... .
  1       4         no     ..... .
  4       5         yes    ...00 .
  5    Beta2        no     ..... .
  5    Beta1        no     ..... .
  4    Alpha2       no     ..... .
  1    Alpha1       no     ..... .


PHYLIPNEW-3.69.650/doc/dnaml.html

version 3.6

DnaML -- DNA Maximum Likelihood program

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the maximum likelihood method for DNA sequences. The present version is faster than earlier versions of DNAML. Details of the algorithm are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites.

The assumptions of the present model are:

  1. Each site in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. A substitution consists of one of two sorts of events:
    (a)
    The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition.
    (b)
    The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to no change, to a transition, or to a transversion.

    The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines.

    The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21.

    • First kind of event:

      1. Determine whether the existing base is a purine or a pyrimidine.
      2. Draw from the proper pool:

              Purine pool:                Pyrimidine pool:
        
             |               |            |               |
             |   0.4706 A    |            |   0.5714 C    |
             |   0.5294 G    |            |   0.4286 T    |
             | (ratio is     |            | (ratio is     |
             |  0.24 : 0.27) |            |  0.28 : 0.21) |
             |_______________|            |_______________|
        

    • Second kind of event:

      Draw from the overall pool:

      
                    |                  |
                    |      0.24 A      |
                    |      0.28 C      |
                    |      0.27 G      |
                    |      0.21 T      |
                    |__________________|
      

    Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier. A closely similar, but not precisely identical, model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996).
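
The pool frequencies in the diagram above follow directly from the specified base frequencies; this tiny sketch (illustrative Python) recomputes them:

# Recomputing the replacement-pool frequencies shown above from the base
# frequencies pi(A)=0.24, pi(C)=0.28, pi(G)=0.27, pi(T)=0.21.
pi = {"A": 0.24, "C": 0.28, "G": 0.27, "T": 0.21}

purines = pi["A"] + pi["G"]          # 0.51
pyrimidines = pi["C"] + pi["T"]      # 0.49

print(pi["A"] / purines, pi["G"] / purines)            # about 0.4706 and 0.5294
print(pi["C"] / pyrimidines, pi["T"] / pyrimidines)    # about 0.5714 and 0.4286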

Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length is of a patch of sites all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate.
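
Setting aside the patch-length (autocorrelation) part of the model for a moment, the contribution of a single site under several possible rates is just a prior-weighted average. A minimal sketch under that simplification (illustrative Python; the real computation also accounts for the correlation between neighbouring sites):

# Simplified sketch: one site's likelihood averaged over rate categories,
# ignoring the HMM's correlation between neighbouring sites.
rates  = [1.0, 2.4, 0.0]      # the three rates in the example above
priors = [0.4, 0.3, 0.3]      # their prior probabilities

def site_likelihood(site_lik_at_rate):
    # site_lik_at_rate(r) is a placeholder for the likelihood of the site's
    # data on the tree if the site evolved at rate r.
    return sum(p * site_lik_at_rate(r) for r, p in zip(rates, priors))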

The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites.

This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.

INPUT FORMAT AND OPTIONS

Subject to these assumptions, the program is a correct maximum likelihood method. The input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:

Nucleic acid sequence Maximum Likelihood method, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  T        Transition/transversion ratio:  2.0000
  F       Use empirical base frequencies?  Yes
  C                One category of sites?  Yes
  R           Rate variation among sites?  constant rate
  W                       Sites weighted?  No
  S        Speedier but rougher analysis?  Yes
  G                Global rearrangements?  No
  J   Randomize input order of sequences?  No. Use input order
  O                        Outgroup root?  No, use as outgroup species  1
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes
  5   Reconstruct hypothetical sequences?  No

  Y to accept these or type the letter for one to change

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options U, W, J, O, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The T option in this program does not stand for Threshold, but instead is the Transition/transversion option. The user is prompted for a real number greater than 0.0, as the expected ratio of transitions to transversions. Note that this is not the ratio of the first to the second kinds of events, but the resulting expected ratio of transitions to transversions. The exact relationship between these two quantities depends on the frequencies in the base pools. The default value of the T parameter if you do not use the T option is 2.0.

The F (Frequencies) option is one which may save users much time. If you want to use the empirical frequencies of the bases, observed in the input sequences, as the base frequencies, you simply use the default setting of the F option. These empirical frequencies are not really the maximum likelihood estimates of the base frequencies, but they will often be close to those values (what they are is maximum likelihood estimates under a "star" or "explosion" phylogeny). If you change the setting of the F option you will be prompted for the frequencies of the four bases. These must add to 1 and are to be typed on one line separated by blanks, not commas.
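
If you want to check the empirical frequencies yourself, tallying them is straightforward. A minimal sketch (illustration only; it ignores gaps, ambiguity codes and site weights):

  from collections import Counter

  sequences = ["AACGTGGCCAAAT",
               "AAGGTCGCCAAAC"]      # ...and so on for the remaining species

  counts = Counter()
  for seq in sequences:
      counts.update(base for base in seq.upper() if base in "ACGT")
  total = sum(counts.values())
  for base in "ACGT":
      print(base, counts[base] / total)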

The R (Hidden Markov Model rates) option allows the user to approximate a Gamma distribution of rates among sites, or a Gamma distribution plus a class of invariant sites, or to specify how many categories of substitution rates there will be in a Hidden Markov Model of rate variation, and what are the rates and probabilities for each. By repeatedly selecting the R option one toggles among no rate variation, the Gamma, Gamma+I, and general HMM possibilities.

If you choose Gamma or Gamma+I the program will ask how many rate categories you want. If you have chosen Gamma+I, keep in mind that one rate category will be set aside for the invariant class and only the remaining ones used to approximate the Gamma distribution. For the approximation we do not use the quantile method of Yang (1995) but instead use a quadrature method using generalized Laguerre polynomials. This should give a good approximation to the Gamma distribution with as few as 5 or 6 categories.
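
For the curious, such a quadrature can be set up roughly as follows, using SciPy's generalized Gauss-Laguerre routine (this is an illustration of the idea and assumes SciPy is available; it is not the program's own code). With alpha = 1 and 5 categories it reproduces the rates and probabilities shown in the example output near the end of this document.

  from scipy.special import gamma, roots_genlaguerre

  def gamma_rate_categories(alpha, ncat):
      """Discrete rates approximating a Gamma distribution with mean 1."""
      # Nodes and weights for the weight function x**(alpha-1) * exp(-x).
      nodes, weights = roots_genlaguerre(ncat, alpha - 1.0)
      rates = nodes / alpha            # rescale so that the mean rate is 1
      probs = weights / gamma(alpha)   # normalize so the probabilities sum to 1
      return rates, probs

  for r, p in zip(*gamma_rate_categories(alpha=1.0, ncat=5)):
      print(f"{r:8.3f}  {p:12.6f}")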

In the Gamma and Gamma+I cases, the user will be asked to supply the coefficient of variation of the rate of substitution among sites. This is different from the parameters used by Nei and Jin (1990) but related to them: their parameter a is also known as "alpha", the shape parameter of the Gamma distribution. It is related to the coefficient of variation by

     CV = 1 / a^(1/2)

or

     a = 1 / (CV)^2

(their parameter b is absorbed here by the requirement that time is scaled so that the mean rate of evolution is 1 per unit time, which means that a = b). As we consider cases in which the rates are less variable we should set a larger and larger, as CV gets smaller and smaller.
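
In other words, converting between the two parameterizations is a one-line calculation:

  import math

  def cv_from_alpha(a):
      return 1.0 / math.sqrt(a)

  def alpha_from_cv(cv):
      return 1.0 / cv ** 2

  print(cv_from_alpha(4.0))    # 0.5 -- rates not very variable
  print(alpha_from_cv(1.0))    # 1.0 -- the value used in the example run below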

If the user instead chooses the general Hidden Markov Model option, they are first asked how many HMM rate categories there will be (for the moment there is an upper limit of 9, which should not be restrictive). Then the program asks for the rates for each category. These rates are only meaningful relative to each other, so that rates 1.0, 2.0, and 2.4 have the exact same effect as rates 2.0, 4.0, and 4.8. Note that an HMM rate category can have rate of change 0, so that this allows us to take into account that there may be a category of sites that are invariant. Note that the run time of the program will be proportional to the number of HMM rate categories: twice as many categories means twice as long a run. Finally the program will ask for the probabilities of a random site falling into each of these regional rate categories. These probabilities must be nonnegative and sum to 1. Default for the program is one category, with rate 1.0 and probability 1.0 (actually the rate does not matter in that case).

If more than one HMM rate category is specified, then another option, A, becomes visible in the menu. This allows us to specify that we want to assume that sites that have the same HMM rate category are expected to be clustered so that there is autocorrelation of rates. The program asks for the value of the average patch length. This is an expected length of patches that have the same rate. If it is 1, the rates of successive sites will be independent. If it is, say, 10.25, then the chance of change to a new rate will be 1/10.25 after every site. However the "new rate" is randomly drawn from the mix of rates, and hence could even be the same. So the actual observed length of patches with the same rate will be a bit larger than 10.25. Note below that if you choose multiple patches, there will be an estimate in the output file as to which combination of rate categories contributed most to the likelihood.
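
The last point can be made concrete with a one-function sketch (an illustration of the geometric run lengths implied by this scheme, not the program's code): the expected observed run of sites in one category is the nominal patch length divided by the chance that the redrawn rate really is different.

  def observed_patch_length(patch_length, category_probability):
      """Expected observed run length of one rate category, when a "new"
      rate is redrawn with probability 1/patch_length after each site."""
      leave = (1.0 / patch_length) * (1.0 - category_probability)
      return 1.0 / leave

  # nominal patch length 10.25, category with prior probability 0.3:
  print(observed_patch_length(10.25, 0.3))   # about 14.6 sites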

Note that the autocorrelation scheme we use is somewhat different from Yang's (1995) autocorrelated Gamma distribution. I am unsure whether this difference is of any importance -- our scheme is chosen for the ease with which it can be implemented.

The C option allows user-defined rate categories. The user is prompted for the number of user-defined rates, and for the rates themselves. These rates must be nonnegative (some can be 0) and are defined only relative to each other, so that if rates for three categories are set to 1 : 3 : 2.5 this would have the same meaning as setting them to 2 : 6 : 5. The assignment of rates to sites is then made by reading a file whose default name is "categories". It should contain a string of digits 1 through 9. A new line or a blank can occur after any character in this string. Thus the categories file might look like this:

122231111122411155
1155333333444
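
A file in this format is easy to produce or check with a few lines of code; a minimal sketch of a reader (illustration only, not the program's own input routine):

  def read_categories(path):
      """Return the category (1-9) assigned to each site, in order.
      Blanks and new lines may appear anywhere between the digits."""
      with open(path) as handle:
          return [int(ch) for ch in handle.read() if ch.isdigit()]

  # For the example above this returns
  # [1, 2, 2, 2, 3, 1, 1, 1, 1, 1, 2, 2, 4, 1, 1, 1, 5, 5, 1, 1, 5, 5, 3, ...]
  # categories = read_categories("categories")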

With the current options R, A, and C the program has gained greatly in its ability to infer different rates at different sites and estimate phylogenies under a more realistic model. Note that Likelihood Ratio Tests can be used to test whether one combination of rates is significantly better than another, provided one rate scheme represents a restriction of another with fewer parameters. The number of parameters needed for rate variation is the number of regional rate categories, plus the number of user-defined rate categories less 2, plus one if the regional rate categories have a nonzero autocorrelation.
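
As a sketch of how such a test might be carried out by hand on the log-likelihoods of two nested runs (illustration only; the log-likelihood values below are made up, and the degrees of freedom follow the parameter count just described):

  from scipy.stats import chi2

  def rate_variation_params(n_hmm_cats, n_user_cats, autocorrelated):
      # parameter count for rate variation, as described above
      return n_hmm_cats + n_user_cats - 2 + (1 if autocorrelated else 0)

  def likelihood_ratio_test(lnL_restricted, lnL_general, df):
      statistic = 2.0 * (lnL_general - lnL_restricted)
      return statistic, chi2.sf(statistic, df)

  df = (rate_variation_params(5, 1, True)
        - rate_variation_params(1, 1, False))
  print(likelihood_ratio_test(-1250.3, -1244.1, df))   # hypothetical values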

The G (global search) option causes, after the last species is added to the tree, each possible group to be removed and re-added. This improves the result, since the position of every species is reconsidered. It approximately triples the run-time of the program.

The User tree (option U) is read from a file whose default name is intree. The trees can be multifurcating.

If the U (user tree) option is chosen another option appears in the menu, the L option. If it is selected, it signals the program that it should take any branch lengths that are in the user tree and simply evaluate the likelihood of that tree, without further altering those branch lengths. This means that if some branches have lengths and others do not, the program will estimate the lengths of those that do not have lengths given in the user tree. Note that the program RETREE can be used to add and remove lengths from a tree.

The U option can read a multifurcating tree. This allows us to test the hypothesis that a certain branch has zero length (we can also do this by using RETREE to set the length of that branch to 0.0 when it is present in the tree). By doing a series of runs with different specified lengths for a branch we can plot a likelihood curve for its branch length while allowing all other branches to adjust their lengths to it. If all branches have lengths specified, none of them will be iterated. This is useful to allow a tree produced by another method to have its likelihood evaluated. The L option has no effect and does not appear in the menu if the U option is not used.

The W (Weights) option is invoked in the usual way, with only weights 0 and 1 allowed. It selects a set of sites to be analyzed, ignoring the others. The sites selected are those with weight 1. If the W option is not invoked, all sites are analyzed. The Weights (W) option takes the weights from a file whose default name is "weights". The weights follow the format described in the main documentation file.

The M (multiple data sets) option will ask you whether you want to use multiple sets of weights (from the weights file) or multiple data sets from the input file. The ability to use a single data set with multiple weights means that much less disk space will be used for this input data. The bootstrapping and jackknifing tool Seqboot has the ability to create a weights file with multiple weights. Note also that when we use multiple weights for bootstrapping we can also then maintain different rate categories for different sites in a meaningful way. If you use the multiple data sets option without using multiple weights, you should not at the same time use the user-defined rate categories option (option C).

The algorithm used for searching among trees is faster than it was in version 3.5, thanks to using a technique invented by David Swofford and J. S. Rogers. This involves not iterating most branch lengths on most trees when searching among tree topologies. This is of necessity a "quick-and-dirty" search but it saves much time. There is a menu option (option S) which can turn off this search and revert to the earlier search method which iterated branch lengths in all topologies. This will be substantially slower but will also be a bit more likely to find the tree topology of highest likelihood.

OUTPUT FORMAT

The output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen.

A table is printed showing the length of each tree segment (in units of expected nucleotide substitutions per site), as well as (very) rough confidence limits on their lengths. If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow.

In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This is done by comparing the likelihoods changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above in the discussion of the L option.

One should also realize that if you are looking not at a previously-chosen branch but at all branches, that you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that).

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.
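
A minimal sketch of the paired-sites arithmetic behind this test, given per-site log-likelihoods for two trees (the numbers below are simulated; the program carries out the test itself):

  import numpy as np

  def kht_test(site_lnL_tree1, site_lnL_tree2):
      """z statistic for the total log-likelihood difference between two
      trees, with its variance estimated from the per-site differences."""
      d = np.asarray(site_lnL_tree1) - np.asarray(site_lnL_tree2)
      total_diff = d.sum()
      var_total = len(d) * d.var(ddof=1)
      return total_diff, total_diff / np.sqrt(var_total)   # |z| > 1.96 -> P < 0.05

  rng = np.random.default_rng(1)
  lnL1 = rng.normal(-5.0, 1.0, size=200)            # simulated per-site values
  lnL2 = lnL1 + rng.normal(-0.05, 0.3, size=200)
  print(kht_test(lnL1, lnL2))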

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
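
The resampling step can be sketched as follows, taking a matrix of per-site log-likelihoods (one row per tree) as input. It follows the description just given -- equal means, with the covariances of the summed log-likelihoods estimated from the per-site values -- and is an illustration, not the program's own code.

  import numpy as np

  def sh_test(site_lnL, nsamples=10000, seed=1):
      """site_lnL: array of shape (ntrees, nsites) of per-site log-likelihoods."""
      site_lnL = np.asarray(site_lnL)
      ntrees, nsites = site_lnL.shape
      totals = site_lnL.sum(axis=1)
      observed = totals.max() - totals            # 0 for the best tree

      # covariances of the summed log-likelihoods, from the per-site values
      cov = np.cov(site_lnL) * nsites
      rng = np.random.default_rng(seed)
      samples = rng.multivariate_normal(np.zeros(ntrees), cov, size=nsamples)

      # "least favorable hypothesis": all trees have the same expected value,
      # so compare each sampled tree with the sampled maximum
      diffs = samples.max(axis=1)[:, None] - samples
      pvalues = (diffs >= observed).mean(axis=0)
      return observed, pvalues

  # usage: observed_diffs, pvalues = sh_test(per_site_lnL_matrix)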

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
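
To see roughly how much of the change is hidden by overlaying, one can compute the expected proportion of sites that differ as a function of branch length. The Jukes-Cantor formula is used below purely as an illustration (the model in this program also allows for unequal base frequencies and the transition/transversion ratio):

  import math

  def expected_difference_jc(branch_length):
      """Expected fraction of differing sites under the Jukes-Cantor model."""
      return 0.75 * (1.0 - math.exp(-4.0 * branch_length / 3.0))

  print(expected_difference_jc(0.01))   # ~0.0099: short branches track their length
  print(expected_difference_jc(0.26))   # ~0.22: overlaid changes hide some events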

Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Note that versions 2.7 and earlier of this program printed out the branch lengths in terms of expected probability of change, so that they were scaled differently.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more than half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.
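
As I read that convention, the display rule can be summarized in a small sketch like the following (the likelihood fractions would of course come from the program; this only illustrates how a printed symbol is chosen):

  # IUB ambiguity codes for sets of nucleotides
  IUB = {frozenset("AC"): "M", frozenset("AG"): "R", frozenset("AT"): "W",
         frozenset("CG"): "S", frozenset("CT"): "Y", frozenset("GT"): "K",
         frozenset("ACG"): "V", frozenset("ACT"): "H", frozenset("AGT"): "D",
         frozenset("CGT"): "B", frozenset("ACGT"): "N"}

  def display_state(fractions):
      """fractions: dict base -> fraction of the likelihood, e.g. {'A': 0.97, ...}"""
      ranked = sorted(fractions.items(), key=lambda kv: kv[1], reverse=True)
      best_base, best_frac = ranked[0]
      if best_frac > 0.95:
          return best_base.upper()          # confident: capital letter
      if best_frac >= 0.50:
          return best_base.lower()          # between 50% and 95%: lower case
      chosen, covered = [], 0.0             # otherwise: smallest set covering >50%
      for base, frac in ranked:
          chosen.append(base)
          covered += frac
          if covered > 0.50:
              break
      return IUB[frozenset(b.upper() for b in chosen)]

  print(display_state({"A": 0.97, "C": 0.01, "G": 0.01, "T": 0.01}))  # A
  print(display_state({"A": 0.60, "C": 0.30, "G": 0.05, "T": 0.05}))  # a
  print(display_state({"A": 0.40, "C": 0.35, "G": 0.15, "T": 0.10}))  # M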

PROGRAM CONSTANTS

The constants defined at the beginning of the program include "maxtrees", the maximum number of user trees that can be processed. It is small (100) at present to save some further memory but the cost of increasing it is not very great. Other constants include "maxcategories", the maximum number of site categories, "namelength", the length of species names in characters, and three others, "smoothings", "iterations", and "epsilon", that help "tune" the algorithm and define the compromise between execution speed and the quality of the branch lengths found by iteratively maximizing the likelihood. Reducing iterations and smoothings, and increasing epsilon, will result in faster execution but a worse result. These values will not usually have to be changed.

The program spends most of its time doing real arithmetic. The algorithm, with separate and independent computations occurring for each pattern, lends itself readily to parallel processing.

PAST AND FUTURE OF THE PROGRAM

This program, which in version 2.6 replaced the old version of DNAML, is not derived directly from it but instead was developed by modifying CONTML, with which it shares many of its data structures and much of its strategy. It was speeded up by two major developments, the use of aliasing of nucleotide sites (version 3.1) and pretabulation of some exponentials (added by Akiko Fuseki in version 3.4). In version 3.5 the Hidden Markov Model code was added and the method of iterating branch lengths was changed from an EM algorithm to direct search. The Hidden Markov Model code slows things down, especially if there is autocorrelation between sites, so this version is slower than version 3.4. Nevertheless we hope that the sacrifice is worth it.

One change that is needed in the future is to put in some way of allowing for base composition of nucleotide sequences in different parts of the phylogeny.


TEST DATA SET

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC


CONTENTS OF OUTPUT FILE (with all numerical options on)

(It was run with HMM rates having gamma-distributed rates approximated by 5 rate categories, with coefficient of variation of rates 1.0, and with patch length parameter = 1.5. Two user-defined rate categories were used, one for the first 6 sites, the other for the last 7, with rates 1.0 : 2.0. Weights were used, with sites 1 and 13 given weight 0, and all others weight 1.)


Nucleic acid sequence Maximum Likelihood method, version 3.6a3

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             0111111111 111


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



Empirical Base Frequencies:

   A       0.23333
   C       0.30000
   G       0.23333
  T(U)     0.23333

Transition/transversion ratio =   2.000000


Discrete approximation to gamma distributed rates
 Coefficient of variation of rates = 1.000000  (alpha = 1.000000)

State in HMM    Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023

Expected length of a patch of sites having the same rate =    1.500


Site category   Rate of change

        1           1.000
        2           2.000



  +Beta      
  |  
  |                                                         +Epsilon   
  |  +------------------------------------------------------3  
  1--2                                                      +--Delta     
  |  |  
  |  +--------Gamma     
  |  
  +--Alpha     


remember: this is an unrooted tree!

Ln Likelihood =   -66.19167

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     1          Alpha             0.49468     (     zero,     1.23032) **
     1          Beta              0.00006     (     zero,     0.62569)
     1             2              0.22531     (     zero,     2.28474)
     2             3              8.20666     (     zero,    23.52785) **
     3          Epsilon           0.00006     (     zero,     0.65419)
     3          Delta             0.44668     (     zero,     1.10233) **
     2          Gamma             1.34187     (     zero,     3.46288) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01

Combination of categories that contributes the most to the likelihood:

             1122121111 112

Most probable category at each site if > 0.95 probability ("." otherwise)

             .......... ...

Probable sequences at interior nodes:

  node       Reconstructed sequence (caps if > 0.95)

    1        .AGGTCGCCA AAC
 Beta        AAGGTCGCCA AAC
    2        .AggTcGcCA aAc
    3        .GGATCTCGG CCC
 Epsilon     GGGATCTCGG CCC
 Delta       GGTATTTCGG CCT
 Gamma       CATTTCGTCA CAA
 Alpha       AACGTGGCCA AAT

dnapenny

version 3.6

DNAPENNY - Branch and bound to find
all most parsimonious trees
for nucleic acid sequence parsimony criteria

© Copyright 1986-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DNAPENNY is a program that will find all of the most parsimonious trees implied by your data when the nucleic acid sequence parsimony criterion is employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by DNAPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of base substitutions. It adds the next species to each of these, again in all possible places. If this process were to continue it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated.
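
The figure quoted for 10 species is the standard count of rooted bifurcating trees, (2n-3)!!, which is easy to check:

  def n_rooted_trees(n_species):
      """Number of distinct rooted bifurcating trees: (2n-3)!!"""
      count = 1
      for k in range(3, 2 * n_species - 2, 2):
          count *= k
      return count

  print(n_rooted_trees(4))    # 15, the trees enumerated in the example below
  print(n_rooted_trees(10))   # 34459425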

The reason some trees are never generated is that the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. For each tree the number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species:     (A,B)
          Add C in first place:     ((A,B),C)
               Add D in first place:     (((A,D),B),C)
               Add D in second place:     ((A,(B,D)),C)
               Add D in third place:     (((A,B),D),C)
               Add D in fourth place:     ((A,B),(C,D))
               Add D in fifth place:     (((A,B),C),D)
          Add C in second place:     ((A,C),B)
               Add D in first place:     (((A,D),C),B)
               Add D in second place:     ((A,(C,D)),B)
               Add D in third place:     (((A,C),D),B)
               Add D in fourth place:     ((A,C),(B,D))
               Add D in fifth place:     (((A,C),B),D)
          Add C in third place:     (A,(B,C))
               Add D in first place:     ((A,D),(B,C))
               Add D in second place:     (A,((B,D),C))
               Add D in third place:     (A,(B,(C,D)))
               Add D in fourth place:     (A,((B,C),D))
               Add D in fifth place:     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added.

This is done by counting how many sites which are invariant in the data up to the most recent species added will ultimately show variation when further species are added. Thus if 20 sites vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more sites which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no less than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.
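
In outline, the pruning test sits in the depth-first search roughly as in the sketch below. The step-counting and species-adding routines are hypothetical placeholders; this shows only where the bound is applied, not DNAPENNY's actual code.

  # Skeleton of a depth-first branch-and-bound search over species additions.
  # steps(), add_in_all_places() and minimum_extra_steps() stand in for the
  # parsimony machinery and are assumed to be supplied by the caller.
  best = {"score": float("inf"), "trees": []}

  def search(tree, remaining, steps, add_in_all_places, minimum_extra_steps):
      score = steps(tree)
      if score + minimum_extra_steps(tree, remaining) > best["score"]:
          return                                  # bound: no completion can win
      if not remaining:
          if score < best["score"]:               # new shortest tree found
              best["score"], best["trees"] = score, [tree]
          elif score == best["score"]:            # a tie: keep it as well
              best["trees"].append(tree)
          return
      species, rest = remaining[0], remaining[1:]
      for bigger_tree in add_in_all_places(tree, species):
          search(bigger_tree, rest, steps, add_in_all_places, minimum_extra_steps)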

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible. This process of evaluating which species to add in which order goes on the first time the search makes a tree; thereafter it uses that order.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer steps than these, it clears out the list and restarts it with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.

Controlling Run Times

Among the quantities available to be set from the menu of DNAPENNY, two (howoften and howmany) are of particular importance. As DNAPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DNAPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.

When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften, and nothing will be printed out except some headings and then the final trees.

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant at the beginning of the program that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if maxtrees is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If maxtrees is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that maxtrees be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, maxtrees is set to 100 in the distribution copy.

Method and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DNAPARS, and thus the remainder of this document will be nearly identical to the DNAPARS document.

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are exactly analogous to those of DNAPARS:

  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.


That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events.

The input data is standard. The first line of the input file contains the number of species and the number of sites. If the Weights option is being used, there must also be a W in this first line to signal its presence. There are only two options requiring information to be present in the input file, W (Weights) and U (User tree). All options other than W (including U) are invoked using the menu.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.

The options are selected using an interactive menu. The menu looks like this:


Penny algorithm for DNA, version 3.6a3
 branch-and-bound to find all most parsimonious trees

Settings for this run:
  H        How many groups of  100 trees:  1000
  F        How often to report, in trees:   100
  S           Branch and bound is simple?  Yes
  O                        Outgroup root?  No, use as outgroup species  1
  T              Use Threshold parsimony?  No, use ordinary parsimony
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  I          Input sequences interleaved?  Yes
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4          Print out steps in each site  No
  5  Print sequences at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The user either types "Y" (followed, of course, by a carriage-return) if the settings shown are to be accepted, or the letter or digit corresponding to an option that is to be changed.

The options O, T, W, M, and 0 are the usual ones. They are described in the main documentation file of this package. Option I is the same as in other molecular sequence programs and is described in the documentation file for the sequence programs.

The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 1.0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data!

The W (Weights) option allows only weights of 0 or 1.

The M (Multiple data sets) option for this program does not allow multiple sets of weights. We hope to change this soon.

The options H, F, and S are not found in the other molecular sequence programs. H (How many) allows the user to set the quantity howmany, which we have already seen controls the number of times that the program will report on its progress. F allows the user to set the quantity howoften, which sets how often it will report -- after scanning how many trees.

The S (Simple) option alters a step in DNAPENNY which reconsiders the order in which species are added to the tree. Normally the decision as to what species to add to the tree next is made as the first tree is being constructed; that ordering of species is not altered subsequently. The S option causes it to be continually reconsidered. This will probably result in a substantial increase in run time, but on some data sets of intermediate messiness it may help. It is included in case it might prove of use on some data sets.

Output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 4 is toggled on) a table of the number of changes of state required in each site. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs.


TEST DATA SET

    8    6
Alpha1    AAGAAG
Alpha2    AAGAAG
Beta1     AAGGGG
Beta2     AAGGGG
Gamma1    AGGAAG
Gamma2    AGGAAG
Delta     GGAGGA
Epsilon   GGAAAG


CONTENTS OF OUTPUT FILE (if all numerical options are on)


Penny algorithm for DNA, version 3.6a3
 branch-and-bound to find all most parsimonious trees


requires a total of              8.000

     9 trees in all found




  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  !           +--7  +--Epsilon   
  1           !  !  
  !     +-----6  +-----Gamma2    
  !     !     !  
  !  +--4     +--------Gamma1    
  !  !  !  
  !  !  !           +--Beta2     
  +--2  +-----------5  
     !              +--Beta1     
     !  
     +-----------------Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      4         no     AAGAAG
   4      6         yes    AGGAAG
   6      7         no     AGGAAG
   7      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   7   Gamma2       no     AGGAAG
   6   Gamma1       no     AGGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   2   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !           +-----3  
  !           !     +--Epsilon   
  1     +-----6  
  !     !     !     +--Gamma2    
  !     !     +-----7  
  !  +--4           +--Gamma1    
  !  !  !  
  !  !  !           +--Beta2     
  +--2  +-----------5  
     !              +--Beta1     
     !  
     +-----------------Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      4         no     AAGAAG
   4      6         yes    AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6      7         no     AGGAAG
   7   Gamma2       no     AGGAAG
   7   Gamma1       no     AGGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   2   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  !           +--6  +--Epsilon   
  1           !  !  
  !     +-----7  +-----Gamma1    
  !     !     !  
  !  +--4     +--------Gamma2    
  !  !  !  
  !  !  !           +--Beta2     
  +--2  +-----------5  
     !              +--Beta1     
     !  
     +-----------------Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      4         no     AAGAAG
   4      7         yes    AGGAAG
   7      6         no     AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6   Gamma1       no     AGGAAG
   7   Gamma2       no     AGGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   2   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  1           +--7  +--Epsilon   
  !           !  !  
  !  +--------6  +-----Gamma2    
  !  !        !  
  !  !        +--------Gamma1    
  +--2  
     !              +--Beta2     
     !           +--5  
     +-----------4  +--Beta1     
                 !  
                 +-----Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      6         yes    AGGAAG
   6      7         no     AGGAAG
   7      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   7   Gamma2       no     AGGAAG
   6   Gamma1       no     AGGAAG
   2      4         no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   4   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !           +-----3  
  1           !     +--Epsilon   
  !  +--------6  
  !  !        !     +--Gamma2    
  !  !        +-----7  
  +--2              +--Gamma1    
     !  
     !              +--Beta2     
     !           +--5  
     +-----------4  +--Beta1     
                 !  
                 +-----Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      6         yes    AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6      7         no     AGGAAG
   7   Gamma2       no     AGGAAG
   7   Gamma1       no     AGGAAG
   2      4         no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   4   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  1           +--6  +--Epsilon   
  !           !  !  
  !  +--------7  +-----Gamma1    
  !  !        !  
  !  !        +--------Gamma2    
  +--2  
     !              +--Beta2     
     !           +--5  
     +-----------4  +--Beta1     
                 !  
                 +-----Alpha2    

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      2         no     AAGAAG
   2      7         yes    AGGAAG
   7      6         no     AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6   Gamma1       no     AGGAAG
   7   Gamma2       no     AGGAAG
   2      4         no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG
   4   Alpha2       no     AAGAAG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  !           +--7  +--Epsilon   
  1           !  !  
  !        +--6  +-----Gamma2    
  !        !  !  
  !  +-----2  +--------Gamma1    
  !  !     !  
  +--4     +-----------Alpha2    
     !  
     !              +--Beta2     
     +--------------5  
                    +--Beta1     

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      4         no     AAGAAG
   4      2         no     AAGAAG
   2      6         yes    AGGAAG
   6      7         no     AGGAAG
   7      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   7   Gamma2       no     AGGAAG
   6   Gamma1       no     AGGAAG
   2   Alpha2       no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !           +-----3  
  !           !     +--Epsilon   
  1        +--6  
  !        !  !     +--Gamma2    
  !  +-----2  +-----7  
  !  !     !        +--Gamma1    
  !  !     !  
  +--4     +-----------Alpha2    
     !  
     !              +--Beta2     
     +--------------5  
                    +--Beta1     

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      4         no     AAGAAG
   4      2         no     AAGAAG
   2      6         yes    AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6      7         no     AGGAAG
   7   Gamma2       no     AGGAAG
   7   Gamma1       no     AGGAAG
   2   Alpha2       no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG





  +--------------------Alpha1    
  !  
  !                 +--Delta     
  !              +--3  
  !           +--6  +--Epsilon   
  1           !  !  
  !        +--7  +-----Gamma1    
  !        !  !  
  !  +-----2  +--------Gamma2    
  !  !     !  
  +--4     +-----------Alpha2    
     !  
     !              +--Beta2     
     +--------------5  
                    +--Beta1     

  remember: this is an unrooted tree!


steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       1   1   1   2   2   1            

From    To     Any Steps?    State at upper node
                            
          1                AAGAAG
   1   Alpha1       no     AAGAAG
   1      4         no     AAGAAG
   4      2         no     AAGAAG
   2      7         yes    AGGAAG
   7      6         no     AGGAAG
   6      3         yes    GGAAAG
   3   Delta        yes    GGAGGA
   3   Epsilon      no     GGAAAG
   6   Gamma1       no     AGGAAG
   7   Gamma2       no     AGGAAG
   2   Alpha2       no     AAGAAG
   4      5         yes    AAGGGG
   5   Beta2        no     AAGGGG
   5   Beta1        no     AAGGGG


PHYLIPNEW-3.69.650/doc/neighbor.html

neighbor

version 3.6

NEIGHBOR -- Neighbor-Joining and UPGMA methods

© Copyright 1991-2000 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program implements the Neighbor-Joining method of Saitou and Nei (1987) and the UPGMA method of clustering. The program was written by Mary Kuhner and Jon Yamato, using some code from program FITCH. An important part of the code was translated from FORTRAN code from the neighbor-joining program written by Naruya Saitou and by Li Jin, and is used with the kind permission of Drs. Saitou and Jin.

NEIGHBOR constructs a tree by successive clustering of lineages, setting branch lengths as the lineages join. The tree is not rearranged thereafter. The tree does not assume an evolutionary clock, so that it is in effect an unrooted tree. It should be somewhat similar to the tree obtained by FITCH. The program cannot evaluate a User tree, nor can it prevent branch lengths from becoming negative. However the algorithm is far faster than FITCH or KITSCH. This will make it particularly effective in their place for large studies or for bootstrap or jackknife resampling studies which require runs on multiple data sets.
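
To make the clustering step concrete, here is a minimal sketch in Python of how one neighbor-joining step can choose the pair of lineages to join. This is only an illustration of the published criterion, not NEIGHBOR's actual code; the helper name nj_pair is invented, and the distances are those among five of the seven species in the test data set given below.

def nj_pair(d, names):
    """Return the pair (i, j) that the neighbor-joining criterion would join first."""
    n = len(names)
    r = [sum(row) for row in d]                  # net divergence of each taxon
    best, pair = None, None
    for i in range(n):
        for j in range(i + 1, n):
            q = (n - 2) * d[i][j] - r[i] - r[j]  # the usual Q criterion
            if best is None or q < best:
                best, pair = q, (i, j)
    return pair

names = ["Gibbon", "Orang", "Gorilla", "Chimp", "Human"]
d = [[0.0000, 0.7115, 0.5958, 0.6179, 0.5583],
     [0.7115, 0.0000, 0.4631, 0.5061, 0.4710],
     [0.5958, 0.4631, 0.0000, 0.3484, 0.3083],
     [0.6179, 0.5061, 0.3484, 0.0000, 0.2692],
     [0.5583, 0.4710, 0.3083, 0.2692, 0.0000]]

i, j = nj_pair(d, names)
print("join:", names[i], "and", names[j])        # Chimp and Human

On these five species the sketch joins Chimp and Human first, which agrees with the Chimp-Human grouping in the example output below; the method then repeats the step on a reduced distance matrix until the tree is complete.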

The UPGMA option constructs a tree by successive (agglomerative) clustering using an average-linkage method of clustering. It has some relationship to KITSCH, in that when the tree topology turns out the same, the branch lengths with UPGMA will turn out to be the same as with the P = 0 option of KITSCH.
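
For comparison, here is an equally stripped-down sketch of a single UPGMA merge (again an illustration, not the program's code; the helper name upgma_merge is invented, and the matrix uses three of the species from the test data set below). The two closest clusters are merged at half their distance, and the merged cluster's distance to everything else is a size-weighted average.

def upgma_merge(d, sizes):
    n = len(d)
    i, j = min(((a, b) for a in range(n) for b in range(a + 1, n)),
               key=lambda p: d[p[0]][p[1]])
    height = d[i][j] / 2.0                        # tips end up equidistant from the root
    merged = [(sizes[i] * d[i][k] + sizes[j] * d[j][k]) / (sizes[i] + sizes[j])
              for k in range(n) if k not in (i, j)]
    return (i, j), height, merged

d = [[0.0000, 0.3484, 0.3083],                    # Gorilla, Chimp, Human
     [0.3484, 0.0000, 0.2692],
     [0.3083, 0.2692, 0.0000]]
print(upgma_merge(d, [1, 1, 1]))                  # merges Chimp and Human at height 0.1346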

The options for NEIGHBOR are selected through the menu, which looks like this:


Neighbor-Joining/UPGMA method version 3.6a3

Settings for this run:
  N       Neighbor-joining or UPGMA tree?  Neighbor-joining
  O                        Outgroup root?  No, use as outgroup species  1
  L         Lower-triangular data matrix?  No
  R         Upper-triangular data matrix?  No
  S                        Subreplicates?  No
  J     Randomize input order of species?  No. Use input order
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4       Write out trees onto tree file?  Yes


  Y to accept these or type the letter for one to change

Most of the input options (L, R, S, J, and M) are as given in the Distance Matrix Programs documentation file, and their input format is the same as given there. The O (Outgroup) option is described in the main documentation file of this package. It is not available when the UPGMA option is selected. The Jumble option (J) does not allow multiple jumbles (as most of the other programs that have it do), as there is no objective way of choosing which of the multiple results is best, there being no explicit criterion for optimality of the tree.

Option N chooses between the Neighbor-Joining and UPGMA methods. Option S is the usual Subreplication option. Here, however, it is present only to allow NEIGHBOR to read the input data: the number of replicates is actually ignored, even though it is read in. Note that this means that one cannot use it to have missing data in the input file, if NEIGHBOR is to be used.

The output consists of a tree (rooted if UPGMA, unrooted if Neighbor-Joining) and the lengths of the interior segments. The Average Percent Standard Deviation is not computed or printed out. If the tree found by Neighbor is fed into FITCH as a User Tree, it will compute this quantity if one also selects the N option of FITCH to ensure that none of the branch lengths is re-estimated.

As NEIGHBOR runs it prints out an account of the successive clustering levels, if you allow it to. This is mostly for reassurance and can be suppressed using menu option 2. In this printout of cluster levels the word "OTU" refers to a tip species, and the word "NODE" to an interior node of the resulting tree.

The constants available for modification at the beginning of the program are "namelength", which gives the length of a species name, and the usual boolean constants that initialize the terminal type. There is no feature for saving multiple trees tied for best, partly because we do not expect exact ties except in cases where the branch lengths make the nature of the tie obvious, as when a branch is of zero length.

The major advantage of NEIGHBOR is its speed: it requires a time only proportional to the square of the number of species. It is significantly faster than version 3.5 of this program. By contrast FITCH and KITSCH require a time that rises as the fourth power of the number of species. Thus NEIGHBOR is well-suited to bootstrapping studies and to analysis of very large trees. Our simulation studies (Kuhner and Felsenstein, 1994) show that, contrary to statements in the literature by others, NEIGHBOR does not get as accurate an estimate of the phylogeny as does FITCH. However it does nearly as well, and in view of its speed this will make it a quite useful program.


TEST DATA SET

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000


OUTPUT FROM TEST DATA SET (with all numerical options on)


   7 Populations

Neighbor-Joining/UPGMA method version 3.6a3


 Neighbor-joining method

 Negative branch lengths allowed


Name                       Distances
----                       ---------

Bovine        0.00000   1.68660   1.71980   1.66060   1.52430   1.60430
              1.59050
Mouse         1.68660   0.00000   1.52320   1.48410   1.44650   1.43890
              1.46290
Gibbon        1.71980   1.52320   0.00000   0.71150   0.59580   0.61790
              0.55830
Orang         1.66060   1.48410   0.71150   0.00000   0.46310   0.50610
              0.47100
Gorilla       1.52430   1.44650   0.59580   0.46310   0.00000   0.34840
              0.30830
Chimp         1.60430   1.43890   0.61790   0.50610   0.34840   0.00000
              0.26920
Human         1.59050   1.46290   0.55830   0.47100   0.30830   0.26920
              0.00000


  +---------------------------------------------Mouse     
  ! 
  !                        +---------------------Gibbon    
  1------------------------2 
  !                        !  +----------------Orang     
  !                        +--5 
  !                           ! +--------Gorilla   
  !                           +-4 
  !                             ! +--------Chimp     
  !                             +-3 
  !                               +------Human     
  ! 
  +------------------------------------------------------Bovine    


remember: this is an unrooted tree!

Between        And            Length
-------        ---            ------
   1          Mouse           0.76891
   1             2            0.42027
   2          Gibbon          0.35793
   2             5            0.04648
   5          Orang           0.28469
   5             4            0.02696
   4          Gorilla         0.15393
   4             3            0.03982
   3          Chimp           0.15167
   3          Human           0.11753
   1          Bovine          0.91769


PHYLIPNEW-3.69.650/doc/retree.html

retree

version 3.6

RETREE -- Interactive Tree Rearrangement

© Copyright 1993-2002 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

RETREE is a tree editor. It reads in a tree, or allows the user to construct one, and displays this tree on the screen. The user then can specify how the tree is to be rearranged, rerooted or written out to a file.

The input trees are in one file (with default file name intree), the output trees are written into another (outtree). The user can reroot, flip branches, change names of species, change or remove branch lengths, and move around to look at various parts of the tree if it is too large to fit on the screen. The trees can be multifurcating at any level, although the user is warned that many PHYLIP programs still cannot handle multifurcations above the root, or even at the root.

A major use for this program will be to change rootedness of trees so that a rooted tree derived from one program can be fed in as an unrooted tree to another (you are asked about this when you give the command to write out the tree onto the tree output file). It will also be useful for specifying the length of a branch in a tree where you want a program like DNAML, DNAMLK, FITCH, or CONTML to hold that branch length constant (see the L suboption of the User Tree option in those programs). It will also be useful for changing the order of species for purely cosmetic reasons for DRAWGRAM and DRAWTREE, including using the Midpoint method of rooting the tree. It can also be used to write out a tree file in the Nexus format used by PAUP and MacClade or in our XML tree file format.

This program uses graphic characters that show the tree to best advantage on some computer systems. Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and on any system whose screen or terminal emulates ANSI standard terminals such as old Digital VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier.

The user interaction starts with the program presenting a menu. The menu looks like this:


Tree Rearrangement, version 3.6a3

Settings for this run:
  U          Initial tree (arbitrary, user, specify)?  User tree from tree file
  N   Format to write out trees (PHYLIP, Nexus, XML)?  PHYLIP
  0                     Graphics type (IBM PC, ANSI)?  (none)
  W       Width of terminal screen, of plotting area?  80, 80
  L                        Number of lines on screen?  24

Are these settings correct? (type Y or the letter for one to change)

The 0 (Graphics type) option is the usual one and is described in the main documentation file. The U (initial tree) option allows the user to choose whether the initial tree is to be arbitrary, interactively specified by the user, or read from a tree file. Typing U causes the program to change among the three possibilities in turn. Usually we will want to use a User Tree from a file. It requires that you have available a tree file with the tree topology of the initial tree. If you wish to set up some other particular tree you can either use the "specify" choice in the initial tree option (which is somewhat clumsy to use) or rearrange a User Tree or an arbitrary tree into the shape you want by using the rearrangement commands given below.

The L (screen Lines) option allows the user to change the height of the screen (in lines of characters) that is assumed to be available on the display. This may be particularly helpful when displaying large trees on displays that have more than 24 lines per screen, or on workstation or X-terminal screens that can emulate the ANSI terminals with more than 24 lines.

The N (output file format) option allows the user to specify that the tree files that are written by the program will be in one of three formats:

  1. The PHYLIP default file format (the Newick standard) used by the programs in this package.
  2. The Nexus format defined by David Swofford and by Wayne Maddison and David Maddison for their programs PAUP and MacClade. A tree file written in Nexus format should be directly readable by those programs (They also have options to read a regular PHYLIP tree file as well).
  3. An XML tree file format which we have defined.

The XML tree file format is fairly simple. Each tree is included in tags <PHYLOGENY> ... </PHYLOGENY>. Each branch of the tree is enclosed in a pair of tags <BRANCH> ... </BRANCH>, which enclose the branch and all its descendants. If the branch has a length, this is given by the LENGTH attribute of the BRANCH tag, so that the pair of tags looks like this:

<BRANCH LENGTH=0.09362> ... </BRANCH>

A tip of the tree is at the end of a branch (and hence enclosed in a pair of <BRANCH> ... </BRANCH> tags). Its name is enclosed by <NAME> ... </NAME> tags. Here is an XML tree:

<phylogeny>
  <branch>
    <branch length=0.87231><name>Mouse</name></branch>
    <branch length=0.49807><name>Bovine</name></branch>
    <branch length=0.39538>
      <branch length=0.25930><name>Gibbon</name></branch>
      <branch length=0.10815>
        <branch length=0.24166><name>Orang</name></branch>
        <branch length=0.04405>
          <branch length=0.12322><name>Gorilla</name></branch>
          <branch length=0.06026>
            <branch length=0.13846><name>Chimp</name></branch>
            <branch length=0.0857><name>Human</name></branch>
          </branch>
        </branch>
      </branch>
    </branch>
  </branch>
</phylogeny>

The indentation is for readability but is not part of the XML tree standard, which ignores that kind of white space.
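
As a rough illustration of how easy the format is to generate, the sketch below writes a tree held as nested tuples in this style. It is not part of RETREE; the helper name write_branch is invented, and the output mirrors the example above (including its unquoted length attribute) for a subset of the same branches.

def write_branch(node, indent=2):
    """node is either (tip_name, length) or (list_of_child_nodes, length)."""
    children, length = node
    pad = " " * indent
    attr = "" if length is None else " length=%.5f" % length
    if isinstance(children, str):                         # a tip
        return "%s<branch%s><name>%s</name></branch>" % (pad, attr, children)
    inner = "\n".join(write_branch(c, indent + 2) for c in children)
    return "%s<branch%s>\n%s\n%s</branch>" % (pad, attr, inner, pad)

tree = ([("Mouse", 0.87231),
         ("Bovine", 0.49807),
         ([("Gibbon", 0.25930),
           ([("Orang", 0.24166), ("Gorilla", 0.12322)], 0.10815)], 0.39538)], None)
print("<phylogeny>\n%s\n</phylogeny>" % write_branch(tree))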

What programs can read an XML tree? None right now, not even PHYLIP programs. But soon our lab's LAMARC package will have programs that can read an XML tree. XML is rapidly becoming the standard for representing and interchanging complex data -- it is time to have an XML tree standard. Certain extensions are obvious (to represent the bootstrap proportion for a branch, use BOOTP=0.83 in the BRANCH tag, for example).

The W (screen and window Width) option specifies the width in characters of the area which the trees will be plotted to fit into. This is by default 80 characters so that they will fit on a normal width terminal. The actual width of the display on the terminal (normally 80 characters) will be regarded as a window displaying part of the tree. Thus you could set the "plotting area" to 132 characters, and inform the program that the screen width is 80 characters. Then the program will display only part of the tree at any one time. Below we will show how to move the "window" and see other parts of the tree.

After the initial menu is displayed and the choices are made, the program then sets up an initial tree and displays it. Below it will be a one-line menu of possible commands. Here is what the tree and the menu look like (this is the tree specified by the example input tree given at the bottom of this page, as it displays when the terminal type is "none"):

                                      ,>>1:Human
                                   ,>22  
                                ,>21  `>>2:Chimp
                                !  !  
                             ,>20  `>>>>>3:Gorilla
                             !  !  
                 ,>>>>>>>>>>19  `>>>>>>>>4:Orang
                 !           !  
              ,>18           `>>>>>>>>>>>5:Gibbon
              !  !  
              !  !              ,>>>>>>>>6:Barbary Ma
              !  `>>>>>>>>>>>>>23  
              !                 !  ,>>>>>7:Crab-e. Ma
     ,>>>>>>>17                 `>24  
     !        !                    !  ,>>8:Rhesus Mac
     !        !                    `>25  
     !        !                       `>>9:Jpn Macaq
  ,>16        !  
  !  !        `>>>>>>>>>>>>>>>>>>>>>>>>>10:Squir. Mon
  !  !  
  !  !                                ,>11:Tarsier
** 7 lines below screen **

NEXT? (Options: R . U W O T F D B N H J K L C + ? X Q) (? for Help) 

The tree that was read in had no branch lengths on its branches. The absence of a branch length is indicated by drawing the branch with ">" characters (>>>>>>>). When branches have branch lengths, they are drawn with "-" characters (-------) and their lengths on the screen are approximately proportional to the branch length.

If you type "?" you will get a single screen showing a description of each of these commands in a few words. Here are slightly more detailed descriptions of the commands:

R
("Rearrange"). This command asks for the number of a node which is to be removed from the tree. It and everything to the right of it on the tree is to be removed (by breaking the branch immediately below it). (This is also everything "above" it on the tree when the tree grows upwards, but as the tree grows from left to right on the screen we use "right" rather than "above"). The command also asks whether that branch is to be inserted At a node or Before a node. The first will insert it as an additional branch coming out of an existing node (creating a more multifurcating tree), and the second will insert it so that a new internal node is created in the tree, located in the branch that precedes the node (to the left of it), with the branch that is inserted coming off from that new node. In both cases the program asks you for the number of a node at (or before) which that group is to be inserted. If an impossible number is given, the program refuses to carry out the rearrangement and asks for a new command. The rearranged tree is displayed: it will often have a different number of steps than the original. If you wish to undo a rearrangement, use the Undo command, for which see below.

.
(dot) This command simply causes the current tree to be redisplayed. It is of use when the tree has partly disappeared off of the top of the screen owing to too many responses to commands being printed out at the bottom of the screen.

=
(toggle display of branch lengths). This option is available whenever the tree has a full set of branch lengths. It toggles on and off whether the tree displayed on the screen is shown with the relative branch lengths roughly correct. (It cannot be better than roughly correct because the display is in units of length of whole character widths on the screen). It does not actually remove any branch lengths from the tree: even if the tree showing on the screen seems to have no branch lengths after use of the "=" option, it would still have a full set of branch lengths if it were written out at that point.

U
("Undo"). This command reverses the effect of the most recent rearrangement, outgroup re-rooting, or flipping of branches. It returns to the previous tree topology. It will be of great use when rearranging the tree and when one -- it permits you to abandon the new one and return to the previous one without remembering its topology in detail. Some operations, such as the simultaneous removal of lengths from all branches, cannot be reversed.

W
("Write"). This command writes out the current tree onto a tree output file. If the file already has been written to by this run of RETREE, it will ask you whether you want to replace the contents of the file, add the tree to the end of the file, or not write out the tree to the file. It will also ask you whether you want the tree to written out as Rooted or Unrooted. If you choose Unrooted, it will write the outermost split of the tree as a three-way split with the three branches being those that issue from one of the nodes. This node will be the left (upper) interior node which is next to the root, or the other one if there is no interior node to the left (above) the root. The tree is written in the standard format used by PHYLIP (a subset of the Newick standard), in the Nexus format, or in an XML tree file format. A normal PHYLIP tree is in the proper format to serve as the User-Defined Tree for setting up the initial tree in a subsequent run of the program. However, some programs also require a line in the tree input file that gives the number of trees in the file. You may have to add this line using an editor such as vi, Emacs, Windows Notepad, or MacOS's Simpletext.

O
("Outgroup"). This asks for the number of a node which is to be the outgroup. The tree will be redisplayed with that node as the left descendant of the bottom fork. Note that it is possible to use this to make a multi-species group the outgroup (i.e., you can give the number of an interior node of the tree as the outgroup, and the program will re-root the tree properly with that on the left of the bottom fork.

M
("Midpoint root"). This reroots a tree that has a complete set of branches using the Midpoint rooting method. That rooting method finds the centroid of the tree -- the point that is equidistant from the two farthest points of the tree, and roots the tree there. This is the point in the middle of the longest path from one tip to another in the tree. This has the effect of making the two farthest tips stick out an equal distance to the right. Note that as the tree is rerooted, the scale may change on the screen so that it looks like it ahas suddenly gotted a bit longer. It will not have actually changed in total length. This option is not in the menu if the tree does not have a full set of branch lengths.

T
("Transpose"). This asks for a node number and then flips the two branches at that node, so that the left-right order of branches at that node is changed. This also does not actually change the tree topology but it does change the appearance of the tree. However, unlike the F option discussed below, the individual subtrees defined by those branches do not have the order of any branches reversed in them.

F
("Flip"). This asks for a node number and then flips the entire subtree at that node, so that the left-right order of branches in the whole subtree is changed. This does not actually change the tree topology but it does change the appearance of the tree. Note that it works differently than the F option in the programs MOVE, DNAMOVE, and DOLMOVE, which is actually like the T option mentioned above.

B
("Branch length"). This asks you for the number of a node which is at the end of a branch length, then asks you whether you want to enter a branch length for that branch, change the branch length for that branch (if there is one already) or remove the branch length from the branch.

N
("Name"). This asks you which species you want to change the name for (referring to it by the number for that branch), then gives you the option of either removing the name, typing a new name, or leaving the name as is. Be sure not to try to enter a parentheses ("(" or ")"), a colon (":"), a comma (",") or a semicolon (";") in a name, as those may be mistaken for structural information about the tree when the tree file is read by another program.

H, J, K, or L.
These are the movement commands for scrolling the "window" across a tree. H moves the "window" leftwards (though not beyond column 1), J moves it down, K up, and L right. The "window" will move 20 columns or rows at a time, and the tree will be redrawn in the new "window". Note that this amount of movement is not a full screen.

C
("Clade"). The C command instructs the program to print out only that part of the tree (the "clade") from a certain node on up. The program will prompt you for the number of this node. Remember that thereafter you are not looking at the whole tree. To go back to looking at the whole tree give the C command again and enter "0" for the node number when asked. Most users will not want to use this option unless forced to, as much can be accomplished with the window movement commands H, J, K, and L.

+
("next tree"). This causes the program to read in the next tree in the input file, if there is one. Currently the program does not detect gracefully that it has come to the end of the input tree file, and may crash with a "segmentation fault" if it does. However usually it will not lose any tree file that it has written. On Unix or Linux systems the crash may produce a useless "core dump" (a big file named "core") which you will want to delete.

?
("Help"). Prints a one-screen summary of what the commands do, a few words for each command.

X
("Exit"). Exit from program. If the current tree has not yet been saved into a file, the program will first ask you whether it should be saved.

Q
("Quit"). A synonym for X. Same as the eXit command.

The program was written by Andrew Keeffe, using some code from DNAMOVE, which he also wrote.

Below is a test tree file. We have already shown (above) what the resulting tree display looks like when the terminal type is "none". For ANSI or IBM PC screens it will look better, using the graphics characters of those screens, which we do not attempt to show here.


TEST INPUT TREE FILE

((((((((Human,Chimp),Gorilla),Orang),Gibbon),(Barbary_Ma,(Crab-e._Ma,
(Rhesus_Mac,Jpn_Macaq)))),Squir._Mon),((Tarsier,Lemur),Bovine)),Mouse);
PHYLIPNEW-3.69.650/doc/dollop.html

dollop

version 3.6

DOLLOP -- Dollo and Polymorphism Parsimony Program

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program carries out the Dollo and polymorphism parsimony methods. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary.
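
The counting idea can be sketched as follows (a rough illustration, not DOLLOP's actual two-pass algorithm; the function name dollo is invented). The rooted tree is the one found in the example output below, written as nested pairs, and the character is character 3 of the test data set there; the sketch prints 1 reversion, matching the "reversions in each character" table in that output.

def dollo(node, states):
    """node is a tip name or a pair (left, right).  Returns: does the subtree
    contain state 1, reversions needed if state 1 enters from above, and
    reversions needed when the single 0-->1 origin is pushed as low as possible."""
    if isinstance(node, str):
        one = states[node] == 1
        return one, (0 if one else 1), 0
    lo, lc1, lbest = dollo(node[0], states)
    ro, rc1, rbest = dollo(node[1], states)
    ones = lo or ro
    cost1 = (lc1 + rc1) if ones else 1            # an all-0 subtree costs one reversion
    if lo and ro:
        best = lc1 + rc1                          # the origin cannot be pushed any lower
    else:
        best = lbest if lo else (rbest if ro else 0)
    return ones, cost1, best

tree = ("Delta", ("Epsilon", ("Gamma", ("Beta", "Alpha"))))
states = {"Alpha": 0, "Beta": 0, "Gamma": 0, "Delta": 1, "Epsilon": 1}
print("reversions needed:", dollo(tree, states)[2])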

The assumptions of this method are in effect:

  1. We know which state is the ancestral one (state 0).
  2. The characters are evolving independently.
  3. Different lineages evolve independently.
  4. The probability of a forward change (0-->1) is small over the evolutionary times involved.
  5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change.
  6. Retention of polymorphism for both states (0 and 1) is highly improbable.
  7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment.

One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred.

The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was independently published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained.

The assumptions of the polymorphism parsimony method are in effect:

  1. The ancestral state (state 0) is known in each character.
  2. The characters are evolving independently of each other.
  3. Different lineages are evolving independently.
  4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group.
  5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change.
  6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism.
  7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

The input format is the standard one, with "?", "P", "B" states allowed. The options are selected using a menu:


Dollo and polymorphism parsimony algorithm, version 3.6a3

Settings for this run:
  U                 Search for best tree?  Yes
  P                     Parsimony method?  Dollo
  J     Randomize input order of species?  No. Use input order
  T              Use Threshold parsimony?  No, use ordinary parsimony
  A   Use ancestral states in input file?  No
  W                       Sites weighted?  No
  M           Analyze multiple data sets?  No
  0   Terminal type (IBM PC, ANSI, none)?  (none)
  1    Print out the data at start of run  No
  2  Print indications of progress of run  Yes
  3                        Print out tree  Yes
  4     Print out steps in each character  No
  5     Print states at all nodes of tree  No
  6       Write out trees onto tree file?  Yes

Are these settings correct? (type Y or the letter for one to change)

The options U, J, T, A, and M are the usual User Tree, Jumble, Threshold, Ancestral States, and Multiple Data Sets options, described either in the main documentation file or in the Discrete Characters Programs documentation file. The A (Ancestral States) option allows implementation of the unordered Dollo parsimony and unordered polymorphism parsimony methods which I have described elsewhere (1984b). When the A option is used the ancestor is not to be counted as one of the species. The O (outgroup) option is not available since the tree produced is already rooted. Since the Dollo and polymorphism methods produce a rooted tree, the user-defined trees required by the U option have two-way forks at each level.

The P (Parsimony Method) option is the one that toggles between polymorphism parsimony and Dollo parsimony. The program defaults to Dollo parsimony.

The T (Threshold) option has already been described in the Discrete Characters programs documentation file. Setting T at or below 1.0 but above 0 causes the criterion to become compatibility rather than polymorphism parsimony, although there is no advantage to using this program instead of MIX to do a compatibility method. Setting the threshold value higher brings about an intermediate between the Dollo or polymorphism parsimony methods and the compatibility method, so that there is some rationale for doing that.

Using a threshold value of 1.0 or lower, but above 0, one can obtain a rooted (or, if the A option is used with ancestral states of "?", unrooted) compatibility criterion, but there is no particular advantage to using this program for that instead of MIX. Higher threshold values are of course meaningful and provide intermediates between Dollo and compatibility methods.
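
As a rough sketch of what the threshold does to the criterion (an illustration, not DOLLOP's code; the step counts are invented), each character contributes its own number of changes, but never more than the threshold T, so a character needing many changes stops discriminating among trees beyond that point:

def threshold_score(steps_per_character, T):
    return sum(min(steps, T) for steps in steps_per_character)

steps = [0, 0, 1, 4, 2, 0]                  # hypothetical changes needed per character
print(threshold_score(steps, T=2.0))        # 0 + 0 + 1 + 2 + 2 + 0 = 5.0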

The X (Mixed parsimony methods) option is not available in this program. The Factors option is also not available in this program, as it would have no effect on the result even if that information were provided in the input file.

Output is standard: a list of equally parsimonious trees, and, if the user selects menu option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.

If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there may be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in DOLLOP gives up more easily on displaying these states.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters.
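
A stripped-down sketch of the two-tree version of this test (an illustration only, not the program's code; the per-character step counts are invented) might look like this:

from math import sqrt

best  = [0, 1, 1, 1, 2, 2, 1, 0]      # hypothetical steps at each character, best tree
other = [0, 1, 2, 1, 3, 2, 2, 0]      # hypothetical steps at each character, rival tree

diffs = [b - a for a, b in zip(best, other)]
n = len(diffs)
mean = sum(diffs) / n
var = sum((x - mean) ** 2 for x in diffs) / (n - 1)   # per-character variance of differences
sd_of_total = sqrt(n * var)                           # s.d. of the summed difference
total = sum(diffs)
verdict = "significantly worse" if total > 1.96 * sd_of_total else "not significantly worse"
print("difference", total, "s.d.", round(sd_of_total, 3), verdict)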

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one.

At the beginning of the program is the constant "maxtrees", the maximum number of trees which the program will store for output.

The algorithm is a fairly simple adaptation of the one used in the program SOKAL, which was formerly in this package and has been superseded by MIX. It requires two passes through each tree to count the numbers of reversions.


TEST DATA SET

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110


TEST SET OUTPUT (with all numerical options on)


Dollo and polymorphism parsimony algorithm, version 3.6a3

 5 species,   6  characters

Dollo parsimony method


Name         Characters
----         ----------

Alpha        11011 0
Beta         11000 0
Gamma        10011 0
Delta        00100 1
Epsilon      00111 0



One most parsimonious tree found:




  +-----------Delta     
--3  
  !  +--------Epsilon   
  +--4  
     !  +-----Gamma     
     +--2  
        !  +--Beta      
        +--1  
           +--Alpha     


requires a total of      3.000

 reversions in each character:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       0   0   1   1   1   0            

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)

root      3         yes    ..1.. .
  3    Delta        yes    ..... 1
  3       4         yes    ...11 .
  4    Epsilon      no     ..... .
  4       2         yes    1.0.. .
  2    Gamma        no     ..... .
  2       1         yes    .1... .
  1    Beta         yes    ...00 .
  1    Alpha        no     ..... .


PHYLIPNEW-3.69.650/doc/dnamove.html

dnamove

version 3.6

DNAMOVE - Interactive DNA parsimony

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

DNAMOVE is an interactive DNA parsimony program, inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Macintosh computers. DNAMOVE reads in a data set which is prepared in almost the same format as one for the DNA parsimony program DNAPARS. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different sites and the way the nucleotide states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different sites are affected by changes in the tree topology.

This program uses graphic characters that show the tree to best advantage on some computer systems. Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and on any system whose screen or terminal emulates ANSI standard terminals such as old Digital VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier.

The input data file is set up almost identically to the data files for DNAPARS. The code for nucleotide sequences is the standard one, as described in the molecular sequence programs document. The user trees are contained in the input tree file which is used for input of the starting tree (if desired). The output tree file is used for the final tree.

The user interaction starts with the program presenting a menu. The menu looks like this:


Interactive DNA parsimony, version 3.6a3

Settings for this run:
  O                             Outgroup root?  No, use as outgroup species  1
  W                            Sites weighted?  No
  T                   Use Threshold parsimony?  No, use ordinary parsimony
  I               Input sequences interleaved?  Yes
  U   Initial tree (arbitrary, user, specify)?  Arbitrary
  0        Graphics type (IBM PC, ANSI, none)?  (none)
  S                  Width of terminal screen?  80
  L                 Number of lines on screen?  24

Are these settings correct? (type Y or the letter for one to change)

The O (Outgroup), W (Weights), T (Threshold), and 0 (Graphics type) options are the usual ones and are described in the main documentation file. The I (Interleaved) option is the usual one and is described in the main documentation file and the molecular sequences programs documentation file. The U (initial tree) option allows the user to choose whether the initial tree is to be arbitrary, interactively specified by the user, or read from a tree file. Typing U causes the program to change among the three possibilities in turn. I would recommend that for a first run, you allow the tree to be set up arbitrarily (the default), as the "specify" choice is difficult to use and the "user tree" choice requires that you have available a tree file with the tree topology of the initial tree, which must be a rooted tree. Its default name is intree. The program will ask you for its name if it looks for the input tree file and does not find one of this name. If you wish to set up some particular tree you can also do that by the rearrangement commands specified below.

The W (Weights) option allows only weights of 0 or 1.

The T (threshold) option allows a continuum of methods between parsimony and compatibility. Thresholds less than or equal to 1.0 do not have any meaning and should not be used: they will result in a tree dependent only on the input order of species and not at all on the data!

The L (screen Lines) option allows the user to change the height of the screen (in lines of characters) that is assumed to be available on the display. This may be particularly helpful when displaying large trees on terminals that have more than 24 lines per screen, or on workstation or X-terminal screens that can emulate the ANSI terminals with more than 24 lines.

After the initial menu is displayed and the choices are made, the program then sets up an initial tree and displays it. Below it will be a one-line menu of possible commands, which looks like this:

NEXT? (Options: R # + - S . T U W O F C H ? X Q) (H or ? for Help) 

If you type H or ? you will get a single screen showing a description of each of these commands in a few words. Here are slightly more detailed descriptions:

R ("Rearrange")
This command asks for the number of a node which is to be removed from the tree. It and everything to the right of it on the tree is to be removed (by breaking the branch immediately below it). The command also asks for the number of a node below which that group is to be inserted. If an impossible number is given, the program refuses to carry out the rearrangement and asks for a new command. The rearranged tree is displayed: it will often have a different number of steps than the original. If you wish to undo a rearrangement, use the Undo command, for which see below.
#
This command, and the +, - and S commands described below, determine which site has its states displayed on the branches of the trees. The initial tree displayed by the program does not show states of sites. When # is typed, the program does not ask the user which site is to be shown but automatically shows the states of the next site that is not compatible with the tree (the next site that does not perfectly fit the current tree). The search for this site "wraps around" so that if it reaches the last site without finding one that is not compatible with the tree, the search continues at the first site; if no incompatible site is found the current site is shown again, and if no current site is being shown then the first site is shown. The display takes the form of different symbols or textures on the branches of the tree. The state of each branch is actually the state of the node above it. A key of the symbols or shadings used for states A, C, G, T (U) and ? are shown next to the tree. State ? means that more than one possible nucleotide could exist at that point on the tree, and that the user may want to consider the different possibilities, which are usually apparent by inspection.
+
This command is the same as # except that it goes forward one site, showing the states of the next site. If no site has been shown, using + will cause the first site to be shown. Once the last site has been reached, using + again will show the first site.

-
This command is the same as + except that it goes backwards, showing the states of the previous site. If no site has been shown, using - will cause the last site to be shown. Once site number 1 has been reached, using - again will show the last site.
S ("Show").
This command is the same as + and - except that it causes the program to ask you for the number of a site. That site is the one whose states will be displayed. If you give the site number as 0, the program will go back to not showing the states of the sites.
. (dot)
This command simply causes the current tree to be redisplayed. It is of use when the tree has partly disappeared off of the top of the screen owing to too many responses to commands being printed out at the bottom of the screen.

T ("Try rearrangements").
This command asks for the name of a node. The part of the tree at and above that node is removed from the tree. The program tries to re-insert it in each possible location on the tree (this may take some time, and the program reminds you to wait). Then it prints out a summary. For each possible location the program prints out the number of the node to the right of the place of insertion and the number of steps required in each case. These are divided into those that are better than or tied with the current tree. Once this summary is printed out, the group that was removed is reinserted into its original position. It is up to you to use the R command to actually carry out any of the rearrangements that have been tried.
U ("Undo").
This command reverses the effect of the most recent rearrangement, outgroup re-rooting, or flipping of branches. It returns to the previous tree topology. It will be of great use when rearranging the tree and when a rearrangement proves worse than the preceding one -- it permits you to abandon the new one and return to the previous one without remembering its topology in detail.
W ("Write").
This command writes out the current tree onto a tree output file. If the file already has been written to by this run of DNAMOVE, it will ask you whether you want to replace the contents of the file, add the tree to the end of the file, or not write out the tree to the file. The tree is written in the standard format used by PHYLIP (a subset of the Newick standard). It is in the proper format to serve as the User-Defined Tree for setting up the initial tree in a subsequent run of the program. Note that if you provided the initial tree topology in a tree file and replace its contents, that initial tree will be lost.
O ("Outgroup").
This asks for the number of a node which is to be the outgroup. The tree will be redisplayed with that node as the left descendant of the bottom fork. Note that it is possible to use this to make a multi-species group the outgroup (i.e., you can give the number of an interior node of the tree as the outgroup, and the program will re-root the tree properly with that on the left of the bottom fork).
F ("Flip").
This asks for a node number and then flips the two branches at that node, so that the left-right order of branches at that node is changed. This does not actually change the tree topology (or the number of steps on that tree) but it does change the appearance of the tree.
C ("Clade").
When the data consist of more than 12 species (or more than half the number of lines on the screen if this is not 24), it may be difficult to display the tree on one screen. In that case the tree will be squeezed down to one line per species. This is too small to see all the interior states of the tree. The C command instructs the program to print out only that part of the tree (the "clade") from a certain node on up. The program will prompt you for the number of this node. Remember that thereafter you are not looking at the whole tree. To go back to looking at the whole tree give the C command again and enter "0" for the node number when asked. Most users will not want to use this option unless forced to.
H ("Help").
Prints a one-screen summary of what the commands do, a few words for each command.
? ("huh?").
A synonym for H. Same as Help command.
X ("Exit").
Exit from program. If the current tree has not yet been saved into a file, the program will first ask you whether it should be saved.
Q ("Quit").
A synonym for X. Same as the eXit command.

ADAPTING THE PROGRAM TO YOUR COMPUTER AND TO YOUR TERMINAL

As we have seen, the initial menu of the program allows you to choose among three screen types (PCDOS, Ansi, and none). We have tried to have the default values be correct for PC, Macintosh, and Unix screens. If the setting is "none" (which is necessary on Macintosh screens), the special graphics characters will not be used to indicate nucleotide states, but only letters will be used for the four nucleotides. This is less easy to look at.

MORE ABOUT THE PARSIMONY CRITERION

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are exactly analogous to those of MIX:

  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change.
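
A bare-bones sketch of the Fitch (1971) counting pass for a single site may make the idea clearer (an illustration only, not DNAMOVE's code; the rooted tree shape is invented, and the site is site 3 of the data set below). A gap would simply enter the state sets as a fifth symbol, so gaining or losing it costs one change, as described above.

def fitch(node, site_states):
    """node is a tip name or a pair (left, right); returns (state set, changes)."""
    if isinstance(node, str):
        return {site_states[node]}, 0
    (ls, lc), (rs, rc) = fitch(node[0], site_states), fitch(node[1], site_states)
    common = ls & rs
    if common:
        return common, lc + rc            # the children can agree: no extra change
    return ls | rs, lc + rc + 1           # no agreement: one substitution on this fork

tree = (("Alpha", "Beta"), ("Gamma", ("Delta", "Epsilon")))
site = {"Alpha": "C", "Beta": "G", "Gamma": "U", "Delta": "U", "Epsilon": "G"}
print("changes at this site:", fitch(tree, site)[1])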

Below is a test data set, but we cannot show the output it generates because of the interactive nature of the program.


DATA SET

   5   13
Alpha     AACGUGGCCA AAU
Beta      AAGGUCGCCA AAC
Gamma     CAUUUCGUCA CAA
Delta     GGUAUUUCGG CCU
Epsilon   GGGAUCUCGG CCC
PHYLIPNEW-3.69.650/doc/contml.html

contml

version 3.6

CONTML - Gene Frequencies and Continuous Characters Maximum Likelihood method

© Copyright 1986-2002 by the University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.

This program estimates phylogenies by the restricted maximum likelihood method based on the Brownian motion model. It is based on the model of Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967). Gomberg (1966), Felsenstein (1973b, 1981c) and Thompson (1975) have done extensive further work leading to efficient algorithms. CONTML uses restricted maximum likelihood estimation (REML), which is the criterion used by Felsenstein (1973b). The actual algorithm is an iterative EM Algorithm (Dempster, Laird, and Rubin, 1977) which is guaranteed to always give increasing likelihoods. The algorithm is described in detail in a paper of mine (Felsenstein, 1981c), which you should definitely consult if you are going to use this program. Some simulation tests of it are given by Rohlf and Wooten (1988) and Kim and Burgman (1988).

The default (gene frequency) mode treats the input as gene frequencies at a series of loci, and square-root-transforms the allele frequencies (constructing the frequency of the missing allele at each locus first). This enables us to use the Brownian motion model on the resulting coordinates, in an approximation equivalent to using Cavalli-Sforza and Edwards's (1967) chord measure of genetic distance and taking that to give distance between particles undergoing pure Brownian motion. It assumes that each locus evolves independently by pure genetic drift.
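
A rough illustration of the transformation (not CONTML's code; the two populations and their allele frequencies are invented) is:

from math import sqrt

def transform(freqs_without_last):
    """Add the implied frequency of the missing allele, then take square roots."""
    missing = 1.0 - sum(freqs_without_last)
    return [sqrt(p) for p in freqs_without_last + [missing]]

pop_a = transform([0.70, 0.20])      # a three-allele locus: 0.70, 0.20 and an implied 0.10
pop_b = transform([0.50, 0.40])
# Each population is now a point on the surface of a sphere (the squares sum to 1).
# The straight-line distance between the two points is one ingredient of a
# chord-type distance between the populations:
print(sqrt(sum((a - b) ** 2 for a, b in zip(pop_a, pop_b))))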

The alternative continuous characters mode (menu option C) treats the input as a series of coordinates of each species in N dimensions. It assumes that we have transformed the characters to remove correlations and to standardize their variances.

The input file is as described in the continuous characters documentation file above. Options are selected using a menu:


Continuous character Maximum Likelihood method version 3.6a3

Settings for this run:
  U                       Search for best tree?  Yes
  C  Gene frequencies or continuous characters?  Gene frequencies
  A   Input file has all alleles at each locus?  No, one allele missing at each
  O                              Outgroup root?  No, use as outgroup species 1
  G                      Global rearrangements?  No
  J           Randomize input order of species?  No. Use input order
  M                 Analyze multiple data sets?  No
  0         Terminal type (IBM PC, ANSI, none)?  (none)
  1          Print out the data at start of run  No
  2        Print indications of progress of run  Yes
  3                              Print out tree  Yes
  4             Write out trees onto tree file?  Yes

  Y to accept these or type the letter for one to change

Option U is the usual User Tree option. Options C (Continuous Characters) and A (All alleles present) have been described in the Gene Frequencies and Continuous Characters Programs documentation file. The options G, J, O and M are the usual Global Rearrangements, Jumble order of species, Outgroup root, and Multiple Data Sets options.

The M (Multiple data sets) option does not allow multiple sets of weights instead of multiple data sets, as there are no weights in this program.

The G and J options have no effect if the User Tree option is selected. User trees are given with a trifurcation (three-way split) at the base. They can start from any interior node. Thus the tree:

     A
     !
     *--B
     !
     *-----C
     !
     *--D
     !
     E

can be represented by any of the following:

     (A,B,(C,(D,E)));
     ((A,B),C,(D,E));
     (((A,B),C),D,E);

(there are of course 69 other representations as well obtained from these by swapping the order of branches at an interior node).
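
The count can be checked directly. The short sketch below (Python; the adjacency table and helper function are ours, purely for illustration) roots the unrooted tree above at each of its three interior nodes in turn and writes out every ordering of branches, finding 72 strings in all -- the three shown plus 69 others.

# Illustrative sketch only: enumerate every written form of the tree above.
from itertools import permutations

# adjacency of the unrooted tree shown above; 1, 2, 3 are its interior nodes
adj = {1: ['A', 'B', 2], 2: [1, 'C', 3], 3: [2, 'D', 'E'],
       'A': [1], 'B': [1], 'C': [2], 'D': [3], 'E': [3]}

def renderings(node, parent):
    """All parenthesized strings for the subtree at node, seen from parent."""
    children = [n for n in adj[node] if n != parent]
    if not children:                       # a tip
        return [str(node)]
    out = []
    for perm in permutations(children):
        parts = [renderings(child, node) for child in perm]
        def combine(i, acc):               # one rendering choice per child
            if i == len(parts):
                out.append('(' + ','.join(acc) + ')')
            else:
                for piece in parts[i]:
                    combine(i + 1, acc + [piece])
        combine(0, [])
    return out

trees = {s + ';' for root in (1, 2, 3) for s in renderings(root, None)}
print(len(trees))   # 72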

The output has a standard appearance. The topology of the tree is given by an unrooted tree diagram. The lengths (in time or in expected amounts of variance) are given in a table below the topology, and a rough confidence interval is given for each length. A negative lower bound on a length indicates that the length is not significantly greater than zero, so that rearrangements of the tree in that region may be acceptable.

The units of length are amounts of expected accumulated variance (not time). The log likelihood (natural log) of each tree is also given, and it is indicated how many topologies have been tried. The tree does not necessarily have all tips contemporary, and the log likelihood may be either positive or negative (this simply corresponds to whether the density function does or does not exceed 1) and a negative log likelihood does not indicate any error. The log likelihood allows various formal likelihood ratio hypothesis tests. The description of the tree includes approximate standard errors on the lengths of segments of the tree. These are calculated by considering only the curvature of the likelihood surface as the length of the segment is varied, holding all other lengths constant. As such they are most probably underestimates of the variance, and hence may give too much confidence in the given tree.
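
The idea behind these curvature-based standard errors can be sketched numerically. In the fragment below (Python; the quadratic log-likelihood is a made-up stand-in, not CONTML's likelihood, and this illustrates the general idea only, not the program's exact computation), the standard error of one branch length is taken as 1/sqrt(-d2 lnL/dl2), the curvature of the log-likelihood at its maximum with all other lengths held fixed.

# Illustrative sketch only: standard error from the curvature of lnL.
import numpy as np

def curvature_se(loglik, length, h=1e-4):
    """Approximate SE of a branch length via a central-difference second derivative."""
    d2 = (loglik(length + h) - 2.0 * loglik(length) + loglik(length - h)) / h ** 2
    return 1.0 / np.sqrt(-d2)

# Toy log-likelihood peaked at 0.05 with curvature -400, so the SE comes out near 0.05.
print(curvature_se(lambda v: -200.0 * (v - 0.05) ** 2, 0.05))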

One should use caution in interpreting the likelihoods that are printed out. If the model is wrong, it will not be possible to use the likelihoods to make formal statistical statements. Thus, if gene frequencies are being analyzed but they change not only by genetic drift but also by mutation, the model is not correct. In that case it would be as well justified to use GENDIST to compute the Nei (1972) genetic distance and then use FITCH, KITSCH or NEIGHBOR to make a tree. If continuous characters are being analyzed but have not been transformed to new coordinates that evolve independently and at equal rates, the model is also violated and no statistical analysis is possible.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across loci. If the two trees' means are more than 1.96 standard deviations apart, the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.
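
As a concrete illustration of the two-tree comparison (a sketch only -- the function and the per-locus log-likelihood values are invented for the example, and this is not the program's own code):

# Illustrative sketch only: KHT-style comparison of two trees.
import numpy as np

def kht_test(loglik_tree1, loglik_tree2):
    """loglik_tree1, loglik_tree2: per-locus log-likelihoods of the two trees."""
    d = np.asarray(loglik_tree1) - np.asarray(loglik_tree2)
    n = len(d)
    se = np.sqrt(n * np.var(d, ddof=1))   # SE of the summed difference, from the per-locus spread
    z = d.sum() / se
    return z, abs(z) > 1.96               # significantly different at roughly the 5% level

z, significant = kht_test([-3.1, -2.8, -4.0, -3.5], [-3.4, -3.0, -4.4, -3.2])
print(z, significant)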

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. The version used here is a multivariate normal approximation to their test; it is due to Shimodaira (1998). The variances and covariances of the sum of log likelihoods across loci are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
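
A rough sketch of that multivariate normal approximation follows (Python with numpy; the function, the jitter term and the toy log-likelihood table are ours, for illustration only, and the details of the program's own resampling may differ):

# Illustrative sketch only: SH-style test by sampling correlated log-likelihoods.
import numpy as np

def sh_test(loglik, n_samples=10000, seed=1):
    """loglik: array of shape (n_trees, n_loci).  Returns one P value per tree."""
    loglik = np.asarray(loglik, dtype=float)
    totals = loglik.sum(axis=1)
    n_trees, n_loci = loglik.shape
    cov = np.cov(loglik) * n_loci                  # covariances of the summed log-likelihoods
    cov += 1e-9 * np.eye(n_trees)                  # tiny jitter for numerical stability
    rng = np.random.default_rng(seed)
    # sample under the "least favorable hypothesis": equal means, same covariances
    sims = rng.multivariate_normal(np.zeros(n_trees), cov, size=n_samples)
    sim_diff = sims.max(axis=1)[:, None] - sims    # best-minus-this-tree, per sample
    obs_diff = totals.max() - totals
    return (sim_diff >= obs_diff).mean(axis=0)

print(sh_test([[-3.1, -2.8, -4.0],
               [-3.4, -3.0, -4.4],
               [-3.3, -3.2, -4.1]]))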

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

One problem which sometimes arises is that the program is fed two species (or populations) with identical transformed gene frequencies: this can happen if sample sizes are small and/or many loci are monomorphic. In this case the program "gets its knickers in a twist" and can divide by zero, usually causing a crash. If you suspect that this has happened, check for two species with identical coordinates. If you find them, eliminate one from the problem: the two must always show up as being at the same point on the tree anyway.

The constants available for modification at the beginning of the program include "epsilon1", a small quantity used in the iterations of branch lengths, "epsilon2", another not quite so small quantity used to check whether gene frequencies that were fed in for all alleles do not add up to 1, "smoothings", the number of passes through a given tree in the iterative likelihood maximization for a given topology, "maxtrees", the maximum number of user trees that will be used for the Kishino-Hasegawa-Templeton test, and "namelength", the length of species names. There is no provision in this program for saving multiple trees that are tied for having the highest likelihood, mostly because an exact tie is unlikely anyway.

The algorithm does not run as quickly as the discrete character methods but is not enormously slower. Like them, its execution time should rise as the cube of the number of species.

TEST DATA SET

This data set was compiled by me from the compilation of human gene frequencies by Mourant (1976). It appeared in a paper of mine (Felsenstein, 1981c) on maximum likelihood phylogenies from gene frequencies. The names of the loci and alleles are given in that paper.

    5    10
2 2 2 2 2 2 2 2 2 2
European   0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205
0.8055 0.5043
African    0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600
0.7582 0.6207
Chinese    0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726
0.7482 0.7334
American   0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000
0.8086 0.8636
Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396
0.9097 0.2976


TEST SET OUTPUT (WITH ALL NUMERICAL OPTIONS TURNED ON)


Continuous character Maximum Likelihood method version 3.6a3


   5 Populations,   10 Loci

Numbers of alleles at the loci:
------- -- ------- -- --- -----

   2   2   2   2   2   2   2   2   2   2

Name                 Gene Frequencies
----                 ---- -----------

  locus:         1         2         3         4         5         6
                 7         8         9        10

European     0.28680   0.56840   0.44220   0.42860   0.38280   0.72850
             0.63860   0.02050   0.80550   0.50430
African      0.13560   0.48400   0.06020   0.03970   0.59770   0.96750
             0.95110   0.06000   0.75820   0.62070
Chinese      0.16280   0.59580   0.72980   1.00000   0.38110   0.79860
             0.77820   0.07260   0.74820   0.73340
American     0.01440   0.69900   0.32800   0.74210   0.66060   0.86030
             0.79240   0.00000   0.80860   0.86360
Australian   0.12110   0.22740   0.58210   1.00000   0.20180   0.90000
             0.98370   0.03960   0.90970   0.29760


  +----------------------------------African   
  !  
  !              +--------American  
  1--------------2  
  !              !                    +-----------------------Australian
  !              +--------------------3  
  !                                   +Chinese   
  !  
  +--European  


remember: this is an unrooted tree!

Ln Likelihood =    33.29060

Between     And             Length      Approx. Confidence Limits
-------     ---             ------      ------- ---------- ------
  1       African           0.08464   (     0.02351,     0.17917)
  1          2              0.03569   (    -0.00262,     0.09493)
  2       American          0.02094   (    -0.00904,     0.06731)
  2          3              0.05098   (     0.00555,     0.12124)
  3       Australian        0.05959   (     0.01775,     0.12430)
  3       Chinese           0.00221   (    -0.02034,     0.03710)
  1       European          0.00624   (    -0.01948,     0.04601)


PHYLIPNEW-3.69.650/config.guess0000755000175000017500000012743212171071677012626 00000000000000#! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. timestamp='2012-06-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Per Bothner. Please send patches (context # diff format) to and include a ChangeLog # entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # # You can get the latest version of this script from: # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" >&2 exit 1 ;; * ) break ;; esac done if test $# != 0; then echo "$me: too many arguments$help" >&2 exit 1 fi trap 'exit 1' 1 2 15 # CC_FOR_BUILD -- compiler used by this script. Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. # Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still # use `HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. 
set_cc_for_build=' trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; : ${TMPDIR=/tmp} ; { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; dummy=$tmp/dummy ; tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; case $CC_FOR_BUILD,$HOST_CC,$CC in ,,) echo "int x;" > $dummy.c ; for c in cc gcc c89 c99 ; do if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then CC_FOR_BUILD="$c"; break ; fi ; done ; if test x"$CC_FOR_BUILD" = x ; then CC_FOR_BUILD=no_compiler_found ; fi ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) if (test -f /.attbin/uname) >/dev/null 2>&1 ; then PATH=$PATH:/.attbin ; export PATH fi UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown # Note: order is significant - the case branches are not exclusive. case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward # compatibility and a consistent mechanism for selecting the # object file format. # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ /usr/sbin/$sysctl 2>/dev/null || echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? os=netbsd else os=netbsdelf fi ;; *) os=netbsd ;; esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. case "${UNAME_VERSION}" in Debian*) release='-gnu' ;; *) release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
echo "${machine}-${os}${release}" exit ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; *:SolidBSD:*:*) echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} exit ;; macppc:MirBSD:*:*) echo powerpc-unknown-mirbsd${UNAME_RELEASE} exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") UNAME_MACHINE="alpha" ;; "EV5 (21164)") UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") UNAME_MACHINE="alphaev56" ;; "EV5.6 (21164PC)") UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") UNAME_MACHINE="alphaev68" ;; "EV6.9A (21264/EV69A)") UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition exit ;; *:z/VM:*:*) echo s390-ibm-zvmoe exit ;; *:OS400:*:*) echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit ;; arm:riscos:*:*|arm:RISCOS:*:*) echo arm-unknown-riscos exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
if test "`(/bin/universe) 2>/dev/null`" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd fi exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) echo i386-pc-auroraux${UNAME_RELEASE} exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build SUN_ARCH="i386" # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH="x86_64" fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) UNAME_RELEASE=`uname -v` ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} ;; sun4) echo sparc-sun-sunos${UNAME_RELEASE} ;; esac exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor # > m68000). The system name ranges from "MiNT" over "FreeMiNT" # to the lowercase version "mint" (or "freemint"). Finally # the system name "TOS" denotes a system which is actually not # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) echo m68k-milan-mint${UNAME_RELEASE} exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) echo m68k-hades-mint${UNAME_RELEASE} exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) echo m68k-unknown-mint${UNAME_RELEASE} exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { #else int main (argc, argv) int argc; char *argv[]; { #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && SYSTEM_NAME=`$dummy $dummyarg` && { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax exit ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ [ ${TARGET_BINARY_INTERFACE}x = x ] then echo m88k-dg-dgux${UNAME_RELEASE} else echo m88k-dg-dguxbcs${UNAME_RELEASE} fi else echo i586-dg-dgux${UNAME_RELEASE} fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include main() { if (!__power_pc()) exit(1); puts("powerpc-ibm-aix3.2.5"); exit(0); } EOF if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` then echo "$SYSTEM_NAME" else echo rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi exit ;; *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in 9000/31? ) HP_ARCH=m68000 ;; 9000/[34]?? ) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in 32) HP_ARCH="hppa2.0n" ;; 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #define _HPUX_SOURCE #include #include int main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); #endif long cpu = sysconf (_SC_CPU_VERSION); switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0"); break; case CPU_PA_RISC1_1: puts ("hppa1.1"); break; case CPU_PA_RISC2_0: #if defined(_SC_KERNEL_BITS) switch (bits) { case 64: puts ("hppa2.0w"); break; case 32: puts ("hppa2.0n"); break; default: puts ("hppa2.0"); break; } break; #else /* !defined(_SC_KERNEL_BITS) */ puts ("hppa2.0"); break; #endif default: puts ("hppa1.0"); break; } exit (0); } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac if [ ${HP_ARCH} = "hppa2.0w" ] then eval $set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler # generating 64-bit code. 
GNU and HP use different nomenclature: # # $ CC_FOR_BUILD=cc ./config.guess # => hppa2.0w-hp-hpux11.23 # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then HP_ARCH="hppa2.0w" else HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include int main () { long cpu = sysconf (_SC_CPU_VERSION); /* The order matters, because CPU_IS_HP_MC68K erroneously returns true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct results, however. */ if (CPU_IS_PA_RISC (cpu)) { switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; default: puts ("hppa-hitachi-hiuxwe2"); break; } } else if (CPU_IS_HP_MC68K (cpu)) puts ("m68k-hitachi-hiuxwe2"); else puts ("unknown-hitachi-hiuxwe2"); exit (0); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo 
${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} exit ;; *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) UNAME_PROCESSOR=`/usr/bin/uname -p` case ${UNAME_PROCESSOR} in amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin exit ;; *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; i*:MSYS*:*) echo ${UNAME_MACHINE}-pc-msys exit ;; i*:windows32*:*) # uname -m includes "-pc" on this system. echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; *:Interix*:*) case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; authenticamd | genuineintel | EM64T) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; IA64) echo ia64-unknown-interix${UNAME_RELEASE} exit ;; esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; 8664:Windows_NT:*) echo x86_64-pc-mks exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? echo i586-pc-interix exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) echo x86_64-unknown-cygwin exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; aarch64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; PCA57) UNAME_MACHINE=alphapca56 ;; EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then echo ${UNAME_MACHINE}-unknown-linux-gnu else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then echo ${UNAME_MACHINE}-unknown-linux-gnueabi else echo ${UNAME_MACHINE}-unknown-linux-gnueabihf fi fi exit ;; avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; hexagon:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:Linux:*:*) LIBC=gnu eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #ifdef __dietlibc__ LIBC=dietlibc #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU #undef ${UNAME_MACHINE} #undef ${UNAME_MACHINE}el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) CPU=${UNAME_MACHINE}el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) CPU=${UNAME_MACHINE} #else CPU= #endif #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; or32:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; padre:Linux:*:*) echo sparc-unknown-linux-gnu exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) echo hppa64-unknown-linux-gnu exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in PA7*) echo hppa1.1-unknown-linux-gnu ;; PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac exit ;; ppc64:Linux:*:*) echo powerpc64-unknown-linux-gnu exit ;; ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; tile*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
echo ${UNAME_MACHINE}-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos exit ;; i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 echo ${UNAME_MACHINE}-pc-sco$UNAME_REL else echo ${UNAME_MACHINE}-pc-sysv32 fi exit ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; paragon:*:*:*) echo i860-intel-osf1 exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix exit ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} exit ;; rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` echo ${UNAME_MACHINE}-sni-sysv4 else echo ns32k-sni-sysv fi exit ;; PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort # says echo i586-unisys-sysv4 exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. echo ${UNAME_MACHINE}-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then echo mips-nec-sysv${UNAME_RELEASE} else echo mips-unknown-sysv${UNAME_RELEASE} fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. echo i586-pc-beos exit ;; BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
echo i586-pc-haiku exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} exit ;; SX-7:SUPER-UX:*:*) echo sx7-nec-superux${UNAME_RELEASE} exit ;; SX-8:SUPER-UX:*:*) echo sx8-nec-superux${UNAME_RELEASE} exit ;; SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in i386) eval $set_cc_for_build if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then UNAME_PROCESSOR="x86_64" fi fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; NEO-?:NONSTOP_KERNEL:*:*) echo neo-tandem-nsk${UNAME_RELEASE} exit ;; NSE-*:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 exit ;; *:ITS:*:*) echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; esac ;; *:XENIX:*:SysV) echo i386-pc-xenix exit ;; i*86:skyos:*:*) echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos exit ;; i*86:AROS:*:*) echo ${UNAME_MACHINE}-pc-aros exit ;; x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 #echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 eval $set_cc_for_build cat >$dummy.c < # include #endif main () { #if defined (sony) #if defined (MIPSEB) /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, I don't know.... 
*/ printf ("mips-sony-bsd\n"); exit (0); #else #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 "4" #else "" #endif ); exit (0); #endif #endif #if defined (__arm) && defined (__acorn) && defined (__unix) printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) printf ("m68k-hp-bsd\n"); exit (0); #endif #if defined (NeXT) #if !defined (__ARCHITECTURE__) #define __ARCHITECTURE__ "m68k" #endif int version; version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); exit (0); #endif #if defined (MULTIMAX) || defined (n16) #if defined (UMAXV) printf ("ns32k-encore-sysv\n"); exit (0); #else #if defined (CMU) printf ("ns32k-encore-mach\n"); exit (0); #else printf ("ns32k-encore-bsd\n"); exit (0); #endif #endif #endif #if defined (__386BSD__) printf ("i386-pc-bsd\n"); exit (0); #endif #if defined (sequent) #if defined (i386) printf ("i386-sequent-dynix\n"); exit (0); #endif #if defined (ns32000) printf ("ns32k-sequent-dynix\n"); exit (0); #endif #endif #if defined (_SEQUENT_) struct utsname un; uname(&un); if (strncmp(un.version, "V2", 2) == 0) { printf ("i386-sequent-ptx2\n"); exit (0); } if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ printf ("i386-sequent-ptx1\n"); exit (0); } printf ("i386-sequent-ptx\n"); exit (0); #endif #if defined (vax) # if !defined (ultrix) # include # if defined (BSD) # if BSD == 43 printf ("vax-dec-bsd4.3\n"); exit (0); # else # if BSD == 199006 printf ("vax-dec-bsd4.3reno\n"); exit (0); # else printf ("vax-dec-bsd\n"); exit (0); # endif # endif # else printf ("vax-dec-bsd\n"); exit (0); # endif # else printf ("vax-dec-ultrix\n"); exit (0); # endif #endif #if defined (alliant) && defined (i860) printf ("i860-alliant-bsd\n"); exit (0); #endif exit (1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) if [ -x /usr/convex/getsysinfo ] then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; c34*) echo c34-convex-bsd exit ;; c38*) echo c38-convex-bsd exit ;; c4*) echo c4-convex-bsd exit ;; esac fi cat >&2 < in order to provide the needed information to handle your system. 
config.guess timestamp = $timestamp uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` /bin/uname -X = `(/bin/uname -X) 2>/dev/null` hostinfo = `(hostinfo) 2>/dev/null` /bin/universe = `(/bin/universe) 2>/dev/null` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` /bin/arch = `(/bin/arch) 2>/dev/null` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = ${UNAME_MACHINE} UNAME_RELEASE = ${UNAME_RELEASE} UNAME_SYSTEM = ${UNAME_SYSTEM} UNAME_VERSION = ${UNAME_VERSION} EOF exit 1 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: PHYLIPNEW-3.69.650/README0000664000175000017500000000732111253743723011160 00000000000000This is the EMBOSS integrated version of PHYLIP 3.69 If you require the original PHYLIP 3.69 distribution, you should obtain it from the author, Joe Felsenstein, at http://evolution.gs.washington.edu/phylip/software.html This file records the steps involved in making PHYLIP 3.69 compatible with EMBOSS as an EMBASSY package. The procedure is relatively simple, compared to other packages, as PHYLIP has a nicely isolated user interface and our main task is to write the ACD interface. 1. Make a new directory, and copy in the phylip source (src/ directory) and documentation (doc/ directory) files. 2. Move the include files (*.h) from src/ to include/ 3. Create a configure.in file in the ./ directory 4. Create a Makefile.am file in the ./ src/ and emboss_acd/ directories 5. In src/Makefile.am use a prefix 'f' for every program. In this way the original PHYLIP package can co-exist with the EMBASSY version, and so can the EMBASSY PHYLIP 3.5 package, which used an 'e' prefix. We can claim that the 'f' stands for 'PHYLIP' although being the letter after 'e' is also a significant factor. 6. Add files in the emboss_acd directory (initially from the phylip embassy package) with the 'f' prefix. 7. In include/phylip.h rename VERSION to ORIGINALVERSION as our new ./configure will define it. Put the same version (3.69) into ./configure 8. Looks like PHYLIP 3.69 has new library source code - these functions were generally in the main program *.c files before. We add these as extra sources in Makefile.am 9. PHYLIP 3.69 has programs we did not build in PHYLIP 3.5. This time, we build them all (for now). 10. Put the ACD interface into each program: (a) comment out with the existing getoptions function put /* */ around it put // on every line so we know it is changed change /* */ to /# #/ (b) add an emboss_getoptions function in main (after "init()") to initialise the same variables ad the original getoptions and to use ajAcdGet calls (c) use the modified openfile calls in the remaining code 11. Options are also defined in a set of functions in phylip.c Replace these with calls to standard ACD options. EMBOSS validation will catch any that are not defined in the ACD file. 12. Make sure we add ajExit() at the end of main to test for unused ACD options and final debug output. 13. Use perl scriptsd to check for options in the source (prompts in getoptions and calls to the phylip.c "init" functions. Compare these to the ACD files from PHYLIP 3.5 and update accordingly. 14. 
Make test cases using the test data from the phylip doc/*.html examples. Use these to test both phylip 3.69 and phylip 3.5 and note any differences. 15. Some programs allow multiple input datasets. This means a seqsetall ACD type which we don't have yet. Set the maximum to 1 for these, but allow unlimited datasets where the same option -datasets refers to a weight file which is a simple infile that we let PHYLIP read. 16. Convert printf and exxit(-1) to use ajErr for the message and make exxit(-1) call ajExitBad() 17. Programs end with printf("Done") - and other progress reports. Comment them out. 18. Made new ACD data types for Dist, Freq, Properties, Tree and tried to read all their many file formats. 19. Made fcontrast work with the new style for frequency and tree data. This meant parsing trees from strings instead of files (lots of changes to phylip.c treeread onwards), although the frequency part was (so far) pretty simple. 20. So remove the old functions that read files (all of them!) Pass trees as char* Carefully put Freq->Data into new arrays (in input order :-) Keep ACD minimal for now. PHYLIPNEW-3.69.650/data/0002775000175000017500000000000012171071713011261 500000000000000PHYLIPNEW-3.69.650/data/font50000664000175000017500000004076610227217747012203 00000000000000CA 3051 21 20 28 -2356 1136 -2152 2235 -2254 2336 -2356 2354 2437 2435 -1441 2241 -835 1435 -1935 2635 -1136 935 -1136 1335 -2236 2035 -2237 2135 -2437 2535 -13035 CB 3052 21 24 28 -1956 1335 -2056 1435 -2156 1535 -1656 2756 3055 3153 3151 3048 2947 2646 -2955 3053 3051 2948 2847 -2756 2855 2953 2951 2848 2646 -1846 2646 2845 2943 2941 2838 2636 2235 1035 -2745 2843 2841 2738 2536 -2646 2744 2741 2638 2436 2235 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -13435 CC 3053 21 21 28 -2854 2954 3056 2950 2952 2854 2755 2556 2256 1955 1753 1550 1447 1343 1340 1437 1536 1835 2135 2336 2538 2640 -1954 1752 1650 1547 1443 1439 1537 -2256 2055 1852 1750 1647 1543 1538 1636 1835 -13135 CD 3054 21 23 28 -1956 1335 -2056 1435 -2156 1535 -1656 2556 2855 2954 3051 3047 2943 2739 2537 2336 1935 1035 -2755 2854 2951 2947 2843 2639 2437 -2556 2754 2851 2847 2743 2539 2236 1935 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -13335 CE 3055 21 23 28 -1956 1335 -2056 1435 -2156 1535 -2550 2342 -1656 3156 3050 -1846 2446 -1035 2535 2740 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -2756 3055 -2856 3054 -2956 3053 -3056 3050 -2550 2346 2342 -2448 2246 2344 -2447 2146 2345 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -2035 2536 -2235 2537 -2537 2740 -13335 CF 3056 21 22 28 -1956 1335 -2056 1435 -2156 1535 -2550 2342 -1656 3156 3050 -1846 2446 -1035 1835 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -2756 3055 -2856 3054 -2956 3053 -3056 3050 -2550 2346 2342 -2448 2246 2344 -2447 2146 2345 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -13235 CG 3057 21 22 28 -2854 2954 3056 2950 2952 2854 2755 2556 2256 1955 1753 1550 1447 1343 1340 1437 1536 1835 2035 2336 2538 2742 -1954 1752 1650 1547 1443 1439 1537 -2438 2539 2642 -2256 2055 1852 1750 1647 1543 1538 1636 1835 -2035 2236 2439 2542 -2242 3042 -2342 2541 -2442 2539 -2842 2640 -2942 2641 -13235 CH 3058 21 26 28 -1956 1335 -2056 1435 -2156 1535 -3156 2535 -3256 2635 -3356 2735 -1656 2456 -2856 3656 -1746 2946 -1035 1835 -2235 3035 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -2956 3255 -3056 3154 -3456 3254 -3556 3255 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -2636 2335 -2637 2435 -2737 2835 -2636 2935 -13635 CI 3059 
21 14 28 -1956 1335 -2056 1435 -2156 1535 -1656 2456 -1035 1835 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -12435 CJ 3060 21 19 28 -2456 1939 1837 1635 -2556 2143 2040 1938 -2656 2243 2038 1836 1635 1435 1236 1138 1140 1241 1341 1440 1439 1338 1238 -1240 1239 1339 1340 1240 -2156 2956 -2256 2555 -2356 2454 -2756 2554 -2856 2555 -12935 CK 3061 21 23 28 -1956 1335 -2056 1435 -2156 1535 -3255 1744 -2147 2535 -2247 2635 -2348 2736 -1656 2456 -2956 3556 -1035 1835 -2235 2935 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -3056 3255 -3456 3255 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -2536 2335 -2537 2435 -2637 2835 -13335 CL 3062 21 20 28 -1956 1335 -2056 1435 -2156 1535 -1656 2456 -1035 2535 2741 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -2035 2536 -2235 2638 -2435 2741 -13035 CM 3063 21 28 28 -1956 1336 -1955 2037 2035 -2056 2137 -2156 2238 -3356 2238 2035 -3356 2735 -3456 2835 -3556 2935 -1656 2156 -3356 3856 -1035 1635 -2435 3235 -1756 1955 -1856 1954 -3656 3454 -3756 3455 -1336 1135 -1336 1535 -2836 2535 -2837 2635 -2937 3035 -2836 3135 -13835 CN 3064 21 25 28 -1956 1336 -1956 2635 -2056 2638 -2156 2738 -3255 2738 2635 -1656 2156 -2956 3556 -1035 1635 -1756 2055 -1856 2054 -3056 3255 -3456 3255 -1336 1135 -1336 1535 -13535 CO 3065 21 22 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 2035 2336 2538 2741 2844 2948 2951 2854 2755 2556 2256 -1853 1650 1547 1443 1439 1537 -2438 2641 2744 2848 2852 2754 -2256 2055 1852 1750 1647 1543 1538 1636 1735 -2035 2236 2439 2541 2644 2748 2753 2655 2556 -13235 CP 3066 21 23 28 -1956 1335 -2056 1435 -2156 1535 -1656 2856 3155 3253 3251 3148 2946 2545 1745 -3055 3153 3151 3048 2846 -2856 2955 3053 3051 2948 2746 2545 -1035 1835 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -13335 CQ 3067 21 22 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 2035 2336 2538 2741 2844 2948 2951 2854 2755 2556 2256 -1853 1650 1547 1443 1439 1537 -2438 2641 2744 2848 2852 2754 -2256 2055 1852 1750 1647 1543 1538 1636 1735 -2035 2236 2439 2541 2644 2748 2753 2655 2556 -1538 1640 1841 1941 2140 2238 2333 2432 2532 2633 -2332 2431 2531 -2238 2231 2330 2530 2633 2634 -13235 CR 3068 21 24 28 -1956 1335 -2056 1435 -2156 1535 -1656 2756 3055 3153 3151 3048 2947 2646 1846 -2955 3053 3051 2948 2847 -2756 2855 2953 2951 2848 2646 -2246 2445 2544 2738 2837 2937 3038 -2737 2836 2936 -2544 2636 2735 2935 3038 3039 -1035 1835 -1756 2055 -1856 1954 -2256 2054 -2356 2055 -1436 1135 -1437 1235 -1537 1635 -1436 1735 -13435 CS 3069 21 23 28 -2954 3054 3156 3050 3052 2954 2855 2556 2156 1855 1653 1650 1748 1946 2543 2641 2638 2536 -1750 1848 2544 2642 -1855 1753 1751 1849 2446 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1341 1235 1337 1437 -13335 CT 3070 21 22 28 -2356 1735 -2456 1835 -2556 1935 -1656 1450 -3256 3150 -1656 3256 -1435 2235 -1756 1450 -1956 1553 -2156 1655 -2856 3155 -2956 3154 -3056 3153 -3156 3150 -1836 1535 -1837 1635 -1937 2035 -1836 2135 -13235 CU 3071 21 25 28 -1856 1545 1441 1438 1536 1835 2235 2536 2738 2841 3255 -1956 1645 1541 1537 1636 -2056 1745 1641 1637 1835 -1556 2356 -2956 3556 -1656 1955 -1756 1854 -2156 1954 -2256 1955 -3056 3255 -3456 3255 -13535 CV 3072 21 20 28 -1656 1654 1737 1735 -1755 1838 -1856 1939 -2955 1735 -1456 2156 -2656 3256 -1556 1654 -1956 1854 -2056 1755 -2756 2955 -3156 2955 -13035 CW 3073 21 26 28 -1856 1854 1637 1635 -1955 1738 -2056 1839 -2656 1839 1635 -2656 2654 2437 2435 -2755 2538 -2856 2639 
-3455 2639 2435 -1556 2356 -2656 2856 -3156 3756 -1656 1955 -1756 1854 -2156 1953 -2256 1955 -3256 3455 -3656 3455 -13635 CX 3074 21 22 28 -1756 2335 -1856 2435 -1956 2535 -3055 1236 -1556 2256 -2756 3356 -935 1535 -2035 2735 -1656 1854 -2056 1954 -2156 1955 -2856 3055 -3256 3055 -1236 1035 -1236 1435 -2336 2135 -2337 2235 -2437 2635 -13235 CY 3075 21 22 28 -1656 2046 1735 -1756 2146 1835 -1856 2246 1935 -3155 2246 -1456 2156 -2856 3456 -1435 2235 -1556 1755 -1956 1854 -2056 1755 -2956 3155 -3356 3155 -1836 1535 -1837 1635 -1937 2035 -1836 2135 -13235 CZ 3076 21 22 28 -2956 1135 -3056 1235 -3156 1335 -3156 1756 1550 -1135 2535 2741 -1856 1550 -1956 1653 -2156 1755 -2135 2536 -2335 2638 -2435 2741 -13235 Ca 3151 21 22 28 -2649 2442 2438 2536 2635 2835 3037 3139 -2749 2542 2536 -2649 2849 2642 2538 -2442 2445 2348 2149 1949 1648 1445 1342 1340 1437 1536 1735 1935 2136 2237 2339 2442 -1748 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -13235 Cb 3152 21 19 28 -1756 1549 1443 1439 1537 1636 1835 2035 2336 2539 2642 2644 2547 2448 2249 2049 1848 1747 1645 1542 -1856 1649 1545 1539 1636 -2337 2439 2542 2545 2447 -1456 1956 1749 1542 -2035 2237 2339 2442 2445 2348 2249 -1556 1855 -1656 1754 -12935 Cc 3153 21 18 28 -2445 2446 2346 2344 2544 2546 2448 2249 1949 1648 1445 1342 1340 1437 1536 1735 1935 2236 2439 -1647 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -12835 Cd 3154 21 22 28 -2856 2545 2441 2438 2536 2635 2835 3037 3139 -2956 2645 2541 2536 -2556 3056 2642 2538 -2442 2445 2348 2149 1949 1648 1445 1342 1340 1437 1536 1735 1935 2136 2237 2339 2442 -1647 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -2656 2955 -2756 2854 -13235 Ce 3155 21 18 28 -1440 1841 2142 2444 2546 2448 2249 1949 1648 1445 1342 1340 1437 1536 1735 1935 2236 2438 -1647 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -12835 Cf 3156 21 16 28 -2654 2655 2555 2553 2753 2755 2656 2456 2255 2053 1951 1848 1744 1535 1432 1330 1128 -2052 1949 1844 1635 1532 -2456 2254 2152 2049 1944 1736 1633 1531 1329 1128 928 829 831 1031 1029 929 930 -1449 2549 -12635 Cg 3157 21 21 28 -2649 2235 2132 1929 1728 -2749 2335 2131 -2649 2849 2435 2231 2029 1728 1428 1229 1130 1132 1332 1330 1230 1231 -2442 2445 2348 2149 1949 1648 1445 1342 1340 1437 1536 1735 1935 2136 2237 2339 2442 -1647 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -13135 Ch 3158 21 22 28 -1856 1235 1435 -1956 1335 -1556 2056 1435 -1642 1846 2048 2249 2449 2648 2746 2743 2538 -2648 2644 2540 2536 -2646 2441 2438 2536 2635 2835 3037 3139 -1656 1955 -1756 1854 -13235 Ci 3159 21 13 28 -1956 1954 2154 2156 1956 -2056 2054 -1955 2155 -1145 1247 1449 1649 1748 1846 1843 1638 -1748 1744 1640 1636 -1746 1541 1538 1636 1735 1935 2137 2239 -12335 Cj 3160 21 13 28 -2056 2054 2254 2256 2056 -2156 2154 -2055 2255 -1245 1347 1549 1749 1848 1946 1943 1736 1633 1531 1329 1128 928 829 831 1031 1029 929 930 -1848 1843 1636 1533 1431 -1846 1742 1535 1432 1330 1128 -12335 Ck 3161 21 22 28 -1856 1235 1435 -1956 1335 -1556 2056 1435 -2847 2848 2748 2746 2946 2948 2849 2649 2448 2044 1843 -1643 1843 2042 2141 2337 2436 2636 -2041 2237 2336 -1843 1942 2136 2235 2435 2636 2839 -1656 1955 -1756 1854 -13235 Cl 3162 21 12 28 -1856 1545 1441 1438 1536 1635 1835 2037 2139 -1956 1645 1541 1536 -1556 2056 1642 1538 -1656 1955 -1756 1854 -12235 Cm 3163 21 35 28 -1145 1247 1449 1649 1748 1846 1843 1635 -1748 1743 1535 -1746 1642 1435 1635 -1843 2046 2248 2449 2649 2848 2946 2943 2735 -2848 2843 2635 -2846 2742 2535 2735 -2943 3146 3348 3549 3749 3948 4046 4043 
3838 -3948 3944 3840 3836 -3946 3741 3738 3836 3935 4135 4337 4439 -14535 Cn 3164 21 24 28 -1145 1247 1449 1649 1748 1846 1843 1635 -1748 1743 1535 -1746 1642 1435 1635 -1843 2046 2248 2449 2649 2848 2946 2943 2738 -2848 2844 2740 2736 -2846 2641 2638 2736 2835 3035 3237 3339 -13435 Co 3165 21 20 28 -1949 1648 1445 1342 1340 1437 1536 1835 2135 2436 2639 2742 2744 2647 2548 2249 1949 -1647 1545 1442 1439 1537 -2437 2539 2642 2645 2547 -1949 1747 1645 1542 1539 1636 1835 -2135 2337 2439 2542 2545 2448 2249 -13035 Cp 3166 21 22 28 -1145 1247 1449 1649 1748 1846 1843 1739 1428 -1748 1743 1639 1328 -1746 1642 1228 -1842 1945 2047 2148 2349 2549 2748 2847 2944 2942 2839 2636 2335 2135 1936 1839 1842 -2747 2845 2842 2739 2637 -2549 2648 2745 2742 2639 2537 2335 -928 1728 -1329 1028 -1330 1128 -1430 1528 -1329 1628 -13235 Cq 3167 21 21 28 -2649 2028 -2749 2128 -2649 2849 2228 -2442 2445 2348 2149 1949 1648 1445 1342 1340 1437 1536 1735 1935 2136 2237 2339 2442 -1647 1545 1442 1439 1537 -1949 1747 1645 1542 1539 1636 1735 -1728 2528 -2129 1828 -2130 1928 -2230 2328 -2129 2428 -13135 Cr 3168 21 18 28 -1145 1247 1449 1649 1748 1846 1842 1635 -1748 1742 1535 -1746 1642 1435 1635 -2647 2648 2548 2546 2746 2748 2649 2449 2248 2046 1842 -12835 Cs 3169 21 17 28 -2446 2447 2347 2345 2545 2547 2448 2149 1849 1548 1447 1445 1543 1742 2041 2240 2338 -1548 1445 -1544 1743 2042 2241 -2340 2236 -1447 1545 1744 2043 2242 2340 2338 2236 1935 1635 1336 1237 1239 1439 1437 1337 1338 -12735 Ct 3170 21 14 28 -1956 1645 1541 1538 1636 1735 1935 2137 2239 -2056 1745 1641 1636 -1956 2156 1742 1638 -1349 2349 -12435 Cu 3171 21 24 28 -1145 1247 1449 1649 1748 1846 1843 1638 -1748 1744 1640 1636 -1746 1541 1538 1636 1835 2035 2236 2438 2641 -2849 2641 2638 2736 2835 3035 3237 3339 -2949 2741 2736 -2849 3049 2842 2738 -13435 Cv 3172 21 20 28 -1145 1247 1449 1649 1748 1846 1843 1638 -1748 1744 1640 1636 -1746 1541 1538 1636 1835 2035 2236 2438 2641 2745 2749 2649 2648 2746 -13035 Cw 3173 21 30 28 -1145 1247 1449 1649 1748 1846 1843 1638 -1748 1744 1640 1636 -1746 1541 1538 1636 1835 2035 2236 2438 2541 -2749 2541 2538 2636 2835 3035 3236 3438 3641 3745 3749 3649 3648 3746 -2849 2641 2636 -2749 2949 2742 2638 -14035 Cx 3174 21 22 28 -1345 1548 1749 1949 2148 2246 2244 -1949 2048 2044 1940 1838 1636 1435 1235 1136 1138 1338 1336 1236 1237 -2147 2144 2040 2037 -2947 2948 2848 2846 3046 3048 2949 2749 2548 2346 2244 2140 2136 2235 -1940 1938 2036 2235 2435 2636 2839 -13235 Cy 3175 21 22 28 -1145 1247 1449 1649 1748 1846 1843 1638 -1748 1744 1640 1636 -1746 1541 1538 1636 1835 2035 2236 2438 2642 -2849 2435 2332 2129 1928 -2949 2535 2331 -2849 3049 2635 2431 2229 1928 1628 1429 1330 1332 1532 1530 1430 1431 -13235 Cz 3176 21 20 28 -2749 2647 2445 1639 1437 1335 -2647 1747 1546 1444 -2447 2048 1748 1647 -2447 2049 1749 1547 1444 -1437 2337 2538 2640 -1637 2036 2336 2437 -1637 2035 2335 2537 2640 -13035 C0 3250 21 21 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 1935 2236 2438 2641 2744 2848 2851 2754 2655 2456 2256 -1954 1752 1650 1547 1443 1439 1537 -2237 2439 2541 2644 2748 2752 2654 -2256 2055 1852 1750 1647 1543 1538 1636 1735 -1935 2136 2339 2441 2544 2648 2653 2555 2456 -13135 C1 3251 21 21 28 -2252 1735 1935 -2556 2352 1835 -2556 1935 -2556 2253 1951 1750 -2252 2051 1750 -13135 C2 3252 21 21 28 -1751 1752 1852 1850 1650 1652 1754 1855 2156 2456 2755 2853 2851 2749 2547 1541 1339 1135 -2655 2753 2751 2649 2447 2145 -2456 2555 2653 2651 2549 2347 1541 -1237 1338 1538 2037 2537 2638 -1538 2036 2536 -1538 2035 2335 2536 
2638 2639 -13135 C3 3253 21 21 28 -1751 1752 1852 1850 1650 1652 1754 1855 2156 2456 2755 2853 2851 2749 2648 2447 2146 -2655 2753 2751 2649 2548 -2456 2555 2653 2651 2549 2347 2146 -1946 2146 2445 2544 2642 2639 2537 2336 2035 1735 1436 1337 1239 1241 1441 1439 1339 1340 -2444 2542 2539 2437 -2146 2345 2443 2439 2337 2236 2035 -13135 C4 3254 21 21 28 -2552 2035 2235 -2856 2652 2135 -2856 2235 -2856 1241 2841 -13135 C5 3255 21 21 28 -1956 1446 -1956 2956 -1955 2755 -1854 2354 2755 2956 -1446 1547 1848 2148 2447 2546 2644 2641 2538 2336 1935 1635 1436 1337 1239 1241 1441 1439 1339 1340 -2446 2544 2541 2438 2236 -2148 2347 2445 2441 2338 2136 1935 -13135 C6 3256 21 21 28 -2752 2753 2653 2651 2851 2853 2755 2556 2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 2035 2336 2538 2640 2643 2545 2446 2247 1947 1746 1645 1543 -1853 1650 1547 1443 1439 1537 -2438 2540 2543 2445 -2256 2055 1852 1750 1647 1543 1538 1636 1735 -2035 2236 2337 2440 2444 2346 2247 -13135 C7 3257 21 21 28 -1656 1450 -2956 2853 2650 2245 2042 1939 1835 -2043 1839 1735 -2650 2044 1841 1739 1635 1835 -1553 1856 2056 2553 -1755 2055 2553 -1553 1754 2054 2553 2753 2854 2956 -13135 C8 3258 21 21 28 -2156 1855 1754 1652 1649 1747 1946 2246 2547 2748 2850 2853 2755 2556 2156 -2356 1855 -1854 1752 1748 1847 -1747 2046 -2146 2547 -2648 2750 2753 2655 -2755 2356 -2156 1954 1852 1848 1946 -2246 2447 2548 2650 2654 2556 -1946 1545 1343 1241 1238 1336 1635 2035 2436 2537 2639 2642 2544 2445 2246 -2046 1545 -1645 1443 1341 1338 1436 -1336 1835 2436 -2437 2539 2542 2444 -2445 2146 -1946 1745 1543 1441 1438 1536 1635 -2035 2236 2337 2439 2443 2345 2246 -13135 C9 3259 21 21 28 -2648 2546 2445 2244 1944 1745 1646 1548 1551 1653 1855 2156 2456 2655 2754 2851 2848 2744 2641 2438 2236 1935 1635 1436 1338 1340 1540 1538 1438 1439 -1746 1648 1651 1753 -2654 2752 2748 2644 2541 2338 -1944 1845 1747 1751 1854 1955 2156 -2456 2555 2653 2648 2544 2441 2339 2136 1935 -13135 C. 3260 21 11 28 -1338 1237 1236 1335 1435 1536 1537 1438 1338 -1337 1336 1436 1437 1337 -12135 C, 3261 21 11 28 -1435 1335 1236 1237 1338 1438 1537 1535 1433 1332 1131 -1337 1336 1436 1437 1337 -1435 1434 1332 -12135 C: 3262 21 11 28 -1649 1548 1547 1646 1746 1847 1848 1749 1649 -1648 1647 1747 1748 1648 -1338 1237 1236 1335 1435 1536 1537 1438 1338 -1337 1336 1436 1437 1337 -12135 C; 3263 21 11 28 -1649 1548 1547 1646 1746 1847 1848 1749 1649 -1648 1647 1747 1748 1648 -1435 1335 1236 1237 1338 1438 1537 1535 1433 1332 1131 -1337 1336 1436 1437 1337 -1435 1434 1332 -12135 C! 3264 21 11 28 -1956 1856 1755 1542 -1955 1855 1542 -1955 1954 1542 -1956 2055 2054 1542 -1338 1237 1236 1335 1435 1536 1537 1438 1338 -1337 1336 1436 1437 1337 -12135 C? 
3265 21 21 28 -1751 1752 1852 1850 1650 1652 1754 1855 2156 2556 2855 2953 2951 2849 2748 2547 2146 1945 1943 2142 2242 -2356 2855 -2755 2853 2851 2749 2648 2447 -2556 2655 2753 2751 2649 2548 2146 2045 2043 2142 -1838 1737 1736 1835 1935 2036 2037 1938 1838 -1837 1836 1936 1937 1837 -13135 C/ 3270 21 23 28 -3460 828 928 -3460 3560 928 -13335 C( 3271 21 16 28 -2660 2459 2157 1854 1651 1447 1343 1338 1434 1531 1728 -1954 1751 1547 1442 1434 -2660 2358 2055 1852 1750 1647 1543 1434 -1442 1533 1630 1728 -12635 C) 3272 21 16 28 -1960 2157 2254 2350 2345 2241 2037 1834 1531 1229 1028 -2254 2246 2141 1937 1734 -1960 2058 2155 2246 -2254 2145 2041 1938 1836 1633 1330 1028 -12635 C* 3273 21 17 28 -2056 1955 2145 2044 -2056 2044 -2056 2155 1945 2044 -1553 1653 2447 2547 -1553 2547 -1553 1552 2548 2547 -2553 2453 1647 1547 -2553 1547 -2553 2552 1548 1547 -12735 C- 3274 21 25 28 -1445 3145 3144 -1445 1444 3144 -13535 C 3249 21 16 28 -12635 PHYLIPNEW-3.69.650/data/font40000664000175000017500000002604110227217747012170 00000000000000CA 2051 21 20 28 -2356 1035 -2356 2435 -2254 2335 -1441 2341 -835 1435 -2035 2635 -13035 CB 2052 21 24 28 -1956 1335 -2056 1435 -1656 2756 3055 3153 3151 3048 2947 2646 -2756 2955 3053 3051 2948 2847 2646 -1746 2646 2845 2943 2941 2838 2636 2235 1035 -2646 2745 2843 2841 2738 2536 2235 -13435 CC 2053 21 21 28 -2854 2954 3056 2950 2952 2854 2755 2556 2256 1955 1753 1550 1447 1343 1340 1437 1536 1835 2135 2336 2538 2640 -2256 2055 1853 1650 1547 1443 1440 1537 1636 1835 -13135 CD 2054 21 23 28 -1956 1335 -2056 1435 -1656 2556 2855 2954 3051 3047 2943 2739 2537 2336 1935 1035 -2556 2755 2854 2951 2947 2843 2639 2437 2236 1935 -13335 CE 2055 21 23 28 -1956 1335 -2056 1435 -2450 2242 -1656 3156 3050 3056 -1746 2346 -1035 2535 2740 2435 -13335 CF 2056 21 22 28 -1956 1335 -2056 1435 -2450 2242 -1656 3156 3050 3056 -1746 2346 -1035 1735 -13235 CG 2057 21 22 28 -2854 2954 3056 2950 2952 2854 2755 2556 2256 1955 1753 1550 1447 1343 1340 1437 1536 1835 2035 2336 2538 2742 -2256 2055 1853 1650 1547 1443 1440 1537 1636 1835 -2035 2236 2438 2642 -2342 3042 -13235 CH 2058 21 26 28 -1956 1335 -2056 1435 -3256 2635 -3356 2735 -1656 2356 -2956 3656 -1746 2946 -1035 1735 -2335 3035 -13635 CI 2059 21 13 28 -1956 1335 -2056 1435 -1656 2356 -1035 1735 -12335 CJ 2060 21 18 28 -2556 2039 1937 1836 1635 1435 1236 1138 1140 1241 1340 1239 -2456 1939 1837 1635 -2156 2856 -12835 CK 2061 21 23 28 -1956 1335 -2056 1435 -3356 1643 -2347 2735 -2247 2635 -1656 2356 -2956 3556 -1035 1735 -2335 2935 -13335 CL 2062 21 20 28 -1956 1335 -2056 1435 -1656 2356 -1035 2535 2741 2435 -13035 CM 2063 21 27 28 -1956 1335 -1956 2035 -2056 2137 -3356 2035 -3356 2735 -3456 2835 -1656 2056 -3356 3756 -1035 1635 -2435 3135 -13735 CN 2064 21 25 28 -1956 1335 -1956 2638 -1953 2635 -3256 2635 -1656 1956 -2956 3556 -1035 1635 -13535 CO 2065 21 22 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 2035 2336 2538 2741 2844 2948 2951 2854 2755 2556 2256 -2256 2055 1853 1650 1547 1443 1440 1537 1735 -2035 2236 2438 2641 2744 2848 2851 2754 2556 -13235 CP 2066 21 23 28 -1956 1335 -2056 1435 -1656 2856 3155 3253 3251 3148 2946 2545 1745 -2856 3055 3153 3151 3048 2846 2545 -1035 1735 -13335 CQ 2067 21 22 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 2035 2336 2538 2741 2844 2948 2951 2854 2755 2556 2256 -2256 2055 1853 1650 1547 1443 1440 1537 1735 -2035 2236 2438 2641 2744 2848 2851 2754 2556 -1537 1538 1640 1841 1941 2140 2238 2231 2330 2530 2632 2633 -2238 2332 2431 2531 2632 -13235 CR 2068 21 24 28 -1956 1335 -2056 1435 
-1656 2756 3055 3153 3151 3048 2947 2646 1746 -2756 2955 3053 3051 2948 2847 2646 -2246 2445 2544 2636 2735 2935 3037 3038 -2544 2737 2836 2936 3037 -1035 1735 -13435 CS 2069 21 23 28 -2954 3054 3156 3050 3052 2954 2855 2556 2156 1855 1653 1651 1749 1848 2544 2742 -1651 1849 2545 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1341 1235 1337 1437 -13335 CT 2070 21 21 28 -2356 1735 -2456 1835 -1756 1450 1656 3156 3050 3056 -1435 2135 -13135 CU 2071 21 25 28 -1856 1545 1441 1438 1536 1835 2235 2536 2738 2841 3256 -1956 1645 1541 1538 1636 1835 -1556 2256 -2956 3556 -13535 CV 2072 21 20 28 -1656 1735 -1756 1837 -3056 1735 -1456 2056 -2656 3256 -13035 CW 2073 21 26 28 -1856 1635 -1956 1737 -2656 1635 -2656 2435 -2756 2537 -3456 2435 -1556 2256 -3156 3756 -13635 CX 2074 21 22 28 -1756 2435 -1856 2535 -3156 1135 -1556 2156 -2756 3356 -935 1535 -2135 2735 -13235 CY 2075 21 21 28 -1656 2046 1735 -1756 2146 1835 -3156 2146 -1456 2056 -2756 3356 -1435 2135 -13135 CZ 2076 21 22 28 -3056 1135 -3156 1235 -1856 1550 1756 3156 -1135 2535 2741 2435 -13235 Ca 2151 21 21 28 -2649 2442 2338 2336 2435 2735 2937 3039 -2749 2542 2438 2436 2535 -2442 2445 2348 2149 1949 1648 1445 1342 1339 1437 1536 1735 1935 2136 2339 2442 -1949 1748 1545 1442 1438 1536 -13135 Cb 2152 21 19 28 -1856 1443 1440 1537 1636 -1956 1543 -1543 1646 1848 2049 2249 2448 2547 2645 2642 2539 2336 2035 1835 1636 1539 1543 -2448 2546 2542 2439 2236 2035 -1556 1956 -12935 Cc 2153 21 18 28 -2446 2445 2545 2546 2448 2249 1949 1648 1445 1342 1339 1437 1536 1735 1935 2236 2439 -1949 1748 1545 1442 1438 1536 -12835 Cd 2154 21 21 28 -2856 2442 2338 2336 2435 2735 2937 3039 -2956 2542 2438 2436 2535 -2442 2445 2348 2149 1949 1648 1445 1342 1339 1437 1536 1735 1935 2136 2339 2442 -1949 1748 1545 1442 1438 1536 -2556 2956 -13135 Ce 2155 21 18 28 -1440 1841 2142 2444 2546 2448 2249 1949 1648 1445 1342 1339 1437 1536 1735 1935 2236 2438 -1949 1748 1545 1442 1438 1536 -12835 Cf 2156 21 15 28 -2555 2454 2553 2654 2655 2556 2356 2155 2054 1952 1849 1535 1431 1329 -2356 2154 2052 1948 1739 1635 1532 1430 1329 1128 928 829 830 931 1030 929 -1449 2449 -12535 Cg 2157 21 20 28 -2749 2335 2232 2029 1728 1428 1229 1130 1131 1232 1331 1230 -2649 2235 2132 1929 1728 -2442 2445 2348 2149 1949 1648 1445 1342 1339 1437 1536 1735 1935 2136 2339 2442 -1949 1748 1545 1442 1438 1536 -13035 Ch 2158 21 21 28 -1856 1235 -1956 1335 -1542 1746 1948 2149 2349 2548 2647 2645 2439 2436 2535 -2349 2547 2545 2339 2336 2435 2735 2937 3039 -1556 1956 -13135 Ci 2159 21 13 28 -1956 1855 1954 2055 1956 -1145 1247 1449 1749 1848 1845 1639 1636 1735 -1649 1748 1745 1539 1536 1635 1935 2137 2239 -12335 Cj 2160 21 13 28 -2056 1955 2054 2155 2056 -1245 1347 1549 1849 1948 1945 1635 1532 1430 1329 1128 928 829 830 931 1030 929 -1749 1848 1845 1535 1432 1330 1128 -12335 Ck 2161 21 20 28 -1856 1235 -1956 1335 -2648 2547 2646 2747 2748 2649 2549 2348 1944 1743 1543 -1743 1942 2136 2235 -1743 1842 2036 2135 2335 2536 2739 -1556 1956 -13035 Cl 2162 21 12 28 -1856 1442 1338 1336 1435 1735 1937 2039 -1956 1542 1438 1436 1535 -1556 1956 -12235 Cm 2163 21 33 28 -1145 1247 1449 1749 1848 1846 1742 1535 -1649 1748 1746 1642 1435 -1742 1946 2148 2349 2549 2748 2847 2845 2535 -2549 2747 2745 2435 -2742 2946 3148 3349 3549 3748 3847 3845 3639 3636 3735 -3549 3747 3745 3539 3536 3635 3935 4137 4239 -14335 Cn 2164 21 23 28 -1145 1247 1449 1749 1848 1846 1742 1535 -1649 1748 1746 1642 1435 -1742 1946 2148 2349 2549 2748 2847 2845 2639 2636 2735 -2549 2747 2745 2539 2536 2635 2935 3137 3239 -13335 Co 2165 21 
18 28 -1949 1648 1445 1342 1339 1437 1536 1735 1935 2236 2439 2542 2545 2447 2348 2149 1949 -1949 1748 1545 1442 1438 1536 -1935 2136 2339 2442 2446 2348 -12835 Cp 2166 21 21 28 -1145 1247 1449 1749 1848 1846 1742 1328 -1649 1748 1746 1642 1228 -1742 1845 2048 2249 2449 2648 2747 2845 2842 2739 2536 2235 2035 1836 1739 1742 -2648 2746 2742 2639 2436 2235 -928 1628 -13135 Cq 2167 21 20 28 -2649 2028 -2749 2128 -2442 2445 2348 2149 1949 1648 1445 1342 1339 1437 1536 1735 1935 2136 2339 2442 -1949 1748 1545 1442 1438 1536 -1728 2428 -13035 Cr 2168 21 17 28 -1145 1247 1449 1749 1848 1846 1742 1535 -1649 1748 1746 1642 1435 -1742 1946 2148 2349 2549 2648 2647 2546 2447 2548 -12735 Cs 2169 21 17 28 -2447 2446 2546 2547 2448 2149 1849 1548 1447 1445 1544 2240 2339 -1446 1545 2241 2340 2337 2236 1935 1635 1336 1237 1238 1338 1337 -12735 Ct 2170 21 14 28 -1956 1542 1438 1436 1535 1835 2037 2139 -2056 1642 1538 1536 1635 -1349 2249 -12435 Cu 2171 21 23 28 -1145 1247 1449 1749 1848 1845 1639 1637 1835 -1649 1748 1745 1539 1537 1636 1835 2035 2236 2438 2642 -2849 2642 2538 2536 2635 2935 3137 3239 -2949 2742 2638 2636 2735 -13335 Cv 2172 21 20 28 -1145 1247 1449 1749 1848 1845 1639 1637 1835 -1649 1748 1745 1539 1537 1636 1835 1935 2236 2438 2641 2745 2749 2649 2747 -13035 Cw 2173 21 29 28 -1145 1247 1449 1749 1848 1845 1639 1637 1835 -1649 1748 1745 1539 1537 1636 1835 2035 2236 2438 2540 -2749 2540 2537 2636 2835 3035 3236 3438 3540 3644 3649 3549 3647 -2849 2640 2637 2835 -13935 Cx 2174 21 20 28 -1345 1548 1749 2049 2147 2144 -1949 2047 2044 1940 1838 1636 1435 1335 1236 1237 1338 1437 1336 -1940 1937 2035 2335 2536 2739 -2748 2647 2746 2847 2848 2749 2649 2448 2246 2144 2040 2037 2135 -13035 Cy 2175 21 21 28 -1145 1247 1449 1749 1848 1845 1639 1637 1835 -1649 1748 1745 1539 1537 1636 1835 2035 2236 2438 2642 -2949 2535 2432 2229 1928 1628 1429 1330 1331 1432 1531 1430 -2849 2435 2332 2129 1928 -13135 Cz 2176 21 20 28 -2749 2647 2445 1639 1437 1335 -1445 1547 1749 2049 2447 -1547 1748 2048 2447 2647 -1437 1637 2036 2336 2537 -1637 2035 2335 2537 2639 -13035 C0 2750 21 21 28 -2256 1955 1753 1550 1447 1343 1340 1437 1536 1735 1935 2236 2438 2641 2744 2848 2851 2754 2655 2456 2256 -2256 2055 1853 1650 1547 1443 1440 1537 1735 -1935 2136 2338 2541 2644 2748 2751 2654 2456 -13135 C1 2751 21 21 28 -2252 1735 -2456 1835 -2456 2153 1851 1650 -2353 1951 1650 -13135 C2 2752 21 21 28 -1752 1851 1750 1651 1652 1754 1855 2156 2456 2755 2853 2851 2749 2547 2245 1843 1541 1339 1135 -2456 2655 2753 2751 2649 2447 1843 -1237 1338 1538 2036 2336 2537 2639 -1538 2035 2335 2536 2639 -13135 C3 2753 21 21 28 -1752 1851 1750 1651 1652 1754 1855 2156 2456 2755 2853 2851 2749 2447 2146 -2456 2655 2753 2751 2649 2447 -1946 2146 2445 2544 2642 2639 2537 2436 2135 1735 1436 1337 1239 1240 1341 1440 1339 -2146 2345 2444 2542 2539 2437 2336 2135 -13135 C4 2754 21 21 28 -2655 2035 -2756 2135 -2756 1241 2841 -13135 C5 2755 21 21 28 -1956 1446 -1956 2956 -1955 2455 2956 -1446 1547 1848 2148 2447 2546 2644 2641 2538 2336 2035 1735 1436 1337 1239 1240 1341 1440 1339 -2148 2347 2446 2544 2541 2438 2236 2035 -13135 C6 2756 21 21 28 -2753 2652 2751 2852 2853 2755 2556 2256 1955 1753 1550 1447 1343 1339 1437 1536 1735 2035 2336 2538 2640 2643 2545 2446 2247 1947 1746 1544 1442 -2256 2055 1853 1650 1547 1443 1438 1536 -2035 2236 2438 2540 2544 2446 -13135 C7 2757 21 21 28 -1656 1450 -2956 2853 2650 2144 1941 1839 1735 -2650 2044 1841 1739 1635 -1553 1856 2056 2553 -1654 1855 2055 2553 2753 2854 2956 -13135 C8 2758 21 21 28 -2156 1855 
1754 1652 1649 1747 1946 2246 2647 2748 2850 2853 2755 2456 2156 -2156 1955 1854 1752 1749 1847 1946 -2246 2547 2648 2750 2753 2655 2456 -1946 1545 1343 1241 1238 1336 1635 2035 2436 2537 2639 2642 2544 2445 2246 -1946 1645 1443 1341 1338 1436 1635 -2035 2336 2437 2539 2543 2445 -13135 C9 2759 21 21 28 -2749 2647 2445 2244 1944 1745 1646 1548 1551 1653 1855 2156 2456 2655 2754 2852 2848 2744 2641 2438 2236 1935 1635 1436 1338 1339 1440 1539 1438 -1745 1647 1651 1753 1955 2156 -2655 2753 2748 2644 2541 2338 2136 1935 -13135 C. 2760 21 11 28 -1337 1236 1335 1436 1337 -12135 C, 2761 21 11 28 -1335 1236 1337 1436 1435 1333 1131 -12135 C: 2762 21 11 28 -1649 1548 1647 1748 1649 -1337 1236 1335 1436 -12135 C; 2763 21 11 28 -1649 1548 1647 1748 1649 -1335 1236 1337 1436 1435 1333 1131 -12135 C! 2764 21 11 28 -1856 1755 1543 -1855 1543 -1856 1955 1543 -1337 1236 1335 1436 1337 -12135 C? 2765 21 21 28 -1752 1851 1750 1651 1652 1754 1855 2156 2556 2855 2953 2951 2849 2748 2146 1945 1943 2042 2242 -2556 2755 2853 2851 2749 2648 2447 -1837 1736 1835 1936 1837 -13135 C/ 2770 21 22 28 -3460 828 -13235 C( 2771 21 15 28 -2560 2157 1854 1651 1447 1342 1338 1433 1530 1628 -2157 1853 1649 1546 1441 1436 1531 1628 -12535 C) 2772 21 15 28 -1960 2058 2155 2250 2246 2141 1937 1734 1431 1028 -1960 2057 2152 2147 2042 1939 1735 1431 -12535 C* 2773 21 17 28 -2056 2044 -1553 2547 -2553 1547 -12735 C- 2774 21 26 28 -1444 3244 -13635 C 2749 21 16 28 -12635 PHYLIPNEW-3.69.650/data/font10000664000175000017500000001345610227217747012173 00000000000000CA 501 21 18 28 -1956 1135 -1956 2735 -1442 2442 -12835 CB 502 21 21 28 -1456 1435 -1456 2356 2655 2754 2852 2850 2748 2647 2346 -1446 2346 2645 2744 2842 2839 2737 2636 2335 1435 -13135 CC 503 21 21 28 -2851 2753 2555 2356 1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 -13135 CD 504 21 21 28 -1456 1435 -1456 2156 2455 2653 2751 2848 2843 2740 2638 2436 2135 1435 -13135 CE 505 21 19 28 -1456 1435 -1456 2756 -1446 2246 -1435 2735 -12935 CF 506 21 18 28 -1456 1435 -1456 2756 -1446 2246 -12835 CG 507 21 21 28 -2851 2753 2555 2356 1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2843 -2343 2843 -13135 CH 508 21 22 28 -1456 1435 -2856 2835 -1446 2846 -13235 CI 509 21 8 28 -1456 1435 -11835 CJ 510 21 16 28 -2256 2240 2137 2036 1835 1635 1436 1337 1240 1242 -12635 CK 511 21 21 28 -1456 1435 -2856 1442 -1947 2835 -13135 CL 512 21 17 28 -1456 1435 -1435 2635 -12735 CM 513 21 24 28 -1456 1435 -1456 2235 -3056 2235 -3056 3035 -13435 CN 514 21 22 28 -1456 1435 -1456 2835 -2856 2835 -13235 CO 515 21 22 28 -1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2943 2948 2851 2753 2555 2356 1956 -13235 CP 516 21 21 28 -1456 1435 -1456 2356 2655 2754 2852 2849 2747 2646 2345 1445 -13135 CQ 517 21 22 28 -1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2943 2948 2851 2753 2555 2356 1956 -2239 2833 -13235 CR 518 21 21 28 -1456 1435 -1456 2356 2655 2754 2852 2850 2748 2647 2346 1446 -2146 2835 -13135 CS 519 21 20 28 -2753 2555 2256 1856 1555 1353 1351 1449 1548 1747 2345 2544 2643 2741 2738 2536 2235 1835 1536 1338 -13035 CT 520 21 16 28 -1856 1835 -1156 2556 -12635 CU 521 21 22 28 -1456 1441 1538 1736 2035 2235 2536 2738 2841 2856 -13235 CV 522 21 18 28 -1156 1935 -2756 1935 -12835 CW 523 21 24 28 -1256 1735 -2256 1735 -2256 2735 -3256 2735 -13435 CX 524 21 20 28 -1356 2735 -2756 1335 -13035 CY 525 21 18 28 -1156 1946 1935 -2756 1946 -12835 CZ 526 21 20 28 -2756 1335 -1356 2756 -1335 2735 -13035 Ca 601 21 19 28 
-2549 2535 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12935 Cb 602 21 19 28 -1456 1435 -1446 1648 1849 2149 2348 2546 2643 2641 2538 2336 2135 1835 1636 1438 -12935 Cc 603 21 18 28 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12835 Cd 604 21 19 28 -2556 2535 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12935 Ce 605 21 18 28 -1343 2543 2545 2447 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12835 Cf 606 21 12 28 -2056 1856 1655 1552 1535 -1249 1949 -12235 Cg 607 21 19 28 -2549 2533 2430 2329 2128 1828 1629 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12935 Ch 608 21 19 28 -1456 1435 -1445 1748 1949 2249 2448 2545 2535 -12935 Ci 609 21 8 28 -1356 1455 1556 1457 1356 -1449 1435 -11835 Cj 610 21 10 28 -1556 1655 1756 1657 1556 -1649 1632 1529 1328 1128 -12035 Ck 611 21 17 28 -1456 1435 -2449 1439 -1843 2535 -12735 Cl 612 21 8 28 -1456 1435 -11835 Cm 613 21 30 28 -1449 1435 -1445 1748 1949 2249 2448 2545 2535 -2545 2848 3049 3349 3548 3645 3635 -14035 Cn 614 21 19 28 -1449 1435 -1445 1748 1949 2249 2448 2545 2535 -12935 Co 615 21 19 28 -1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 2641 2643 2546 2348 2149 1849 -12935 Cp 616 21 19 28 -1449 1428 -1446 1648 1849 2149 2348 2546 2643 2641 2538 2336 2135 1835 1636 1438 -12935 Cq 617 21 19 28 -2549 2528 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -12935 Cr 618 21 13 28 -1449 1435 -1443 1546 1748 1949 2249 -12335 Cs 619 21 17 28 -2446 2348 2049 1749 1448 1346 1444 1643 2142 2341 2439 2438 2336 2035 1735 1436 1338 -12735 Ct 620 21 12 28 -1556 1539 1636 1835 2035 -1249 1949 -12235 Cu 621 21 19 28 -1449 1439 1536 1735 2035 2236 2539 -2549 2535 -12935 Cv 622 21 16 28 -1249 1835 -2449 1835 -12635 Cw 623 21 22 28 -1349 1735 -2149 1735 -2149 2535 -2949 2535 -13235 Cx 624 21 17 28 -1349 2435 -2449 1335 -12735 Cy 625 21 16 28 -1249 1835 -2449 1835 1631 1429 1228 1128 -12635 Cz 626 21 17 28 -2449 1335 -1349 2449 -1335 2435 -12735 C0 700 21 20 28 -1956 1655 1452 1347 1344 1439 1636 1935 2135 2436 2639 2744 2747 2652 2455 2156 1956 -13035 C1 701 21 20 28 -1652 1853 2156 2135 -13035 C2 702 21 20 28 -1451 1452 1554 1655 1856 2256 2455 2554 2652 2650 2548 2345 1335 2735 -13035 C3 703 21 20 28 -1556 2656 2048 2348 2547 2646 2743 2741 2638 2436 2135 1835 1536 1437 1339 -13035 C4 704 21 20 28 -2356 1342 2842 -2356 2335 -13035 C5 705 21 20 28 -2556 1556 1447 1548 1849 2149 2448 2646 2743 2741 2638 2436 2135 1835 1536 1437 1339 -13035 C6 706 21 20 28 -2653 2555 2256 2056 1755 1552 1447 1442 1538 1736 2035 2135 2436 2638 2741 2742 2645 2447 2148 2048 1747 1545 1442 -13035 C7 707 21 20 28 -2756 1735 -1356 2756 -13035 C8 708 21 20 28 -1856 1555 1453 1451 1549 1748 2147 2446 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1342 1444 1646 1947 2348 2549 2651 2653 2555 2256 1856 -13035 C9 709 21 20 28 -2649 2546 2344 2043 1943 1644 1446 1349 1350 1453 1655 1956 2056 2355 2553 2649 2644 2539 2336 2035 1835 1536 1438 -13035 C. 710 21 10 28 -1537 1436 1535 1636 1537 -12035 C, 711 21 10 28 -1636 1535 1436 1537 1636 1634 1532 1431 -12035 C: 712 21 10 28 -1549 1448 1547 1648 1549 -1537 1436 1535 1636 1537 -12035 C; 713 21 10 28 -1549 1448 1547 1648 1549 -1636 1535 1436 1537 1636 1634 1532 1431 -12035 C! 714 21 10 28 -1556 1542 -1537 1436 1535 1636 1537 -12035 C? 
715 21 18 28 -1351 1352 1454 1555 1756 2156 2355 2454 2552 2550 2448 2347 1945 1942 -1937 1836 1935 2036 1937 -12835 C/ 720 21 22 28 -3060 1228 -13235 C( 721 21 14 28 -2160 1958 1755 1551 1446 1442 1537 1733 1930 2128 -12435 C) 722 21 14 28 -1360 1558 1755 1951 2046 2042 1937 1733 1530 1328 -12435 C- 724 21 26 28 -1444 3244 -13635 C* 728 21 16 28 -1850 1838 -1347 2341 -2347 1341 -12635 C 699 21 16 28 -12635 PHYLIPNEW-3.69.650/data/Makefile0000664000175000017500000003423212171071711012641 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # data/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/PHYLIPNEW pkglibdir = $(libdir)/PHYLIPNEW pkglibexecdir = $(libexecdir)/PHYLIPNEW am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = x86_64-unknown-linux-gnu host_triplet = x86_64-unknown-linux-gnu subdir = data DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ 
if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/$(PACKAGE)/data/ ACLOCAL = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run aclocal-1.12 AMTAR = $${TAR-tar} ANT = AR = ar AUTOCONF = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoconf AUTOHEADER = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoheader AUTOMAKE = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run automake-1.12 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -O2 CPP = gcc -E CPPFLAGS = -DAJ_LinuxLF -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 CXX = g++ CXXCPP = g++ -E CXXDEPMODE = depmode=gcc3 CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps DEVWARN_CFLAGS = DLLTOOL = false DSYMUTIL = DUMPBIN = ECHO_C = ECHO_N = -n ECHO_T = EGREP = /usr/bin/grep -E EXEEXT = FGREP = /usr/bin/grep -F GREP = /usr/bin/grep HAVE_MEMMOVE = HAVE_STRERROR = INSTALL = /usr/bin/install -c INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} INSTALL_SCRIPT = ${INSTALL} INSTALL_STRIP_PROGRAM = $(install_sh) -c -s JAR = JAVA = JAVAC = JAVA_CFLAGS = JAVA_CPPFLAGS = -DNO_AUTH JAVA_LDFLAGS = LD = /usr/bin/ld -m elf_x86_64 LDFLAGS = LIBOBJS = LIBS = -lm -lhpdf -lgd -lpng -lz -lm LIBTOOL = $(SHELL) $(top_builddir)/libtool LIPO = LN_S = ln -s LTLIBOBJS = MAKEINFO = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run makeinfo MANIFEST_TOOL = : MKDIR_P = /usr/bin/mkdir -p MYSQL_CFLAGS = -I/usr/include/mysql -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-strict-aliasing -fwrapv -fPIC -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 MYSQL_CONFIG = /usr/bin/mysql_config MYSQL_CPPFLAGS = -I/usr/include/mysql MYSQL_LDFLAGS = -L/usr/lib64/mysql -lmysqlclient -lpthread -lz -lm -lrt -lssl -lcrypto -ldl MYSQL_VERSION = 5.5.32 NM = /usr/bin/nm -B NMEDIT = OBJDUMP = objdump OBJEXT = o OTOOL = OTOOL64 = PACKAGE = PHYLIPNEW PACKAGE_BUGREPORT = emboss-bug@emboss.open-bio.org PACKAGE_NAME = PHYLIPNEW PACKAGE_STRING = PHYLIPNEW 3.69.650 PACKAGE_TARNAME = PHYLIPNEW PACKAGE_URL = http://emboss.open-bio.org/ PACKAGE_VERSION = 3.69.650 PATH_SEPARATOR = : PCRE_DATE = 11-Apr-2009 PCRE_LIB_VERSION = 0:1:0 PCRE_MAJOR = 7 PCRE_MINOR = 9 PCRE_POSIXLIB_VERSION = 0:0:0 PCRE_VERSION = 7.9 POSIX_MALLOC_THRESHOLD = -DPOSIX_MALLOC_THRESHOLD=10 POSTGRESQL_CFLAGS = -I/usr/include POSTGRESQL_CONFIG = /usr/bin/pg_config POSTGRESQL_CPPFLAGS = -I/usr/include POSTGRESQL_LDFLAGS = -L/usr/lib64 -lpq POSTGRESQL_VERSION = 9.2.4 RANLIB = ranlib SED = /usr/bin/sed SET_MAKE = SHELL = /bin/sh STRIP = strip VERSION = 3.69.650 WARN_CFLAGS = XLIB = -lX11 -lXaw -lXt XMKMF = X_CFLAGS = X_EXTRA_LIBS = X_LIBS = X_PRE_LIBS = -lSM -lICE abs_builddir = /data/scratch/embossdist/embassy/phylipnew/data abs_srcdir = 
/data/scratch/embossdist/embassy/phylipnew/data abs_top_builddir = /data/scratch/embossdist/embassy/phylipnew abs_top_srcdir = /data/scratch/embossdist/embassy/phylipnew ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ ac_ct_DUMPBIN = am__include = include am__leading_dot = . am__quote = am__tar = $${TAR-tar} chof - "$$tardir" am__untar = $${TAR-tar} xf - bindir = ${exec_prefix}/bin build = x86_64-unknown-linux-gnu build_alias = build_cpu = x86_64 build_os = linux-gnu build_vendor = unknown builddir = . datadir = ${datarootdir} datarootdir = ${prefix}/share docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} dvidir = ${docdir} embprefix = /usr/local exec_prefix = ${prefix} host = x86_64-unknown-linux-gnu host_alias = host_cpu = x86_64 host_os = linux-gnu host_vendor = unknown htmldir = ${docdir} includedir = ${prefix}/include infodir = ${datarootdir}/info install_sh = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localedir = ${datarootdir}/locale localstatedir = ${prefix}/var mandir = ${datarootdir}/man mkdir_p = $(MKDIR_P) oldincludedir = /usr/include pdfdir = ${docdir} prefix = /usr/local program_transform_name = s,x,x, psdir = ${docdir} sbindir = ${exec_prefix}/sbin sharedstatedir = ${prefix}/com srcdir = . sysconfdir = ${prefix}/etc target_alias = top_build_prefix = ../ top_builddir = .. top_srcdir = .. pkgdata_DATA = font1 font2 font3 font4 font5 \ font6 all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu data/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu data/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: PHYLIPNEW-3.69.650/data/font30000664000175000017500000004113110227217747012164 00000000000000CA 3001 21 20 28 -2056 1336 -1953 2535 -2053 2635 -2056 2735 -1541 2441 -1135 1735 -2235 2935 -1336 1235 -1336 1535 -2536 2335 -2537 2435 -2637 2835 -13035 CB 3002 21 22 28 -1556 1535 -1655 1636 -1756 1735 -1256 2456 2755 2854 2952 2950 2848 2747 2446 -2754 2852 2850 2748 -2456 2655 2753 2749 2647 2446 -1746 2446 2745 2844 2942 2939 2837 2736 2435 1235 -2744 2842 2839 2737 -2446 2645 2743 2738 2636 2435 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -13235 CC 3003 21 21 28 -2753 2856 2850 2753 2555 2356 2056 1755 1553 1451 1348 1343 1440 1538 1736 2035 2335 2536 2738 2840 -1653 1551 1448 1443 1540 1638 -2056 1855 1652 1548 1543 1639 1836 2035 -13135 CD 3004 21 22 28 -1556 1535 -1655 1636 -1756 1735 -1256 2256 2555 2753 2851 2948 2943 2840 2738 2536 2235 1235 -2653 2751 2848 2843 2740 2638 -2256 2455 2652 2748 2743 2639 2436 2235 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -13235 CE 3005 21 21 28 -1556 1535 -1655 1636 -1756 1735 -1256 2856 2850 -1746 2346 -2350 2342 -1235 2835 2841 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -2356 2855 -2556 2854 -2656 2853 -2756 2850 -2350 2246 2342 -2348 2146 2344 -2347 1946 2345 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2335 2836 -2535 2837 -2635 2838 -2735 2841 -13135 CF 3006 21 20 28 -1556 1535 -1655 1636 -1756 1735 -1256 2856 2850 -1746 2346 -2350 2342 -1235 2035 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -2356 2855 -2556 2854 -2656 2853 -2756 2850 -2350 2246 2342 -2348 2146 2344 -2347 1946 2345 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -13035 CG 3007 21 23 28 -2753 2856 2850 2753 2555 2356 2056 1755 1553 1451 1348 1343 1440 1538 1736 2035 2335 2536 2736 2835 2843 -1653 1551 1448 1443 1540 1638 -2056 1855 1652 1548 1543 1639 1836 2035 -2742 2737 -2643 2637 2536 -2343 3143 -2443 2642 -2543 2641 -2943 2841 -3043 2842 -13335 CH 3008 21 24 28 -1556 1535 -1655 1636 -1756 1735 -2756 2735 -2855 2836 -2956 2935 -1256 2056 -2456 3256 -1746 2746 -1235 2035 -2435 3235 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -2556 2755 -2656 2754 -3056 2954 -3156 2955 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2736 2535 -2737 2635 -2937 3035 -2936 3135 -13435 CI 3009 21 12 28 -1556 1535 -1655 1636 -1756 1735 -1256 2056 -1235 2035 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -12235 CJ 3010 21 16 28 -1956 1939 1836 1735 -2055 2039 1936 -2156 2139 2036 1735 1535 1336 1238 1240 1341 1441 1540 1539 1438 1338 -1340 1339 1439 1440 1340 -1656 2456 -1756 1955 -1856 1954 -2256 2154 -2356 2155 -12635 CK 3011 21 22 28 -1556 1535 -1655 1636 -1756 1735 -2855 1744 -2046 2735 -2146 2835 -2148 2935 -1256 2056 -2556 3156 -1235 2035 -2435 3135 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -2756 2855 -3056 2855 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2737 2535 -2737 3035 -13235 CL 3012 21 18 28 -1556 1535 -1655 1636 -1756 1735 -1256 2056 -1235 2735 2741 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2235 2736 -2435 2737 -2535 2738 -2635 2741 -12835 CM 3013 21 26 28 -1556 1536 -1556 2235 -1656 2238 -1756 2338 -2956 2235 -2956 2935 -3055 3036 -3156 3135 -1256 1756 -2956 3456 -1235 1835 -2635 3435 -1356 1555 -3256 3154 -3356 3155 -1536 1335 -1536 1735 -2936 2735 -2937 2835 -3137 3235 -3136 3335 -13635 CN 3014 21 24 28 -1556 1536 -1556 2935 -1656 2838 -1756 2938 -2955 2935 -1256 1756 -2656 3256 -1235 1835 -1356 1555 
-2756 2955 -3156 2955 -1536 1335 -1536 1735 -13435 CO 3015 21 22 28 -2056 1755 1553 1451 1347 1344 1440 1538 1736 2035 2235 2536 2738 2840 2944 2947 2851 2753 2555 2256 2056 -1653 1551 1448 1443 1540 1638 -2638 2740 2843 2848 2751 2653 -2056 1855 1652 1548 1543 1639 1836 2035 -2235 2436 2639 2743 2748 2652 2455 2256 -13235 CP 3016 21 22 28 -1556 1535 -1655 1636 -1756 1735 -1256 2456 2755 2854 2952 2949 2847 2746 2445 1745 -2754 2852 2849 2747 -2456 2655 2753 2748 2646 2445 -1235 2035 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -13235 CQ 3017 21 22 28 -2056 1755 1553 1451 1347 1344 1440 1538 1736 2035 2235 2536 2738 2840 2944 2947 2851 2753 2555 2256 2056 -1653 1551 1448 1443 1540 1638 -2638 2740 2843 2848 2751 2653 -2056 1855 1652 1548 1543 1639 1836 2035 -2235 2436 2639 2743 2748 2652 2455 2256 -1738 1840 2041 2141 2340 2438 2532 2630 2830 2932 2934 -2534 2632 2731 2831 -2438 2633 2732 2832 2933 -13235 CR 3018 21 22 28 -1556 1535 -1655 1636 -1756 1735 -1256 2456 2755 2854 2952 2950 2848 2747 2446 1746 -2754 2852 2850 2748 -2456 2655 2753 2749 2647 2446 -2146 2345 2443 2637 2735 2935 3037 3039 -2639 2737 2836 2936 -2345 2444 2738 2837 2937 3038 -1235 2035 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -13235 CS 3019 21 20 28 -2653 2756 2750 2653 2455 2156 1856 1555 1353 1350 1448 1746 2344 2543 2641 2638 2536 -1450 1548 1747 2345 2544 2642 -1555 1453 1451 1549 1748 2346 2644 2742 2739 2637 2536 2235 1935 1636 1438 1341 1335 1438 -13035 CT 3020 21 20 28 -1256 1250 -1956 1935 -2055 2036 -2156 2135 -2856 2850 -1256 2856 -1635 2435 -1356 1250 -1456 1253 -1556 1254 -1756 1255 -2356 2855 -2556 2854 -2656 2853 -2756 2850 -1936 1735 -1937 1835 -2137 2235 -2136 2335 -13035 CU 3021 21 24 28 -1556 1541 1638 1836 2135 2335 2636 2838 2941 2955 -1655 1640 1738 -1756 1740 1837 1936 2135 -1256 2056 -2656 3256 -1356 1555 -1456 1554 -1856 1754 -1956 1755 -2756 2955 -3156 2955 -13435 CV 3022 21 20 28 -1356 2035 -1456 2038 2035 -1556 2138 -2755 2035 -1156 1856 -2356 2956 -1256 1454 -1656 1554 -1756 1555 -2556 2755 -2856 2755 -13035 CW 3023 21 24 28 -1456 1835 -1556 1840 1835 -1656 1940 -2256 1940 1835 -2256 2635 -2356 2640 2635 -2456 2740 -3055 2740 2635 -1156 1956 -2256 2456 -2756 3356 -1256 1555 -1356 1554 -1756 1654 -1856 1655 -2856 3055 -3256 3055 -13435 CX 3024 21 20 28 -1356 2535 -1456 2635 -1556 2735 -2655 1436 -1156 1856 -2356 2956 -1135 1735 -2235 2935 -1256 1554 -1656 1554 -1756 1555 -2456 2655 -2856 2655 -1436 1235 -1436 1635 -2536 2335 -2537 2435 -2537 2835 -13035 CY 3025 21 22 28 -1356 2045 2035 -1456 2145 2136 -1556 2245 2235 -2855 2245 -1156 1856 -2556 3156 -1735 2535 -1256 1455 -1756 1555 -2656 2855 -3056 2855 -2036 1835 -2037 1935 -2237 2335 -2236 2435 -13235 CZ 3026 21 20 28 -2756 1356 1350 -2556 1335 -2656 1435 -2756 1535 -1335 2735 2741 -1456 1350 -1556 1353 -1656 1354 -1856 1355 -2235 2736 -2435 2737 -2535 2738 -2635 2741 -13035 Ca 3101 21 20 28 -1546 1547 1647 1645 1445 1447 1548 1749 2149 2348 2447 2545 2538 2636 2735 -2347 2445 2438 2536 -2149 2248 2346 2338 2436 2735 2835 -2344 2243 1742 1441 1339 1338 1436 1735 2035 2236 2338 -1541 1439 1438 1536 -2243 1842 1641 1539 1538 1636 1735 -13035 Cb 3102 21 21 28 -1556 1535 1636 1836 -1655 1637 -1256 1756 1736 -1746 1848 2049 2249 2548 2746 2843 2841 2738 2536 2235 2035 1836 1738 -2646 2744 2740 2638 -2249 2448 2547 2644 2640 2537 2436 2235 -1356 1555 -1456 1554 -13135 Cc 3103 21 19 28 -2545 2546 2446 2444 2644 2646 2448 2249 1949 1648 1446 1343 1341 1438 
1636 1935 2135 2436 2638 -1546 1444 1440 1538 -1949 1748 1647 1544 1540 1637 1736 1935 -12935 Cd 3104 21 21 28 -2456 2435 2935 -2555 2536 -2156 2656 2635 -2446 2348 2149 1949 1648 1446 1343 1341 1438 1636 1935 2135 2336 2438 -1546 1444 1440 1538 -1949 1748 1647 1544 1540 1637 1736 1935 -2256 2455 -2356 2454 -2637 2735 -2636 2835 -13135 Ce 3105 21 19 28 -1543 2643 2645 2547 2448 2149 1949 1648 1446 1343 1341 1438 1636 1935 2135 2436 2638 -2544 2545 2447 -1546 1444 1440 1538 -2443 2446 2348 2149 -1949 1748 1647 1544 1540 1637 1736 1935 -12935 Cf 3106 21 14 28 -2254 2255 2155 2153 2353 2355 2256 1956 1755 1654 1551 1535 -1754 1651 1636 -1956 1855 1753 1735 -1249 2149 -1235 2035 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -12435 Cg 3107 21 19 28 -2548 2647 2748 2649 2549 2348 2247 -1849 1648 1547 1445 1443 1541 1640 1839 2039 2240 2341 2443 2445 2347 2248 2049 1849 -1647 1545 1543 1641 -2241 2343 2345 2247 -1849 1748 1646 1642 1740 1839 -2039 2140 2242 2246 2148 2049 -1541 1440 1338 1337 1435 1534 1833 2233 2532 2631 -1535 1834 2234 2533 -1337 1436 1735 2235 2534 2632 2631 2529 2228 1628 1329 1231 1232 1334 1635 -1628 1429 1331 1332 1434 1635 -12935 Ch 3108 21 23 28 -1556 1535 -1655 1636 -1256 1756 1735 -1745 1847 1948 2149 2449 2648 2747 2844 2835 -2647 2744 2736 -2449 2548 2645 2635 -1235 2035 -2335 3135 -1356 1555 -1456 1554 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2636 2435 -2637 2535 -2837 2935 -2836 3035 -13335 Ci 3109 21 12 28 -1556 1554 1754 1756 1556 -1656 1654 -1555 1755 -1549 1535 -1648 1636 -1249 1749 1735 -1235 2035 -1349 1548 -1449 1547 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -12235 Cj 3110 21 13 28 -1756 1754 1954 1956 1756 -1856 1854 -1755 1955 -1749 1732 1629 1528 -1848 1833 1730 -1449 1949 1933 1830 1729 1528 1228 1129 1131 1331 1329 1229 1230 -1549 1748 -1649 1747 -12335 Ck 3111 21 22 28 -1556 1535 -1655 1636 -1256 1756 1735 -2648 1739 -2143 2835 -2142 2735 -2042 2635 -2349 3049 -1235 2035 -2335 3035 -1356 1555 -1456 1554 -2449 2648 -2949 2648 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2637 2435 -2537 2935 -13235 Cl 3112 21 12 28 -1556 1535 -1655 1636 -1256 1756 1735 -1235 2035 -1356 1555 -1456 1554 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -12235 Cm 3113 21 34 28 -1549 1535 -1648 1636 -1249 1749 1735 -1745 1847 1948 2149 2449 2648 2747 2844 2835 -2647 2744 2736 -2449 2548 2645 2635 -2845 2947 3048 3249 3549 3748 3847 3944 3935 -3747 3844 3836 -3549 3648 3745 3735 -1235 2035 -2335 3135 -3435 4235 -1349 1548 -1449 1547 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2636 2435 -2637 2535 -2837 2935 -2836 3035 -3736 3535 -3737 3635 -3937 4035 -3936 4135 -14435 Cn 3114 21 23 28 -1549 1535 -1648 1636 -1249 1749 1735 -1745 1847 1948 2149 2449 2648 2747 2844 2835 -2647 2744 2736 -2449 2548 2645 2635 -1235 2035 -2335 3135 -1349 1548 -1449 1547 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -2636 2435 -2637 2535 -2837 2935 -2836 3035 -13335 Co 3115 21 20 28 -1949 1648 1446 1343 1341 1438 1636 1935 2135 2436 2638 2741 2743 2646 2448 2149 1949 -1546 1444 1440 1538 -2538 2640 2644 2546 -1949 1748 1647 1544 1540 1637 1736 1935 -2135 2336 2437 2540 2544 2447 2348 2149 -13035 Cp 3116 21 21 28 -1549 1528 -1648 1629 -1249 1749 1728 -1746 1848 2049 2249 2548 2746 2843 2841 2738 2536 2235 2035 1836 1738 -2646 2744 2740 2638 -2249 2448 2547 2644 2640 2537 2436 2235 -1228 2028 -1349 1548 -1449 1547 -1529 1328 -1530 1428 -1730 1828 -1729 1928 -13135 Cq 3117 21 20 28 -2448 2428 -2547 2529 -2348 2548 2649 2628 -2446 2348 2149 1949 1648 1446 1343 1341 1438 1636 1935 2135 2336 2438 -1546 1444 
1440 1538 -1949 1748 1647 1544 1540 1637 1736 1935 -2128 2928 -2429 2228 -2430 2328 -2630 2728 -2629 2828 -13035 Cr 3118 21 17 28 -1549 1535 -1648 1636 -1249 1749 1735 -2447 2448 2348 2346 2546 2548 2449 2249 2048 1846 1743 -1235 2035 -1349 1548 -1449 1547 -1536 1335 -1537 1435 -1737 1835 -1736 1935 -12735 Cs 3119 21 17 28 -2347 2449 2445 2347 2248 2049 1649 1448 1347 1345 1443 1642 2141 2340 2437 -1448 1345 -1444 1643 2142 2341 -2440 2336 -1347 1445 1644 2143 2342 2440 2437 2336 2135 1735 1536 1437 1339 1335 1437 -12735 Ct 3120 21 15 28 -1554 1540 1637 1736 1935 2135 2336 2438 -1654 1639 1737 -1554 1756 1739 1836 1935 -1249 2149 -12535 Cu 3121 21 23 28 -1549 1540 1637 1736 1935 2235 2436 2537 2639 -1648 1639 1737 -1249 1749 1739 1836 1935 -2649 2635 3135 -2748 2736 -2349 2849 2835 -1349 1548 -1449 1547 -2837 2935 -2836 3035 -13335 Cv 3122 21 18 28 -1349 1935 -1449 1937 -1549 2037 -2548 2037 1935 -1149 1849 -2149 2749 -1249 1547 -1749 1548 -2349 2548 -2649 2548 -12835 Cw 3123 21 24 28 -1449 1835 -1549 1838 -1649 1938 -2249 1938 1835 -2249 2635 -2349 2638 -2249 2449 2738 -3048 2738 2635 -1149 1949 -2749 3349 -1249 1548 -1849 1648 -2849 3048 -3249 3048 -13435 Cx 3124 21 20 28 -1449 2435 -1549 2535 -1649 2635 -2548 1536 -1249 1949 -2249 2849 -1235 1835 -2135 2835 -1349 1548 -1849 1648 -2349 2548 -2749 2548 -1536 1335 -1536 1735 -2436 2235 -2536 2735 -13035 Cy 3125 21 19 28 -1449 2035 -1549 2037 -1649 2137 -2648 2137 1831 1629 1428 1228 1129 1131 1331 1329 1229 1230 -1249 1949 -2249 2849 -1349 1647 -1849 1648 -2449 2648 -2749 2648 -12935 Cz 3126 21 18 28 -2349 1335 -2449 1435 -2549 1535 -2549 1349 1345 -1335 2535 2539 -1449 1345 -1549 1346 -1649 1347 -1849 1348 -2035 2536 -2235 2537 -2335 2538 -2435 2539 -12835 C0 3200 21 20 28 -1956 1655 1452 1347 1344 1439 1636 1935 2135 2436 2639 2744 2747 2652 2455 2156 1956 -1654 1552 1448 1443 1539 1637 -2437 2539 2643 2648 2552 2454 -1956 1755 1653 1548 1543 1638 1736 1935 -2135 2336 2438 2543 2548 2453 2355 2156 -13035 C1 3201 21 20 28 -1954 1935 -2054 2036 -2156 2135 -2156 1853 1652 -1535 2535 -1936 1735 -1937 1835 -2137 2235 -2136 2335 -13035 C2 3202 21 20 28 -1452 1451 1551 1552 1452 -1453 1553 1652 1651 1550 1450 1351 1352 1454 1555 1856 2256 2555 2654 2752 2750 2648 2346 1844 1643 1441 1338 1335 -2554 2652 2650 2548 -2256 2455 2552 2550 2448 2246 1844 -1337 1438 1638 2137 2537 2738 -1638 2136 2536 2637 -1638 2135 2535 2636 2738 2740 -13035 C3 3203 21 20 28 -1452 1451 1551 1552 1452 -1453 1553 1652 1651 1550 1450 1351 1352 1454 1555 1856 2256 2555 2653 2650 2548 2247 -2455 2553 2550 2448 -2156 2355 2453 2450 2348 2147 -1947 2247 2446 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1340 1441 1541 1640 1639 1538 1438 -2544 2642 2639 2537 -2147 2346 2445 2542 2539 2436 2235 -1440 1439 1539 1540 1440 -13035 C4 3204 21 20 28 -2153 2135 -2254 2236 -2356 2335 -2356 1241 2841 -1835 2635 -2136 1935 -2137 2035 -2337 2435 -2336 2535 -13035 C5 3205 21 20 28 -1556 1346 1548 1849 2149 2448 2646 2743 2741 2638 2436 2135 1835 1536 1437 1339 1340 1441 1541 1640 1639 1538 1438 -2546 2644 2640 2538 -2149 2348 2447 2544 2540 2437 2336 2135 -1440 1439 1539 1540 1440 -1556 2556 -1555 2355 -1554 1954 2355 2556 -13035 C6 3206 21 20 28 -2453 2452 2552 2553 2453 -2554 2454 2353 2352 2451 2551 2652 2653 2555 2356 2056 1755 1553 1451 1347 1341 1438 1636 1935 2135 2436 2638 2741 2742 2645 2447 2148 1948 1747 1646 1544 -1653 1551 1447 1441 1538 1637 -2538 2640 2643 2545 -2056 1855 1754 1652 1548 1541 1638 1736 1935 -2135 2336 2437 2540 2543 2446 2347 2148 -13035 C7 3207 21 20 
28 -1356 1350 -2756 2753 2650 2245 2143 2039 2035 -2144 2042 1939 1935 -2650 2145 1942 1839 1835 2035 -1352 1454 1656 1856 2353 2553 2654 2756 -1554 1655 1855 2054 -1352 1453 1654 1854 2353 -13035 C8 3208 21 20 28 -1856 1555 1453 1450 1548 1847 2247 2548 2650 2653 2555 2256 1856 -1655 1553 1550 1648 -2448 2550 2553 2455 -1856 1755 1653 1650 1748 1847 -2247 2348 2450 2453 2355 2256 -1847 1546 1445 1343 1339 1437 1536 1835 2235 2536 2637 2739 2743 2645 2546 2247 -1545 1443 1439 1537 -2537 2639 2643 2545 -1847 1646 1543 1539 1636 1835 -2235 2436 2539 2543 2446 2247 -13035 C9 3209 21 20 28 -1539 1538 1638 1639 1539 -2547 2445 2344 2143 1943 1644 1446 1349 1350 1453 1655 1956 2156 2455 2653 2750 2744 2640 2538 2336 2035 1735 1536 1438 1439 1540 1640 1739 1738 1637 1537 -1546 1448 1451 1553 -2454 2553 2650 2644 2540 2438 -1943 1744 1645 1548 1551 1654 1755 1956 -2156 2355 2453 2550 2543 2439 2337 2236 2035 -13035 C. 3210 21 11 28 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C, 3211 21 11 28 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C: 3212 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C; 3213 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C! 3214 21 11 28 -1556 1455 1453 1545 -1556 1542 1642 -1556 1656 1642 -1656 1755 1753 1645 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C? 3215 21 19 28 -1451 1452 1552 1550 1350 1352 1454 1555 1756 2156 2455 2554 2652 2650 2548 2447 2045 -2454 2553 2549 2448 -2156 2355 2453 2449 2347 2246 -1945 1942 2042 2045 1945 -1938 1837 1836 1935 2035 2136 2137 2038 1938 -1937 1936 2036 2037 1937 -12935 C/ 3220 21 23 28 -3060 1228 1328 -3060 3160 1328 -13335 C( 3221 21 14 28 -2060 1858 1655 1451 1346 1342 1437 1633 1830 2028 -1654 1551 1447 1441 1537 1634 -1858 1756 1653 1547 1541 1635 1732 1830 -12435 C) 3222 21 14 28 -1460 1658 1855 2051 2146 2142 2037 1833 1630 1428 -1854 1951 2047 2041 1937 1834 -1658 1756 1853 1947 1941 1835 1732 1630 -12435 C* 3223 21 16 28 -1856 1755 1945 1844 -1856 1844 -1856 1955 1745 1844 -1353 1453 2247 2347 -1353 2347 -1353 1352 2348 2347 -2353 2253 1447 1347 -2353 1347 -2353 2352 1348 1347 -12635 C- 3224 21 25 28 -1445 3145 3144 -1445 1444 3144 -13535 C 3199 21 16 28 -12635 PHYLIPNEW-3.69.650/data/font20000664000175000017500000002603010227217747012164 00000000000000CA 2501 21 20 28 -2056 1235 -2053 1335 1235 -2053 2735 2835 -2056 2835 -1541 2541 -1440 2640 -13035 CB 2502 21 20 28 -1456 1435 -1555 1536 -1456 2256 2555 2654 2752 2749 2647 2546 2245 -1555 2255 2554 2652 2649 2547 2246 -1546 2246 2545 2644 2742 2739 2637 2536 2235 1435 -1545 2245 2544 2642 2639 2537 2236 1536 -13035 CC 2503 21 21 28 -2851 2753 2555 2356 1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 -2851 2751 2653 2554 2355 1955 1754 1551 1448 1443 1540 1737 1936 2336 2537 2638 2740 2840 -13135 CD 2504 21 21 28 -1456 1435 -1555 1536 -1456 2156 2455 2653 2751 2848 2843 2740 2638 2436 2135 1435 -1555 2155 2454 2553 2651 2748 2743 2640 2538 2437 2136 1536 -13135 CE 2505 21 19 28 -1456 1435 -1555 1536 -1456 2656 -1555 2655 2656 -1546 2146 2145 -1545 2145 -1536 2636 2635 -1435 2635 -12935 CF 2506 21 18 28 -1456 1435 -1555 1535 1435 -1456 2656 -1555 2655 2656 -1546 2146 
2145 -1545 2145 -12835 CG 2507 21 21 28 -2851 2753 2555 2356 1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2844 2344 -2851 2751 2653 2554 2355 1955 1754 1653 1551 1448 1443 1540 1638 1737 1936 2336 2537 2638 2740 2743 2343 2344 -13135 CH 2508 21 22 28 -1456 1435 -1456 1556 1535 1435 -2856 2756 2735 2835 -2856 2835 -1546 2746 -1545 2745 -13235 CI 2509 21 9 28 -1456 1435 1535 -1456 1556 1535 -11935 CJ 2510 21 17 28 -2256 2240 2137 1936 1736 1537 1440 1340 -2256 2356 2340 2237 2136 1935 1735 1536 1437 1340 -12735 CK 2511 21 21 28 -1456 1435 1535 -1456 1556 1535 -2856 2756 1544 -2856 1543 -1847 2735 2835 -1947 2835 -13135 CL 2512 21 17 28 -1456 1435 -1456 1556 1536 -1536 2636 2635 -1435 2635 -12735 CM 2513 21 24 28 -1456 1435 -1551 1535 1435 -1551 2235 -1456 2238 -3056 2238 -2951 2235 -2951 2935 3035 -3056 3035 -13435 CN 2514 21 22 28 -1456 1435 -1553 1535 1435 -1553 2835 -1456 2738 -2756 2738 -2756 2856 2835 -13235 CO 2515 21 22 28 -1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2943 2948 2851 2753 2555 2356 1956 -2055 1754 1551 1448 1443 1540 1737 2036 2236 2537 2740 2843 2848 2751 2554 2255 2055 -13235 CP 2516 21 20 28 -1456 1435 -1555 1535 1435 -1456 2356 2555 2654 2752 2749 2647 2546 2345 1545 -1555 2355 2554 2652 2649 2547 2346 1546 -13035 CQ 2517 21 22 28 -1956 1755 1553 1451 1348 1343 1440 1538 1736 1935 2335 2536 2738 2840 2943 2948 2851 2753 2555 2356 1956 -2055 1754 1551 1448 1443 1540 1737 2036 2236 2537 2740 2843 2848 2751 2554 2255 2055 -2238 2733 2833 -2238 2338 2833 -13235 CR 2518 21 20 28 -1456 1435 -1555 1535 1435 -1456 2256 2555 2654 2752 2749 2647 2546 2245 1545 -1555 2255 2554 2652 2649 2547 2246 1546 -2045 2635 2735 -2145 2735 -13035 CS 2519 21 20 28 -2753 2555 2256 1856 1555 1353 1351 1449 1548 1747 2245 2444 2543 2641 2638 2537 2236 1836 1637 1538 1338 -2753 2553 2454 2255 1855 1554 1453 1451 1549 1748 2246 2445 2643 2741 2738 2536 2235 1835 1536 1338 -13035 CT 2520 21 17 28 -1855 1835 -1955 1935 1835 -1256 2556 2555 -1256 1255 2555 -12735 CU 2521 21 22 28 -1456 1441 1538 1736 2035 2235 2536 2738 2841 2856 -1456 1556 1541 1638 1737 2036 2236 2537 2638 2741 2756 2856 -13235 CV 2522 21 20 28 -1256 2035 -1256 1356 2038 -2856 2756 2038 -2856 2035 -13035 CW 2523 21 26 28 -1256 1835 -1256 1356 1838 -2356 1838 -2353 1835 -2353 2835 -2356 2838 -3456 3356 2838 -3456 2835 -13635 CX 2524 21 20 28 -1356 2635 2735 -1356 1456 2735 -2756 2656 1335 -2756 1435 1335 -13035 CY 2525 21 19 28 -1256 1946 1935 2035 -1256 1356 2046 -2756 2656 1946 -2756 2046 2035 -12935 CZ 2526 21 20 28 -2656 1335 -2756 1435 -1356 2756 -1356 1355 2655 -1436 2736 2735 -1335 2735 -13035 Ca 2601 21 20 28 -2549 2535 2635 -2549 2649 2635 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -2546 2148 1848 1647 1546 1443 1441 1538 1637 1836 2136 2538 -13035 Cb 2602 21 20 28 -1456 1435 1535 -1456 1556 1535 -1546 1748 1949 2249 2448 2646 2743 2741 2638 2436 2235 1935 1736 1538 -1546 1948 2248 2447 2546 2643 2641 2538 2437 2236 1936 1538 -13035 Cc 2603 21 18 28 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -2546 2445 2347 2148 1848 1647 1546 1443 1441 1538 1637 1836 2136 2337 2439 2538 -12835 Cd 2604 21 20 28 -2556 2535 2635 -2556 2656 2635 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -2546 2148 1848 1647 1546 1443 1441 1538 1637 1836 2136 2538 -13035 Ce 2605 21 18 28 -1442 2542 2545 2447 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -1443 2443 2445 2347 2148 1848 1647 1546 1443 1441 
1538 1637 1836 2136 2337 2439 2538 -12835 Cf 2606 21 14 28 -2156 1956 1755 1652 1635 1735 -2156 2155 1955 1754 -1855 1752 1735 -1349 2049 2048 -1349 1348 2048 -12435 Cg 2607 21 20 28 -2649 2549 2534 2431 2330 2129 1929 1730 1631 1431 -2649 2634 2531 2329 2128 1828 1629 1431 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -2546 2148 1848 1647 1546 1443 1441 1538 1637 1836 2136 2538 -13035 Ch 2608 21 20 28 -1456 1435 1535 -1456 1556 1535 -1545 1848 2049 2349 2548 2645 2635 -1545 1847 2048 2248 2447 2545 2535 2635 -13035 Ci 2609 21 9 28 -1456 1355 1354 1453 1553 1654 1655 1556 1456 -1455 1454 1554 1555 1455 -1449 1435 1535 -1449 1549 1535 -11935 Cj 2610 21 9 28 -1456 1355 1354 1453 1553 1654 1655 1556 1456 -1455 1454 1554 1555 1455 -1449 1428 1528 -1449 1549 1528 -11935 Ck 2611 21 19 28 -1456 1435 1535 -1456 1556 1535 -2649 2549 1539 -2649 1538 -1842 2435 2635 -1943 2635 -12935 Cl 2612 21 9 28 -1456 1435 1535 -1456 1556 1535 -11935 Cm 2613 21 31 28 -1449 1435 1535 -1449 1549 1535 -1545 1848 2049 2349 2548 2645 2635 -1545 1847 2048 2248 2447 2545 2535 2635 -2645 2948 3149 3449 3648 3745 3735 -2645 2947 3148 3348 3547 3645 3635 3735 -14135 Cn 2614 21 20 28 -1449 1435 1535 -1449 1549 1535 -1545 1848 2049 2349 2548 2645 2635 -1545 1847 2048 2248 2447 2545 2535 2635 -13035 Co 2615 21 19 28 -1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 2641 2643 2546 2348 2149 1849 -1848 1647 1546 1443 1441 1538 1637 1836 2136 2337 2438 2541 2543 2446 2347 2148 1848 -12935 Cp 2616 21 20 28 -1449 1428 1528 -1449 1549 1528 -1546 1748 1949 2249 2448 2646 2743 2741 2638 2436 2235 1935 1736 1538 -1546 1948 2248 2447 2546 2643 2641 2538 2437 2236 1936 1538 -13035 Cq 2617 21 20 28 -2549 2528 2628 -2549 2649 2628 -2546 2348 2149 1849 1648 1446 1343 1341 1438 1636 1835 2135 2336 2538 -2546 2148 1848 1647 1546 1443 1441 1538 1637 1836 2136 2538 -13035 Cr 2618 21 14 28 -1449 1435 1535 -1449 1549 1535 -1543 1646 1848 2049 2349 -1543 1645 1847 2048 2348 2349 -12435 Cs 2619 21 17 28 -2446 2348 2049 1749 1448 1346 1444 1643 2141 2340 -2241 2339 2338 2236 -2337 2036 1736 1437 -1536 1438 1338 -2446 2346 2248 -2347 2048 1748 1447 -1548 1446 1544 -1445 1644 2142 2341 2439 2438 2336 2035 1735 1436 1338 -12735 Ct 2620 21 11 28 -1556 1535 1635 -1556 1656 1635 -1249 1949 1948 -1249 1248 1948 -12135 Cu 2621 21 20 28 -1449 1439 1536 1735 2035 2236 2539 -1449 1549 1539 1637 1836 2036 2237 2539 -2549 2535 2635 -2549 2649 2635 -13035 Cv 2622 21 16 28 -1249 1835 -1249 1349 1837 -2449 2349 1837 -2449 1835 -12635 Cw 2623 21 24 28 -1349 1835 -1349 1449 1838 -2249 1838 -2246 1835 -2246 2635 -2249 2638 -3149 3049 2638 -3149 2635 -13435 Cx 2624 21 18 28 -1349 2435 2535 -1349 1449 2535 -2549 2449 1335 -2549 1435 1335 -12835 Cy 2625 21 16 28 -1249 1835 -1249 1349 1837 -2449 2349 1837 1428 -2449 1835 1528 1428 -12635 Cz 2626 21 18 28 -2348 1335 -2549 1536 -1349 2549 -1349 1348 2348 -1536 2536 2535 -1335 2535 -12835 C0 2700 21 20 28 -1956 1655 1452 1347 1344 1439 1636 1935 2135 2436 2639 2744 2747 2652 2455 2156 1956 -1755 1552 1447 1444 1539 1736 -1637 1936 2136 2437 -2336 2539 2644 2647 2552 2355 -2454 2155 1955 1654 -13035 C1 2701 21 20 28 -1652 1853 2156 2135 -1652 1651 1852 2054 2035 2135 -13035 C2 2702 21 20 28 -1451 1452 1554 1655 1856 2256 2455 2554 2652 2650 2548 2345 1435 -1451 1551 1552 1654 1855 2255 2454 2552 2550 2448 2245 1335 -1436 2736 2735 -1335 2735 -13035 C3 2703 21 20 28 -1556 2656 1947 -1556 1555 2555 -2556 1847 -1948 2148 2447 2645 2742 2741 2638 2436 2135 1835 1536 1437 1339 1439 -1847 2147 2446 
2643 -2247 2545 2642 2641 2538 2236 -2640 2437 2136 1836 1537 1439 -1736 1438 -13035 C4 2704 21 20 28 -2353 2335 2435 -2456 2435 -2456 1340 2840 -2353 1440 -1441 2841 2840 -13035 C5 2705 21 20 28 -1556 1447 -1655 1548 -1556 2556 2555 -1655 2555 -1548 1849 2149 2448 2646 2743 2741 2638 2436 2135 1835 1536 1437 1339 1439 -1447 1547 1748 2148 2447 2644 -2248 2546 2643 2641 2538 2236 -2640 2437 2136 1836 1537 1439 -1736 1438 -13035 C6 2706 21 20 28 -2455 2553 2653 2555 2256 2056 1755 1552 1447 1442 1538 1736 2035 2135 2436 2638 2741 2742 2645 2447 2148 2048 1747 1545 -2554 2255 2055 1754 -1855 1652 1547 1542 1638 1936 -1540 1737 2036 2136 2437 2640 -2236 2538 2641 2642 2545 2247 -2643 2446 2147 2047 1746 1543 -1947 1645 1542 -13035 C7 2707 21 20 28 -1356 2756 1735 -1356 1355 2655 -2656 1635 1735 -13035 C8 2708 21 20 28 -1856 1555 1453 1451 1549 1648 1847 2246 2445 2544 2642 2639 2537 2236 1836 1537 1439 1442 1544 1645 1846 2247 2448 2549 2651 2653 2555 2256 1856 -1655 1553 1551 1649 1848 2247 2446 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1342 1444 1646 1847 2248 2449 2551 2553 2455 -2554 2255 1855 1554 -1438 1736 -2336 2638 -13035 C9 2709 21 20 28 -2546 2344 2043 1943 1644 1446 1349 1350 1453 1655 1956 2056 2355 2553 2649 2644 2539 2336 2035 1835 1536 1438 1538 1636 -2549 2446 2144 -2548 2345 2044 1944 1645 1448 -1844 1546 1449 1450 1553 1855 -1451 1654 1955 2055 2354 2551 -2155 2453 2549 2544 2439 2236 -2337 2036 1836 1537 -13035 C. 2710 21 11 28 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C, 2711 21 11 28 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C: 2712 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C; 2713 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C! 2714 21 11 28 -1556 1542 1642 -1556 1656 1642 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C? 2715 21 19 28 -1351 1352 1454 1555 1856 2156 2455 2554 2652 2650 2548 2447 2246 1945 -1351 1451 1452 1554 1855 2155 2454 2552 2550 2448 2247 1946 -1453 1755 -2255 2553 -2549 2146 -1946 1942 2042 2046 -1938 1837 1836 1935 2035 2136 2137 2038 1938 -1937 1936 2036 2037 1937 -12935 C/ 2720 21 23 28 -3060 1228 1328 -3060 3160 1328 -13335 C( 2721 21 14 28 -2060 1858 1655 1451 1346 1342 1437 1633 1830 2028 2128 -2060 2160 1958 1755 1551 1446 1442 1537 1733 1930 2128 -12435 C) 2722 21 14 28 -1360 1558 1755 1951 2046 2042 1937 1733 1530 1328 1428 -1360 1460 1658 1855 2051 2146 2142 2037 1833 1630 1428 -12435 C* 2723 21 16 28 -1856 1755 1945 1844 -1856 1844 -1856 1955 1745 1844 -1353 1453 2247 2347 -1353 2347 -1353 1352 2348 2347 -2353 2253 1447 1347 -2353 1347 -2353 2352 1348 1347 -12635 C- 2724 21 25 28 -1445 3145 3144 -1445 1444 3144 -13535 C 2699 21 16 28 -12635 PHYLIPNEW-3.69.650/data/Makefile.am0000664000175000017500000000016210775447511013245 00000000000000pkgdata_DATA = font1 font2 font3 font4 font5 \ font6 pkgdatadir=$(prefix)/share/$(PACKAGE)/data/ PHYLIPNEW-3.69.650/data/Makefile.in0000664000175000017500000003303412171071677013260 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. 
# This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = data DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/$(PACKAGE)/data/ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = 
@libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pkgdata_DATA = font1 font2 font3 font4 font5 \ font6 all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu data/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu data/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: PHYLIPNEW-3.69.650/data/font60000664000175000017500000003375210227217747012201 00000000000000C@ 2801 21 20 28 -2056 1335 -2056 2735 -2053 2635 -1541 2441 -1135 1735 -2335 2935 -13035 CA 2802 21 22 28 -1556 1535 -1656 1635 -1256 2856 2850 2756 -1646 2446 2745 2844 2942 2939 2837 2736 2435 1235 -2446 2645 2744 2842 2839 2737 2636 2435 -13235 CB 2803 21 22 28 -1556 1535 -1656 1635 -1256 2456 2755 2854 2952 2950 2848 2747 2446 -2456 2655 2754 2852 2850 2748 2647 2446 -1646 2446 2745 2844 2942 2939 2837 2736 2435 1235 -2446 2645 2744 2842 2839 2737 2636 2435 -13235 CC 2804 21 18 28 -1556 1535 -1656 1635 -1256 2756 2750 2656 -1235 1935 -12835 CD 2805 21 24 28 -1856 1850 1742 1638 1536 1435 -2856 2835 -2956 2935 -1556 3256 -1135 3235 -1135 1128 -1235 1128 -3135 3228 -3235 3228 -13435 CE 2806 21 21 28 -1556 1535 -1656 1635 -2250 2242 -1256 2856 2850 2756 -1646 2246 -1235 2835 2841 2735 -13135 CF 2807 21 31 28 -2556 2535 -2656 2635 -2256 2956 -1455 1554 1453 1354 1355 1456 1556 1655 1753 1849 1947 2146 3046 3247 3349 3453 3555 3656 3756 3855 3854 3753 3654 3755 -2146 1945 1843 1738 1636 1535 -2146 2045 1943 1838 1736 1635 1435 1336 1238 -3046 3245 3343 3438 3536 3635 -3046 3145 3243 3338 3436 3535 3735 3836 3938 -2235 2935 -14135 CG 2808 21 20 28 -1453 1356 1350 1453 1655 1856 2256 2555 2653 2650 2548 2247 1947 -2256 2455 2553 2550 2448 2247 -2247 2446 2644 2742 2739 2637 2536 2235 1735 1536 1437 1339 1340 1441 1540 1439 -2545 2642 2639 2537 2436 2235 -13035 CH 2809 21 24 28 -1556 1535 -1656 1635 -2856 2835 -2956 2935 -1256 1956 -2556 3256 -2854 1637 -1235 1935 -2535 3235 -13435 CI 2810 21 24 28 -1556 1535 -1656 1635 -2856 2835 -2956 2935 -1256 1956 -2556 3256 -2854 1637 -1235 1935 -2535 3235 -1862 1863 1763 1762 1860 2059 2459 2660 2762 -13435 CJ 2811 21 24 28 -1556 1535 -1656 1635 -1256 1956 -1646 2346 2547 2649 2753 2855 2956 3056 3155 3154 3053 2954 3055 -2346 2545 2643 2738 2836 2935 -2346 2445 2543 2638 2736 2835 3035 3136 3238 -1235 1935 -13435 CK 2812 21 25 28 -1856 1850 1742 1638 1536 1435 1335 1236 1237 1338 1437 1336 -2956 2935 -3056 3035 -1556 3356 -2635 3335 -13535 CL 2813 21 25 28 -1556 1535 -1656 2238 -1556 2235 -2956 2235 -2956 2935 -3056 3035 -1256 1656 -2956 3356 -1235 1835 -2635 3335 -13535 CM 2814 21 24 28 -1556 1535 -1656 1635 -2856 2835 -2956 2935 -1256 1956 -2556 3256 -1646 2846 -1235 1935 -2535 3235 -13435 CN 2815 21 22 28 -2056 1755 1553 1451 1347 1344 1440 1538 1736 2035 2235 2536 2738 2840 2944 2947 2851 2753 2555 2256 2056 -2056 1855 1653 1551 1447 1444 1540 1638 1836 2035 -2235 2436 2638 2740 2844 2847 2751 2653 2455 2256 -13235 CN 2816 21 24 28 -1556 1535 -1656 1635 -2856 2835 -2956 2935 -1256 3256 -1235 1935 -2535 3235 -13435 CO 2817 21 22 28 -1556 1535 -1656 1635 -1256 2456 2755 2854 2952 2949 2847 2746 2445 1645 -2456 2655 2754 2852 2849 2747 2646 2445 -1235 1935 -13235 CP 2818 21 21 28 -2753 2850 2856 2753 2555 2256 2056 1755 1553 1451 1348 1343 1440 1538 1736 2035 2235 2536 2738 2840 -2056 1855 1653 1551 1448 1443 1540 1638 1836 2035 -13135 CQ 2819 21 19 28 -1956 1935 -2056 2035 -1356 1250 1256 2756 2750 2656 -1635 2335 -12935 CR 2820 21 21 28 -1356 2040 -1456 2140 -2856 2140 1937 1836 1635 1535 1436 1437 1538 1637 1536 -1156 1756 -2456 3056 -13135 CS 2821 21 25 28 -2256 2235 -2356 2335 -1956 2656 -2053 1652 1450 1347 1344 1441 1639 2038 2538 2939 3141 3244 3247 3150 2952 2553 2053 -2053 1752 1550 1447 1444 1541 1739 2038 -2538 2839 3041 3144 3147 3050 2852 2553 -1935 2635 -13535 CT 2822 21 20 28 -1356 2635 -1456 2735 -2756 1335 -1156 1756 -2356 2956 
-1135 1735 -2335 2935 -13035 CU 2823 21 24 28 -1556 1535 -1656 1635 -2856 2835 -2956 2935 -1256 1956 -2556 3256 -1235 3235 -3135 3228 -3235 3228 -13435 CV 2824 21 23 28 -1556 1545 1643 1942 2242 2543 2745 -1656 1645 1743 1942 -2756 2735 -2856 2835 -1256 1956 -2456 3156 -2435 3135 -13335 CW 2825 21 33 28 -1556 1535 -1656 1635 -2656 2635 -2756 2735 -3756 3735 -3856 3835 -1256 1956 -2356 3056 -3456 4156 -1235 4135 -14335 CX 2826 21 33 28 -1556 1535 -1656 1635 -2656 2635 -2756 2735 -3756 3735 -3856 3835 -1256 1956 -2356 3056 -3456 4156 -1235 4135 -4035 4128 -4135 4128 -14335 CY 2827 21 26 28 -2056 2035 -2156 2135 -1356 1250 1256 2456 -2146 2846 3145 3244 3342 3339 3237 3136 2835 1735 -2846 3045 3144 3242 3239 3137 3036 2835 -13635 CZ 2828 21 30 28 -1556 1535 -1656 1635 -1256 1956 -1646 2346 2645 2744 2842 2839 2737 2636 2335 1235 -2346 2545 2644 2742 2739 2637 2536 2335 -3456 3435 -3556 3535 -3156 3856 -3135 3835 -14035 C[ 2829 21 21 28 -1556 1535 -1656 1635 -1256 1956 -1646 2346 2645 2744 2842 2839 2737 2636 2335 1235 -2346 2545 2644 2742 2739 2637 2536 2335 -13135 C\ 2830 21 21 28 -1453 1356 1350 1453 1655 1956 2156 2455 2653 2751 2848 2843 2740 2638 2436 2135 1835 1536 1437 1339 1340 1441 1540 1439 -2156 2355 2553 2651 2748 2743 2640 2538 2336 2135 -1846 2746 -13135 C] 2831 21 31 28 -1556 1535 -1656 1635 -1256 1956 -1235 1935 -2956 2655 2453 2351 2247 2244 2340 2438 2636 2935 3135 3436 3638 3740 3844 3847 3751 3653 3455 3156 2956 -2956 2755 2553 2451 2347 2344 2440 2538 2736 2935 -3135 3336 3538 3640 3744 3747 3651 3553 3355 3156 -1646 2246 -14135 C_ 2832 21 22 28 -2656 2635 -2756 2735 -3056 1856 1555 1454 1352 1350 1448 1547 1846 2646 -1856 1655 1554 1452 1450 1548 1647 1846 -2146 1945 1844 1537 1436 1336 1237 -1945 1843 1636 1535 1335 1237 1238 -2335 3035 -13235 C^ 2901 21 20 28 -1547 1546 1446 1447 1548 1749 2149 2348 2447 2545 2538 2636 2735 -2447 2438 2536 2735 2835 -2445 2344 1743 1442 1340 1338 1436 1735 2035 2236 2438 -1743 1542 1440 1438 1536 1735 -13035 Ca 2902 21 20 28 -2656 2555 1953 1651 1448 1345 1341 1438 1636 1935 2135 2436 2638 2741 2743 2646 2448 2149 1949 1648 1446 1343 -2656 2554 2353 1952 1650 1448 -1949 1748 1546 1443 1441 1538 1736 1935 -2135 2336 2538 2641 2643 2546 2348 2149 -13035 Cb 2903 21 20 28 -1549 1535 -1649 1635 -1249 2349 2648 2746 2745 2643 2342 -2349 2548 2646 2645 2543 2342 -1642 2342 2641 2739 2738 2636 2335 1235 -2342 2541 2639 2638 2536 2335 -13035 Cc 2904 21 18 28 -1549 1535 -1649 1635 -1249 2649 2644 2549 -1235 1935 -12835 Cd 2905 21 23 28 -1849 1845 1739 1636 1535 -2749 2735 -2849 2835 -1549 3149 -1335 1230 1235 3135 3130 3035 -13335 Ce 2906 21 19 28 -1443 2643 2645 2547 2448 2249 1949 1648 1446 1343 1341 1438 1636 1935 2135 2436 2638 -2543 2546 2448 -1949 1748 1546 1443 1441 1538 1736 1935 -12935 Cf 2907 21 27 28 -2349 2335 -2449 2435 -2049 2749 -1548 1447 1348 1449 1549 1648 1844 1943 2142 2642 2843 2944 3148 3249 3349 3448 3347 3248 -2142 1941 1840 1636 1535 -2142 1940 1736 1635 1435 1336 1238 -2642 2841 2940 3136 3235 -2642 2840 3036 3135 3335 3436 3538 -2035 2735 -13735 Cg 2908 21 18 28 -1447 1349 1345 1447 1548 1749 2149 2448 2546 2545 2443 2142 -2149 2348 2446 2445 2343 2142 -1842 2142 2441 2539 2538 2436 2135 1735 1436 1338 1339 1440 1539 1438 -2142 2341 2439 2438 2336 2135 -12835 Ch 2909 21 22 28 -1549 1535 -1649 1635 -2649 2635 -2749 2735 -1249 1949 -2349 3049 -1235 1935 -2335 3035 -2648 1636 -13235 Ci 2910 21 22 28 -1549 1535 -1649 1635 -2649 2635 -2749 2735 -1249 1949 -2349 3049 -1235 1935 -2335 3035 -2648 1636 -1855 1856 1756 1755 1853 
2052 2252 2453 2555 -13235 Cj 2911 21 20 28 -1549 1535 -1649 1635 -1249 1949 -1642 1842 2143 2244 2448 2549 2649 2748 2647 2548 -1842 2141 2240 2436 2535 -1842 2041 2140 2336 2435 2635 2736 2838 -1235 1935 -13035 Ck 2912 21 22 28 -1749 1745 1639 1536 1435 1335 1236 1337 1436 -2649 2635 -2749 2735 -1449 3049 -2335 3035 -13235 Cl 2913 21 23 28 -1549 1535 -1549 2135 -1649 2137 -2749 2135 -2749 2735 -2849 2835 -1249 1649 -2749 3149 -1235 1835 -2435 3135 -13335 Cm 2914 21 22 28 -1549 1535 -1649 1635 -2649 2635 -2749 2735 -1249 1949 -2349 3049 -1642 2642 -1235 1935 -2335 3035 -13235 Cn 2915 21 20 28 -1949 1648 1446 1343 1341 1438 1636 1935 2135 2436 2638 2741 2743 2646 2448 2149 1949 -1949 1748 1546 1443 1441 1538 1736 1935 -2135 2336 2538 2641 2643 2546 2348 2149 -13035 Co 2916 21 22 28 -1549 1535 -1649 1635 -2649 2635 -2749 2735 -1249 3049 -1235 1935 -2335 3035 -13235 Cp 2917 21 21 28 -1549 1528 -1649 1628 -1646 1848 2049 2249 2548 2746 2843 2841 2738 2536 2235 2035 1836 1638 -2249 2448 2646 2743 2741 2638 2436 2235 -1249 1649 -1228 1928 -13135 Cq 2918 21 19 28 -2546 2445 2544 2645 2646 2448 2249 1949 1648 1446 1343 1341 1438 1636 1935 2135 2436 2638 -1949 1748 1546 1443 1441 1538 1736 1935 -12935 Cr 2919 21 19 28 -1949 1935 -2049 2035 -1449 1344 1349 2649 2644 2549 -1635 2335 -12935 Cs 2920 21 18 28 -1349 1935 -1449 1937 -2549 1935 1731 1529 1328 1228 1129 1230 1329 -1149 1749 -2149 2749 -12835 Ct 2921 21 21 28 -2056 2028 -2156 2128 -1756 2156 -2046 1948 1849 1649 1448 1345 1339 1436 1635 1835 1936 2038 -1649 1548 1445 1439 1536 1635 -2549 2648 2745 2739 2636 2535 -2146 2248 2349 2549 2748 2845 2839 2736 2535 2335 2236 2138 -1728 2428 -13135 Cu 2922 21 20 28 -1449 2535 -1549 2635 -2649 1435 -1249 1849 -2249 2849 -1235 1835 -2235 2835 -13035 Cv 2923 21 22 28 -1549 1535 -1649 1635 -2649 2635 -2749 2735 -1249 1949 -2349 3049 -1235 3035 3030 2935 -13235 Cw 2924 21 22 28 -1549 1542 1640 1939 2139 2440 2642 -1649 1642 1740 1939 -2649 2635 -2749 2735 -1249 1949 -2349 3049 -2335 3035 -13235 Cx 2925 21 31 28 -1549 1535 -1649 1635 -2549 2535 -2649 2635 -3549 3535 -3649 3635 -1249 1949 -2249 2949 -3249 3949 -1235 3935 -14135 Cy 2926 21 31 28 -1549 1535 -1649 1635 -2549 2535 -2649 2635 -3549 3535 -3649 3635 -1249 1949 -2249 2949 -3249 3949 -1235 3935 3930 3835 -14135 Cz 2927 21 21 28 -1949 1935 -2049 2035 -1449 1344 1349 2349 -2042 2442 2741 2839 2838 2736 2435 1635 -2442 2641 2739 2738 2636 2435 -13135 C{ 2928 21 26 28 -1549 1535 -1649 1635 -1249 1949 -1642 2042 2341 2439 2438 2336 2035 1235 -2042 2241 2339 2338 2236 2035 -3049 3035 -3149 3135 -2749 3449 -2735 3435 -13635 C| 2929 21 17 28 -1549 1535 -1649 1635 -1249 1949 -1642 2042 2341 2439 2438 2336 2035 1235 -2042 2241 2339 2338 2236 2035 -12735 C} 2930 21 19 28 -1447 1349 1345 1447 1548 1749 2049 2348 2546 2643 2641 2538 2336 2035 1735 1536 1338 1339 1440 1539 1438 -2049 2248 2446 2543 2541 2438 2236 2035 -1942 2542 -12935 C~ 2931 21 29 28 -1549 1535 -1649 1635 -1249 1949 -1235 1935 -2849 2548 2346 2243 2241 2338 2536 2835 3035 3336 3538 3641 3643 3546 3348 3049 2849 -2849 2648 2446 2343 2341 2438 2636 2835 -3035 3236 3438 3541 3543 3446 3248 3049 -1642 2242 -13935 C; 2932 21 21 28 -2549 2535 -2649 2635 -2949 1849 1548 1446 1445 1543 1842 2542 -1849 1648 1546 1545 1643 1842 -2342 2041 1940 1736 1635 -2342 2141 2040 1836 1735 1535 1436 1338 -2235 2935 -13135 C0 2700 21 20 28 -1956 1655 1452 1347 1344 1439 1636 1935 2135 2436 2639 2744 2747 2652 2455 2156 1956 -1755 1552 1447 1444 1539 1736 -1637 1936 2136 2437 -2336 2539 2644 2647 2552 2355 -2454 
2155 1955 1654 -13035 C1 2701 21 20 28 -1652 1853 2156 2135 -1652 1651 1852 2054 2035 2135 -13035 C2 2702 21 20 28 -1451 1452 1554 1655 1856 2256 2455 2554 2652 2650 2548 2345 1435 -1451 1551 1552 1654 1855 2255 2454 2552 2550 2448 2245 1335 -1436 2736 2735 -1335 2735 -13035 C3 2703 21 20 28 -1556 2656 1947 -1556 1555 2555 -2556 1847 -1948 2148 2447 2645 2742 2741 2638 2436 2135 1835 1536 1437 1339 1439 -1847 2147 2446 2643 -2247 2545 2642 2641 2538 2236 -2640 2437 2136 1836 1537 1439 -1736 1438 -13035 C4 2704 21 20 28 -2353 2335 2435 -2456 2435 -2456 1340 2840 -2353 1440 -1441 2841 2840 -13035 C5 2705 21 20 28 -1556 1447 -1655 1548 -1556 2556 2555 -1655 2555 -1548 1849 2149 2448 2646 2743 2741 2638 2436 2135 1835 1536 1437 1339 1439 -1447 1547 1748 2148 2447 2644 -2248 2546 2643 2641 2538 2236 -2640 2437 2136 1836 1537 1439 -1736 1438 -13035 C6 2706 21 20 28 -2455 2553 2653 2555 2256 2056 1755 1552 1447 1442 1538 1736 2035 2135 2436 2638 2741 2742 2645 2447 2148 2048 1747 1545 -2554 2255 2055 1754 -1855 1652 1547 1542 1638 1936 -1540 1737 2036 2136 2437 2640 -2236 2538 2641 2642 2545 2247 -2643 2446 2147 2047 1746 1543 -1947 1645 1542 -13035 C7 2707 21 20 28 -1356 2756 1735 -1356 1355 2655 -2656 1635 1735 -13035 C8 2708 21 20 28 -1856 1555 1453 1451 1549 1648 1847 2246 2445 2544 2642 2639 2537 2236 1836 1537 1439 1442 1544 1645 1846 2247 2448 2549 2651 2653 2555 2256 1856 -1655 1553 1551 1649 1848 2247 2446 2644 2742 2739 2637 2536 2235 1835 1536 1437 1339 1342 1444 1646 1847 2248 2449 2551 2553 2455 -2554 2255 1855 1554 -1438 1736 -2336 2638 -13035 C9 2709 21 20 28 -2546 2344 2043 1943 1644 1446 1349 1350 1453 1655 1956 2056 2355 2553 2649 2644 2539 2336 2035 1835 1536 1438 1538 1636 -2549 2446 2144 -2548 2345 2044 1944 1645 1448 -1844 1546 1449 1450 1553 1855 -1451 1654 1955 2055 2354 2551 -2155 2453 2549 2544 2439 2236 -2337 2036 1836 1537 -13035 C. 2710 21 11 28 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C, 2711 21 11 28 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C: 2712 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C; 2713 21 11 28 -1549 1448 1447 1546 1646 1747 1748 1649 1549 -1548 1547 1647 1648 1548 -1736 1635 1535 1436 1437 1538 1638 1737 1734 1632 1431 -1537 1536 1636 1637 1537 -1635 1734 -1736 1632 -12135 C! 
2714 21 11 28 -1556 1542 1642 -1556 1656 1642 -1538 1437 1436 1535 1635 1736 1737 1638 1538 -1537 1536 1636 1637 1537 -12135 C' 2715 21 19 28 -1351 1352 1454 1555 1856 2156 2455 2554 2652 2650 2548 2447 2246 1945 -1351 1451 1452 1554 1855 2155 2454 2552 2550 2448 2247 1946 -1453 1755 -2255 2553 -2549 2146 -1946 1942 2042 2046 -1938 1837 1836 1935 2035 2136 2137 2038 1938 -1937 1936 2036 2037 1937 -12935 C/ 2720 21 23 28 -3060 1228 1328 -3060 3160 1328 -13335 C( 2721 21 14 28 -2060 1858 1655 1451 1346 1342 1437 1633 1830 2028 2128 -2060 2160 1958 1755 1551 1446 1442 1537 1733 1930 2128 -12435 C) 2722 21 14 28 -1360 1558 1755 1951 2046 2042 1937 1733 1530 1328 1428 -1360 1460 1658 1855 2051 2146 2142 2037 1833 1630 1428 -12435 C* 2723 21 16 28 -1856 1755 1945 1844 -1856 1844 -1856 1955 1745 1844 -1353 1453 2247 2347 -1353 2347 -1353 1352 2348 2347 -2353 2253 1447 1347 -2353 1347 -2353 2352 1348 1347 -12635 C- 2724 21 25 28 -1445 3145 3144 -1445 1444 3144 -13535 C 2699 21 16 28 -12635 PHYLIPNEW-3.69.650/data/.cvsignore0000664000175000017500000000002511326104717013176 00000000000000Makefile.in Makefile PHYLIPNEW-3.69.650/ChangeLog0000664000175000017500000000424511616234655012056 00000000000000Release 3.69 update July 2011 Changed -thresh to -dothreshold and -gamma to -gammatype to avoid qualifier name clashes with stricter ACD validation. The *boot applications reported the status of some boolean options and printed "Done" at the end. Removed the boolean reports and now only print "Done" unless -noprogress is set to allow piping. Release 3.69 Updated all code to include the changes in phylip 3.69. Release 3.68 Updated all code to include the changes in phylip 3.68. fdnadist no longer writes a blank line at the end of the output. fconsense writes trees with 2 decimal places. A bug in fprotdist for scores of 100.0 is fixed. Rearrangement messages are no longer printed by fpromlk. fprotdist and fprotpars now write "Done" when completed. Renamed fdrawgram qualifiers to make unambiguous names shorter: xmarginray => xrayshade ymarginray => yrayshade for RayShade output image size in pixels. Removed unused fdrawtree qualifiers plotterpcl and plotterpcx. Added 4 new plotter options for the additional resolutions previously defined by plotterpcl and plotterpcx. Renamed fdrawtree qualifiers to make unambiguous names shorter: xmarginray => xrayshade; ymarginray => yrayshade for RayShade output image size in pixels. Release 3.67 Updated all code to phylip 3.67. The output of several programs has changed - see the phylip release notes for details. Programs with changed output are all the molecular clock algorithms : fcontml, fdnaml, fdnamlk, proml, promlk, restml. ftreedist and ftreedistpair branch score distance output has also changed in phylip 3.67 fontfile now works correctly in fdrawgram and fdrawtree. The fontfile value must be a known phylip font in the data directory. previewing with X11 was broken. Setting "none" for previewer no longer prompts before plotting. frestdist had a memory access error reported by valgrind. The same error was found in the original phylip source, and fixed by copying in code changes in phylip 3.66 fclique had an error reading the ancestors file. Results now agree with those from the original phylip 3.6 code. fdnamove had an internal overflow. Array chh[11] needed at least one extra space. 
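The fdnamove entry above describes the classic one-element sizing error: a character buffer declared just large enough for its nominal contents, with no room for the final position the code actually writes. A minimal sketch of that kind of fix follows; it is illustrative only, assumes a buffer meant to hold eleven characters plus a terminator, and borrows the name chh from the ChangeLog entry rather than from the real fdnamove source.

#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Eleven usable positions plus one extra slot, so that writing the
       eleventh character or the terminating '\0' cannot overrun the
       array (the "at least one extra space" noted in the ChangeLog). */
    char chh[11 + 1];

    strncpy(chh, "0123456789A", sizeof(chh) - 1);
    chh[sizeof(chh) - 1] = '\0';
    printf("%s (%zu characters)\n", chh, strlen(chh));
    return 0;
}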
PHYLIPNEW-3.69.650/COPYING0000664000175000017500000000202511614250727011325 00000000000000Copyright Notice for PHYLIP 3.69 The following copyright notice is intended to cover all source code, all documentation, and all executable programs of the PHYLIP package. (c) Copyright 1980-2008. University of Washington. All rights reserved. Permission is granted to reproduce, perform, and modify these programs and documentation files. Permission is granted to distribute or provide access to these programs provided that this copyright notice is not removed, the programs are not integrated with or called by any product or service that generates revenue, and that your distribution of these documentation files and programs are free. Any modified versions of these materials that are distributed or accessible shall indicate that they are based on these programs. Institutions of higher education are granted permission to distribute this material to their students and staff for a fee to recover distribution costs. Permission requests for any other distribution of these program should be directed to license(at)u.washington.edu PHYLIPNEW-3.69.650/include/0002775000175000017500000000000012171071713011773 500000000000000PHYLIPNEW-3.69.650/include/discrete.h0000664000175000017500000001323410075203710013662 00000000000000 /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* discrete.h: included in pars */ typedef struct gbases { discbaseptr discbase; struct gbases *next; } gbases; struct LOC_hyptrav { boolean bottom; node *r; discbaseptr hypset; boolean maybe, nonzero; unsigned char tempset, anc; } ; extern long nonodes, endsite, outgrno, nextree, which; extern boolean interleaved, printdata, outgropt, treeprint, dotdiff; extern steptr weight, category, alias, location, ally; extern sequence y, convtab; #ifndef OLDC /*function prototypes*/ void discrete_inputdata(AjPPhyloState, long); void alloctree(pointarray *, long, boolean); void setuptree(pointarray, long, boolean); void alloctip(node *, long *, unsigned char *); void sitesort(long, steptr); void sitecombine(long); void sitescrunch(long); void makevalues(pointarray, long *, unsigned char *, boolean); void fillin(node *, node *, node *); long getlargest(long *); void multifillin(node *, node *, long); void sumnsteps(node *, node *, node *, long, long); void sumnsteps2(node *, node *, node *, long, long, long *); void multisumnsteps(node *, node *, long, long, long *); void multisumnsteps2(node *); void findoutgroup(node *, boolean *); boolean alltips(node *, node *); void gdispose(node *, node **, pointarray); void preorder(node *, node *, node *, node *, node *, node *, long ); void updatenumdesc(node *, node *, long); void add(node *, node *, node *, node **, boolean, pointarray, node **, long *, unsigned char *); void findbelow(node **, node *, node *); void re_move(node *, node **, node **, boolean, pointarray, node **, long *, unsigned char *); void postorder(node *); void getnufork(node **, node **, pointarray, long *, unsigned char *); void reroot(node *, node *); void reroot2(node *, node *); void reroot3(node *, node *, node *, node *, node **); void savetraverse(node *); void newindex(long, node *); void flipindexes(long, pointarray); boolean parentinmulti(node *); long sibsvisited(node *, long *); long 
smallest(node *, long *); void bintomulti(node **, node **, node **, long *, unsigned char *); void backtobinary(node **, node *, node **); boolean outgrin(node *, node *); void flipnodes(node *, node *); void moveleft(node *, node *, node **); void savetree(node *, long *, pointarray, node **, long *, unsigned char *); void addnsave(node *, node *, node *, node **, node **, boolean multf, pointarray , long *, long *, unsigned char *); void addbestever(long *, long *, long, boolean, long *, bestelm *); void addtiedtree(long, long *, long, boolean, long *, bestelm *); void clearcollapse(pointarray); void clearbottom(pointarray); void collabranch(node *,node *,node *); boolean allcommonbases(node *, node *, boolean *); void findbottom(node *, node **); boolean moresteps(node *, node *); boolean passdown(node *, node *, node *, node *, node *, node *, node *, node *, node *, boolean); boolean trycollapdesc(node *, node *, node *, node *, node *, node *, node *, node *, node *, boolean ,long *, unsigned char *); void setbottom(node *); boolean zeroinsubtree(node *, node *, node *, node *, node *, node *, node *, node *, boolean , node *, long *, unsigned char *); boolean collapsible(node *, node *, node *, node *, node *, node *, node *, node *, boolean , node *, long *, unsigned char *, pointarray); void replaceback(node **,node *,node *,node **,long *,unsigned char *); void putback(node *, node *, node *, node **); void savelocrearr(node *, node *, node *, node *, node *, node *, node *, node *, node *, node **, long, long *, boolean, boolean, boolean *, long *, bestelm *, pointarray, node **, long *, unsigned char *); void clearvisited(pointarray); void hyprint(long,long,struct LOC_hyptrav *, pointarray); void gnubase(gbases **, gbases **, long); void chuckbase(gbases *, gbases **); void hyptrav(node *, discbaseptr, long, long, boolean, pointarray, gbases **); void hypstates(long, node *, pointarray, gbases **); void initbranchlen(node *); void initmin(node *, long, boolean); void initbase(node *, long); void inittreetrav(node *, long); void compmin(node *, node *); void minpostorder(node *, pointarray); void branchlength(node *, node *, double *, pointarray); void printbranchlengths(node *); void branchlentrav(node *, node *, long, long, double *, pointarray); void treelength(node *, long, pointarray); void coordinates(node *, long *, double , long *); void drawline(long, double, node *); void printree(node *, double); void writesteps(long, boolean, steptr, node *); void treeout(node *, long, long *, node *); void drawline3(long, double, node *); void standev(long, long, long, double, double *, long **, longer); void freetip(node *); void freenontip(node *); void freenodes(long, pointarray); void freenode(node **); void freetree(long, pointarray); void freegarbage(gbases **); void freegrbg(node **); void treeout3(node *p, long nextree, long *col, node *root); void collapsetree(node *, node *, node **, pointarray, long *, unsigned char *); void collapsebestrees(node **, node **, pointarray, bestelm *, long *, long *, unsigned char *, long, boolean, boolean); /*function prototypes*/ #endif PHYLIPNEW-3.69.650/include/dollo.h0000664000175000017500000000177207712247476013222 00000000000000 /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ /* dollo.h: included in dollop, dolmove & dolpenny */ #ifndef OLDC /* function prototypes */ void correct(node *, long, boolean, bitptr, pointptr); void fillin(node *); void postorder(node *); void count(long *, bitptr, steptr, steptr); void filltrav(node *); void hyprint(struct htrav_vars *, boolean *, bitptr, Char *); void hyptrav(node *, boolean *, bitptr, long, boolean, Char *, pointptr, gbit *, bitptr, bitptr); void hypstates(long, boolean, Char *, pointptr, node *, gbit *, bitptr, bitptr); void drawline(long, double, node *); void printree(double, boolean, node *); void writesteps(boolean, boolean, steptr); /* function prototypes */ #endif PHYLIPNEW-3.69.650/include/cons.h0000664000175000017500000000313311727433657013043 00000000000000 #define OVER 8 #define ADJACENT_PAIRS 1 #define CORR_IN_1_AND_2 2 #define ALL_IN_1_AND_2 3 #define NO_PAIRING 4 #define ALL_IN_FIRST 5 #define TREE1 8 #define TREE2 9 #define FULL_MATRIX 11 #define VERBOSE 22 #define SPARSE 33 /* Number of columns per block in a matrix output */ #define COLUMNS_PER_BLOCK 10 typedef struct pattern_elm { group_type *apattern; long *patternsize; double *length; } pattern_elm; #ifndef OLDC /* function prototypes */ void initconsnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); void phylipcompress(long *); void sort(long); void eliminate(long *, long *); void printset(long); void bigsubset(group_type *, long); void recontraverse(node **, group_type *, long, long *); void reconstruct(long); void coordinates(node *, long *); void drawline(long i); void printree(void); void consensus(pattern_elm ***, long); void rehash(void); void enternodeset(node *r); void accumulate(node *); void dupname2(Char *, node *, node *); void dupname(node *); void missingname(node *); void gdispose(node *); void initreenode(node *); void reroot(node *, long *); void store_pattern (pattern_elm ***, int); boolean samename(naym, plotstring); void reordertips(void); void read_groups (pattern_elm ****, long , long, AjPPhyloTree *); void clean_up_final(void); void clean_up_final_consense(void); void freegrbg(node **); /* function prototypes */ #endif extern long setsz; PHYLIPNEW-3.69.650/include/seq.h0000664000175000017500000001745011253743724012671 00000000000000/* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. 
*/ /* seq.h: included in dnacomp, dnadist, dnainvar, dnaml, dnamlk, dnamove, dnapars, dnapenny, protdist, protpars, restdist & restml */ #ifndef SEQ_H #define SEQ_H #define ebcdic EBCDIC #define MAXNCH 20 /* All of this came over from cons.h -plc*/ #define OVER 7 #define ADJACENT_PAIRS 1 #define CORR_IN_1_AND_2 2 #define ALL_IN_1_AND_2 3 #define NO_PAIRING 4 #define ALL_IN_FIRST 5 #define TREE1 8 #define TREE2 9 #define FULL_MATRIX 11 #define VERBOSE 22 #define SPARSE 33 /* Number of columns per block in a matrix output */ #define COLUMNS_PER_BLOCK 10 /*end move*/ typedef struct gbases { baseptr base; struct gbases *next; } gbases; typedef struct nuview_data { /* A big 'ol collection of pointers used in nuview */ double *yy, *wwzz, *vvzz, *vzsumr, *vzsumy, *sum, *sumr, *sumy; sitelike *xx; } nuview_data; struct LOC_hyptrav { boolean bottom; node *r; long *hypset; boolean maybe, nonzero; long tempset, anc; } ; extern long nonodes, endsite, outgrno, nextree, which; extern boolean interleaved, printdata, outgropt, treeprint, dotdiff, transvp; extern steptr weight, category, alias, location, ally; extern sequence y; #ifndef OLDC /* function prototypes */ void free_all_x_in_array (long, pointarray); void free_all_x2_in_array (long, pointarray); void alloctemp(node **, long *, long); void freetemp(node **); void freetree2 (pointarray, long); void alloctree(pointarray *, long, boolean); void allocx(long, long, pointarray, boolean); void prot_allocx(long, long, pointarray, boolean); void setuptree(pointarray, long, boolean); void setuptree2(tree *); void alloctip(node *, long *); void getbasefreqs(double, double, double, double, double *, double *, double *, double *, double *, double *, double *, double *xi, double *, double *, boolean, boolean); void empiricalfreqs(double *,double *,double *,double *,steptr,pointarray); void sitesort(long, steptr); void sitecombine(long); void sitescrunch(long); void sitesort2(long, steptr); void sitecombine2(long, steptr); void sitescrunch2(long, long, long, steptr); void makevalues(pointarray, long *, boolean); void makevalues2(long, pointarray, long, long, sequence, steptr); void fillin(node *, node *, node *); long getlargest(long *); void multifillin(node *, node *, long); void sumnsteps(node *, node *, node *, long, long); void sumnsteps2(node *, node *, node *, long, long, long *); void multisumnsteps(node *, node *, long, long, long *); void multisumnsteps2(node *); boolean alltips(node *, node *); void gdispose(node *, node **, pointarray); void preorder(node *, node *, node *, node *, node *, node *, long); void updatenumdesc(node *, node *, long); void add(node *,node *,node *,node **,boolean,pointarray,node **,long *); void findbelow(node **below, node *item, node *fork); void re_move(node *item, node **fork, node **root, boolean recompute, pointarray, node **, long *); void postorder(node *p); void getnufork(node **, node **, pointarray, long *); void reroot(node *, node *); void reroot2(node *, node *); void reroot3(node *, node *, node *, node *, node **); void savetraverse(node *); void newindex(long, node *); void flipindexes(long, pointarray); boolean parentinmulti(node *); long sibsvisited(node *, long *); long smallest(node *, long *); void bintomulti(node **, node **, node **, long *); void backtobinary(node **, node *, node **); boolean outgrin(node *, node *); void flipnodes(node *, node *); void moveleft(node *, node *, node **); void savetree(node *, long *, pointarray, node **, long *); void addnsave(node *, node *, node *, node **, 
node **,boolean, pointarray, long *, long *); void addbestever(long *, long *, long, boolean, long *, bestelm *); void addtiedtree(long, long *, long, boolean,long *, bestelm *); void clearcollapse(pointarray); void clearbottom(pointarray); void collabranch(node *, node *, node *); boolean allcommonbases(node *, node *, boolean *); void findbottom(node *, node **); boolean moresteps(node *, node *); boolean passdown(node *, node *, node *, node *, node *, node *, node *, node *, node *, boolean); boolean trycollapdesc(node *, node *, node *, node *, node *, node *, node *, node *, node *, boolean , long *); void setbottom(node *); boolean zeroinsubtree(node *, node *, node *, node *, node *, node *, node *, node *, boolean, node *, long *); boolean collapsible(node *, node *, node *, node *, node *, node *, node *, node *, boolean, node *, long *, pointarray); void replaceback(node **, node *, node *, node **, long *); void putback(node *, node *, node *, node **); void savelocrearr(node *, node *, node *, node *, node *, node *, node *, node *, node *, node **, long, long *, boolean, boolean , boolean *, long *, bestelm *, pointarray , node **, long *); void clearvisited(pointarray); void hyprint(long, long, struct LOC_hyptrav *,pointarray, Char *); void gnubase(gbases **, gbases **, long); void chuckbase(gbases *, gbases **); void hyptrav(node *, long *, long, long, boolean,pointarray, gbases **, Char *); void hypstates(long , node *, pointarray, gbases **, Char *); void initbranchlen(node *p); void initmin(node *, long, boolean); void initbase(node *, long); void inittreetrav(node *, long); void compmin(node *, node *); void minpostorder(node *, pointarray); void branchlength(node *,node *,double *,pointarray); void printbranchlengths(node *); void branchlentrav(node *,node *,long,long,double *,pointarray); void treelength(node *, long, pointarray); void coordinates(node *, long *, double, long *); void drawline(long, double, node *); void printree(node *, double); void writesteps(long, boolean, steptr, node *); void treeout(node *, long, long *, node *); void treeout3(node *, long, long *, node *); void fdrawline2(FILE *fp, long i, double scale, tree *curtree); void drawline2(long, double, tree); void drawline3(long, double, node *); void copynode(node *, node *, long); void prot_copynode(node *, node *, long); void copy_(tree *, tree *, long, long); void prot_copy_(tree *, tree *, long, long); void standev(long, long, long, double, double *, long **, longer); void standev2(long, long, long, long, double, double *, double **, steptr, longer); void freetip(node *); void freenontip(node *); void freenodes(long, pointarray); void freenode(node **); void freetree(long, pointarray); void freex(long, pointarray); void freex_notip(long, pointarray); void prot_freex_notip(long nonodes, pointarray treenode); void prot_freex(long nonodes, pointarray treenode); void freegarbage(gbases **); void freegrbg(node **); /* new functions for EMBOSS */ void seq_inputdata(AjPSeqset, long); void inputdata(long); void emboss_treeout(node *, AjPOutfile, long); void emboss_treeout3(node *, AjPOutfile, long, long *, node *); void collapsetree(node *, node *, node **, pointarray, long *); void collapsebestrees(node **, node **, pointarray, bestelm *, long *, long *, long, boolean, boolean); void fix_x(node* p,long site, double maxx, long rcategs); void fix_protx(node* p,long site,double maxx, long rcategs); /*function prototypes*/ #endif #endif /* SEQ_H */ 
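seq.h, like discrete.h, dollo.h and cons.h before it, wraps its prototype list in an #ifndef OLDC guard. The sketch below shows that convention, assuming OLDC is defined only when building with a pre-ANSI compiler that cannot parse prototypes; example_sum is a hypothetical function used purely for illustration and is not part of the PHYLIP or EMBOSS API.

#include <stdio.h>

#ifndef OLDC
long example_sum(long a, long b);   /* ANSI prototype, omitted under OLDC */
#endif

long example_sum(a, b)              /* K&R-style definition, accepted either way */
long a, b;
{
    return a + b;
}

int main(void)
{
    printf("%ld\n", example_sum(2L, 3L));
    return 0;
}

With an ANSI compiler the prototype provides argument checking; with OLDC defined the declaration simply disappears while the old-style definition still compiles, which is presumably why each of these headers repeats the same guard around its prototypes.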
PHYLIPNEW-3.69.650/include/disc.h0000664000175000017500000000540610775447511013024 00000000000000 /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* disc.h: included in mix, move, penny, dollop, dolmove, dolpenny, & clique */ /* node and pointptr used in Dollop, Dolmove, Dolpenny, Move, & Clique */ typedef node **pointptr; /* node and pointptr used in Mix & Penny */ typedef struct node2 { /* describes a tip species or an ancestor */ struct node2 *next, *back; long index; boolean tip, bottom,visited;/* present species are tips of tree */ bitptr fulstte1, fulstte0; /* see in PROCEDURE fillin */ bitptr empstte1, empstte0; /* see in PROCEDURE fillin */ bitptr fulsteps,empsteps; long xcoord, ycoord, ymin; /* used by printree */ long ymax; } node2; typedef node2 **pointptr2; typedef struct gbit { bitptr bits_; struct gbit *next; } gbit; typedef struct htrav_vars { node *r; boolean bottom, nonzero; gbit *zerobelow, *onebelow; } htrav_vars; typedef struct htrav_vars2 { node2 *r; boolean bottom, maybe, nonzero; gbit *zerobelow, *onebelow; } htrav_vars2; extern long chars, nonodes, nextree, which; /* nonodes = number of nodes in tree * * chars = number of binary characters */ extern steptr weight, extras; extern boolean printdata; #ifndef OLDC /*function prototypes*/ void disc_inputdata(AjPPhyloState, pointptr, boolean, boolean, FILE *); void disc_inputdata2(AjPPhyloState, pointptr2); void alloctree(pointptr *); void alloctree2(pointptr2 *); void setuptree(pointptr); void setuptree2(pointptr2); void inputancestors(boolean *, boolean *); void inputancestorsstr(AjPStr, boolean *, boolean *); void inputancestorsnew(boolean *, boolean *); void printancestors(FILE *, boolean *, boolean *); void add(node *, node *, node *, node **, pointptr); void add2(node *, node *, node *, node **, boolean, boolean, pointptr); void add3(node2 *, node2 *, node2 *, node2 **, pointptr2); void re_move(node **, node **, node **, pointptr); void re_move2(node **, node **, node **, boolean *, pointptr); void re_move3(node2 **, node2 **, node2 **, pointptr2); void coordinates(node *, long *, double , long *); void coordinates2(node2 *, long *); void treeout(node *, long, long *, node *); void treeout2(node2 *, long *, node2 *); void standev(long, long, double, double *, double **, longer); void guesstates(Char *); void freegarbage(gbit **); void disc_gnu(gbit **, gbit **); void disc_chuck(gbit *, gbit **); /*function prototypes*/ #endif PHYLIPNEW-3.69.650/include/cont.h0000664000175000017500000000123010052145534013020 00000000000000 /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* cont.h: included in contml & contrast */ #ifndef OLDC /*function prototypes*/ void alloctree(pointarray *, long); void freetree(pointarray *, long); void setuptree(tree *, long); void allocview(tree *, long, long); void freeview(tree *, long); void standev2(long, long, long, long, double, double *, double **, longer); /*function prototypes*/ #endif PHYLIPNEW-3.69.650/include/wagner.h0000664000175000017500000000237407712247476013373 00000000000000 /* version 3.6. 
(c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* wagner.h: included in move, mix & penny */ #ifndef OLDC /* function prototypes */ void inputmixture(bitptr); void inputmixturestr(AjPStr, bitptr); void printmixture(FILE *, bitptr); void fillin(node2 *,long, boolean, bitptr, bitptr); void count(long *, bitptr, steptr, steptr); void postorder(node2 *, long, boolean, bitptr, bitptr); void cpostorder(node2 *, boolean, bitptr, steptr, steptr); void filltrav(node2 *, long, boolean, bitptr, bitptr); void hyprint(struct htrav_vars2 *,boolean,boolean,boolean,bitptr,Char *); void hyptrav(node2 *, boolean, bitptr, long, boolean, boolean, bitptr, bitptr, bitptr, pointptr2, Char *, gbit *); void hypstates(long, boolean, boolean, boolean, node2 *, bitptr, bitptr, bitptr, pointptr2, Char *, gbit *); void drawline(long, double, node2 *); void printree(boolean, boolean, boolean, node2 *); void writesteps(boolean, steptr); /* function prototypes */ #endif PHYLIPNEW-3.69.650/include/dist.h0000664000175000017500000000210610052145534013023 00000000000000 /* version 3.6. (c) Copyright 1993-2000 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* dist.h: included in fitch, kitsch, & neighbor */ #define over 60 typedef long *intvector; typedef node **pointptr; #ifndef OLDC /*function prototypes*/ void alloctree(pointptr *, long); void freetree(pointptr *, long); void allocd(long, pointptr); void freed(long, pointptr); void allocw(long, pointptr); void freew(long, pointptr); void setuptree(tree *, long); void dist_inputdata(AjPPhyloDist, boolean, boolean, boolean, boolean, vector *, intvector *); void coordinates(node *, double, long *, double *, node *, boolean); void drawline(long, double, node *, boolean); void printree(node *, boolean, boolean, boolean); void treeoutr(node *, long *, tree *); void treeout(node *, long *, double, boolean, node *); /*function prototypes*/ #endif PHYLIPNEW-3.69.650/include/moves.h0000664000175000017500000000167110061567662013231 00000000000000 /* moves.h: included in dnamove, move, dolmove, & retree */ typedef enum { left, downn, upp, right } adjwindow; #ifndef OLDC /* function prototypes */ void inpnum(long *, boolean *); void prereverse(boolean); void postreverse(boolean); void chwrite(Char, long, long *, long, long); void nnwrite(long, long, long *, long, long); void stwrite(const char *,long,long *,long,long); void help(const char *); void window(adjwindow, long *, long *, long, long, long, long, long, long, boolean); void pregraph(boolean); void pregraph2(boolean); void postgraph(boolean); void postgraph2(boolean); void nextinc(long *, long *, long *, long, long, boolean *, steptr, steptr); void nextchar(long *, long *, long *, long, long, boolean *); void prevchar(long *, long *, long *, long, long, boolean *); void show(long *, long *, long *, long, long, boolean *); /* function prototypes */ #endif PHYLIPNEW-3.69.650/include/macface.h0000664000175000017500000000122207712247476013456 00000000000000 #ifndef LAMARC_MAC_INTERFACE #define LAMARC_MAC_INTERFACE /* version 3.6. (c) Copyright 1997-2000 by the University of Washington. 
* mac interfacing * this is only inlcuded and used for macs/powermacs * and needs to be used in conjunction with the customized LAMARC-library * which is a modification of the Metrowerks custom library v10. * This file defines a function to fix the outputfiles which was written by * Sean Lamont 1994. * * Peter Beerli 1997 * beerli@genetics.washington.edu */ #endif /*LAMARC_MAC_INTERFACE*/ #ifndef OLDC /* function prototypes */ void fixmacfile(char *); void eventloop(void); /* function prototypes */ #endif PHYLIPNEW-3.69.650/include/io.h0000664000175000017500000000014207712247476012506 00000000000000#define MAC_OFFSET 60 #include #include #include #define DRAW PHYLIPNEW-3.69.650/include/interface.h0000664000175000017500000000167110052145534014026 00000000000000 #ifndef _INTERFACE_H_ #define _INTERFACE_H_ /* interface.h: access to interface.c, a 2 window text/graphics environment, with a scrolling text window and c-like I/O functions. This also sets up some defines for the standard c stuff. */ #ifdef OSX_CARBON #define MAC_OFFSET 60 #endif /* function prototypes */ void macsetup(char *,char *); void queryevent(); void eventloop(); void process_window_closure(); int handleevent(); void textmode(); void gfxmode(); void scroll(); int process_char(); void paint_gfx_window(); #ifndef OSX_CARBON void resize_gfx_window(EventRecord ev); #endif void menu_select(long what); /*debug void fixmacfile(char *);*/ typedef struct { char* fn; double* xo; double* yo; double* scale; long nt; void* root; } mpreviewparams; extern mpreviewparams macpreviewparms; /* function prototypes */ #ifdef __MWERKS__ #define MAC #endif #endif PHYLIPNEW-3.69.650/include/mlclock.h0000664000175000017500000000247710775447511013533 00000000000000#include "phylip.h" /* Uncomment this line to dump details to dnamlk.log */ /* #define DEBUG */ #ifdef DEBUG extern double dump_likelihood_graph(FILE *fp, node *p, double min, double max, double step); extern double dump_likelihood_graph_twonode(FILE *fp, node *p1, double npoints); double dump_likelihood_graph_2d(FILE *fp, node *p1, double npoints); extern void get_limits(node **nodea, double *min, double *max); extern double set_tyme_evaluate(node *, double); #endif /* DEBUG */ typedef double (*evaluator_t)(node *); extern const double MIN_BRANCH_LENGTH; extern const double MIN_ROOT_TYME; /* module initialization */ extern void mlclock_init(tree *t, evaluator_t f); /* check or fix node tymes and branch lengths */ extern boolean all_tymes_valid(node *, double, boolean); /* change node tymes */ extern void setnodetymes(node* p, double newtyme); /* limits of node movement */ extern double min_child_tyme(node *p); extern double parent_tyme(node *p); extern boolean valid_tyme(node *p, double tyme); /* save/restore tymes */ extern void save_tymes(tree* save_tree, double tymes[]); extern void restore_tymes(tree *load_tree, double tymes[]); /* optimize a node tyme */ double maximize(double min_tyme, double cur, double max_tyme, double(*f)(double), double eps, boolean *success); extern boolean makenewv(node *p); PHYLIPNEW-3.69.650/include/draw.h0000664000175000017500000001216610775447511013040 00000000000000 #include "phylip.h" #ifndef X_DISPLAY_MISSING #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif #ifdef MAC #include "interface.h" #endif #include "math.h" #define DEFAULT_STRIPE_HEIGHT 20 #define minus '-' #define stripewidth 3000L #define maxstripedepth 3500 #define fontsize 3800 #define pi 
3.1415926535897932384626433 #define ebcdic EBCDIC #define segments 40 #define xstart 10 #define ystart 35 #define LF 10 #define CR 13 #define escape (ebcdic ? '\'' : '\033') #define null '\000' #define AFMDIR "/usr/lib/transcript/" /* note trailing slash */ typedef unsigned char byte; /*typedef char byte; */ typedef enum {treepen, labelpen} pentype; typedef enum {lw,hp,tek,ibm,mac,houston,decregis,epson,oki,fig, citoh,toshiba,pcx,pcl,pict,ray,pov,xpreview,xbm,bmp, gif,idraw,vrml,winpreview,other} plottertype; typedef enum {vertical, horizontal} growth; typedef enum {cladogram,phenogram,curvogram, eurogram,swoopogram,circular} treestyle; typedef enum {penup,pendown} pensttstype; typedef enum {plotnow, changeparms, quitnow} winactiontype; typedef short fonttype[fontsize]; typedef Char *striparray; typedef striparray striptype[maxstripedepth]; struct LOC_plottext { /* Local variables for plottext: */ double height, compress; short *font; short coord; double heightfont, xfactor, yfactor, xfont, yfont, xplot, yplot, sinslope, cosslope, xx, yy; pensttstype penstatus; } ; typedef struct colortype { const char *name; double red, green, blue; } colortype; typedef struct vrmllighttype { double intensity, x, y, z; } vrmllighttype; double lengthtext(char *, long, char *, fonttype); double heighttext(fonttype, char *); void plotrparms(long ntips); void clearit(void); void getplotter(char); void getpreview(void); const char *figfontname(int id); boolean isfigfont(char *); void plot(pensttstype, double, double); void curvespline(double, double, double, double, boolean, long); void swoopspline(double x1, double y1, double x2, double y2, double x3, double y3, boolean sense, long segs); void changepen(pentype pen); void plottext(Char *pstring,long nchars,double height_,double cmpress2, double x,double y,double slope,short *font_,char *fontname); void loadfont(short *font, char *application); long allocstripe(striptype stripe, long x, long y); boolean plotpreview(char *, double *, double *, double *, long , node *); void initplotter(long ntips, char *fontname); void drawit(char *fontname, double *xoffset, double *yoffset, long numlines, node *root); void finishplotter(void); void write_bmp_header(FILE *, int, int); void turn_rows(byte *, int, int); void write_full_pic(byte *, int); void translate_stripe_to_bmp(striptype *stripe, byte *full_pic, int increment, int width, int div, int *total_bytes); void plottree(node *, node *); void plotlabels(char *fontname); void pout(long); double computeAngle(double oldx, double oldy, double newx, double newy); boolean plot_without_preview(char *, double *, double *, double *, long, node *); /* For povray, added by Dan F. 
*/ #define TREE_TEXTURE "T_Tree\0" #define NAME_TEXTURE "T_Name\0" #ifndef X_DISPLAY_MISSING Display *display; /* the X display */ extern Window mainwin; /* the main display window */ int x, y; /* the corner of the window */ unsigned int width, height; /* the width and height of the window */ #define FONT "-*-new century schoolbook-medium-r-*-*-14-*" char *fontrsc; /* the font resource */ XFontStruct *fontst; /* the font strcture for the font */ XGCValues gcv; /* graphics context values */ GC gc1; /* a graphics context */ XtAppContext appcontext; Widget toplevel; int nargc; char** nargv; extern String res[16]; #define DEFGEOMETRY "600x400+20+50" #endif #define LARGE_BUF_LENGTH 500 extern char fontname[LARGE_BUF_LENGTH]; /* the font name to use */ #ifdef WIN32 #define DEFPLOTTER lw #define DEFPREV winpreview #endif #ifdef DOS #define DEFPLOTTER lw #define DEFPREV ibm #endif #ifdef MAC #ifdef OSX_CARBON #define DEFPLOTTER lw #define DEFPREV mac #else #define DEFPLOTTER pict #define DEFPREV mac #endif #endif #ifdef VMS #define DEFPLOTTER lw #define DEFPREV decregis #endif #ifndef DOS #ifndef MAC #ifndef VMS #ifndef WIN32 #define DEFPLOTTER lw #ifndef X_DISPLAY_MISSING #define DEFPREV xpreview #endif #ifdef X_DISPLAY_MISSING #define DEFPREV tek #endif #endif #endif #endif #endif /* Define SEEK_SET (needed for fseek()) for machines that haven't got it already, */ #ifndef SEEK_SET #define SEEK_SET 0 #endif PHYLIPNEW-3.69.650/include/phylip.h0000664000175000017500000006244311774775165013424 00000000000000#ifndef _PHYLIP_H_ #define _PHYLIP_H_ /* version 3.6b. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, Mike Palczewski, Doug Buxton and Dan Fineman. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ /* now done via config.h */ /*#define VERSION "3.69"*/ /* Debugging options */ /* Define this to disable assertions ... but this leads to 'unused variable' warnings */ /*#define NDEBUG*/ /* Define this to enable debugging code */ /* #define DEBUG */ /* machine-specific stuff: based on a number of factors in the library stdlib.h, we will try to determine what kind of machine/compiler this program is being built on. However, it doesn't always succeed. However, if you have ANSI conforming C, it will probably work. We will try to figure out machine type based on defines in stdio, and compiler-defined things as well.: */ #include #include #include "emboss.h" typedef struct EmbossSTreeNode { AjPStr Name; } EmbossOTreeNode; #define EmbossPTreeNode EmbossOTreeNode* #ifndef X_DISPLAY_MISSING /* X11 is available */ #define X #endif #ifdef WIN32 #include void phyClearScreen(void); void phySaveConsoleAttributes(void); void phySetConsoleAttributes(void); void phyRestoreConsoleAttributes(void); void phyFillScreenColor(void); #endif #ifdef GNUDOS #define DJGPP #define DOS #endif #ifdef THINK_C #define MAC #endif #ifdef __MWERKS__ #ifndef WIN32 #define MAC #endif #endif /* ** To keep ACD clean let's exclude the CMS (VMS code management system?) 
** definitions here */ #define EBCDIC false /* ** these definitions are used in openfile calls ** and match the expected ACD qualifiers for each in/out file type ** They are changed to match ACD standards by appending "file" ** but the shorter phylip names will also work */ #define INFILE "infile" #define OUTFILE "outfile" #define FONTFILE "fontfile" /* on unix this might be /usr/local/lib/fontfile */ #define PLOTFILE "plotfile" #define INTREE "intreefile" #define INTREE2 "bintreefile" /* treedist only */ #define OUTTREE "outtreefile" #define CATFILE "categoriesfile" #define WEIGHTFILE "weightsfile" #define ANCFILE "ancestorsfile" #define MIXFILE "mixturefile" #define FACTFILE "factorsfile" #ifdef L_ctermid /* try and detect for sysV or V7. */ #define SYSTEM_FIVE #endif #ifdef sequent #define SYSTEM_FIVE #endif #ifndef SYSTEM_FIVE #include # if defined(_STDLIB_H_) || defined(_H_STDLIB) || defined(H_SCCSID) || defined(unix) # define UNIX # define MACHINE_TYPE "BSD Unix C" # endif #endif #ifdef __STDIO_LOADED #define VMS #define MACHINE_TYPE "VAX/VMS C" #endif #ifdef __WATCOMC__ #define QUICKC #define WATCOM #define DOS #include "graph.h" #endif /* watcom-c has graphics library calls that are almost identical to * * quick-c, so the "QUICKC" symbol name stays. */ #ifdef _QC #define MACHINE_TYPE "MS-DOS / Quick C" #define QUICKC #include "graph.h" #define DOS #endif #ifdef _DOS_MODE #define MACHINE_TYPE "MS-DOS /Microsoft C " #define DOS /* DOS is always defined if on a DOS machine */ #define MSC /* MSC is defined for microsoft C */ #endif #ifdef __MSDOS__ /* TURBO c compiler, ONLY (no other DOS C compilers) */ #define DOS #define TURBOC #include #include #endif #ifdef DJGPP /* DJ Delorie's original gnu C/C++ port */ #include #endif #ifndef MACHINE_TYPE #define MACHINE_TYPE "ANSI C" #endif #ifdef DOS #define MALLOCRETURN void #else #define MALLOCRETURN void #endif #ifdef VMS #define signed /* signed doesn't exist in VMS */ #endif /* default screen types */ /* if on a DOS but not a Windows system can use IBM PC screen controls */ #ifdef DOS #ifndef WIN32 #define IBMCRT true #define ANSICRT false #endif #endif /* if on a Mac cannot use screen controls */ #ifdef MAC #define IBMCRT false #define ANSICRT false #endif /* if on a Windows system cannot use screen controls */ #ifdef WIN32 #define IBMCRT true #define ANSICRT false #endif /* otherwise, let's assume we are on a Linux or Unix system with no ANSI terminal controls */ #ifndef MAC #ifndef DOS #ifndef WIN32 #define IBMCRT false #define ANSICRT false #endif #endif #endif #ifdef DJGPP #undef MALLOCRETURN #define MALLOCRETURN void #endif /* includes: */ #ifdef UNIX #include #else #include #endif #include #include #include #ifdef MAC #ifdef DRAW #include "interface.h" #else #include "macface.h" #endif #define getch gettch #endif /* directory delimiters */ #ifdef MAC #define DELIMITER ':' #else #ifdef WIN32 #define DELIMITER '\\' #else #define DELIMITER '/' #endif #endif #define FClose(file) if (file) fclose(file) ; file=NULL #define Malloc(x) mymalloc((long)x) typedef void *Anyptr; #define Signed signed #define Const const #define Volatile volatile #define Char char /* Characters (not bytes) */ #define Static static /* Private global funcs and vars */ #define Local static /* Nested functions */ typedef unsigned char boolean; #define true 1 #define false 0 /* Number of items per machine word in set. 
* Used in consensus programs and clique */ #define SETBITS 31 MALLOCRETURN *mymalloc(long); /*** UI behavior ***/ /* Set to 1 to not ask before overwriting files */ #define OVERWRITE_FILES 0 /*** Static memory parameters ***/ #define FNMLNGTH 200 /* length of array to store a file name */ #define MAXNCH 20 /* must be greater than or equal to nmlngth */ #define nmlngth 10 /* number of characters in species name */ #define maxcategs 9 /* maximum number of site types */ #define maxcategs2 11 /* maximum number of site types + 2 */ #define point "." #define pointe '.' #define down 2 #define MAXSHIMOTREES 100 /*** Maximum likelihood parameters ***/ /* Used in proml, promlk, dnaml, dnamlk, etc. */ #define UNDEFINED 1.0 /* undefined or invalid likelihood */ #define smoothings 4 /* number of passes through smoothing algorithm */ #define iterations 8 /* number of iterates for each branch */ #define epsilon 0.0001 /* small number used in makenewv */ #define EPSILON 0.00001 /* small number used in hermite root-finding */ #define initialv 0.1 /* starting branch length unless otherwise */ #define INSERT_MIN_TYME 0.0001 /* Minimum tyme between nodes during inserts */ #define over 60 /* maximum width all branches of tree on screen */ #define LIKE_EPSILON 1e-10 /* Estimate of round-off error in likelihood * calculations. */ /*** Math constants ***/ #define SQRTPI 1.7724538509055160273 #define SQRT2 1.4142135623730950488 /*** Rearrangement parameters ***/ #define NLRSAVES 5 /* number of views that need to be saved during local * * rearrangement */ /*** Output options ***/ /* Number of significant figures to display in numeric output */ #define PRECISION 6 /* Maximum line length of matrix output - 0 for unlimited */ #define OUTPUT_TEXTWIDTH 78 /** output_matrix() flags **/ /* Block output: Matrices are vertically split into blocks that * fit within OUTPUT_TEXTWIDTH columns */ #define MAT_BLOCK 0x1 /* Lower triangle: Values on or above the diagonal are not printed */ #define MAT_LOWER 0x2 /* Print a border between headings and data */ #define MAT_BORDER 0x4 /* Do not print the column header */ #define MAT_NOHEAD 0x8 /* Output the number of columns before the matrix */ #define MAT_PCOLS 0x10 /* Do not enforce maximum line width */ #define MAT_NOBREAK 0x20 /* Pad row header with spaces to 10 char */ #define MAT_PADHEAD 0x40 /* Human-readable format. */ #define MAT_HUMAN MAT_BLOCK /* Machine-readable format. */ #define MAT_MACHINE (MAT_PCOLS | MAT_NOHEAD | MAT_PADHEAD) /* Lower-triangular format. */ #define MAT_LOWERTRI (MAT_LOWER | MAT_MACHINE) typedef long *steptr; typedef long longer[6]; typedef char naym[MAXNCH]; typedef long *bitptr; typedef double raterootarray[maxcategs2][maxcategs2]; typedef struct bestelm { long *btree; boolean gloreange; boolean locreange; boolean collapse; } bestelm; extern FILE *infile, *outfile, *intree, *intree2, *outtree, *weightfile, *catfile, *ancfile, *mixfile, *factfile; extern AjPFile embossinfile; extern AjPFile embossoutfile; extern AjPFile embossintre; extern AjPFile embossintree2; extern AjPFile embossouttree; extern AjPFile embossweightfile; extern AjPFile embosscatfile; extern AjPFile embossancfile; extern AjPFile embossmixfile; extern AjPFile embossfactfile; extern long spp, words, bits; extern boolean ibmpc, ansi, tranvsp; extern naym *nayme; /* names of species */ #define ebcdic EBCDIC typedef Char plotstring[MAXNCH]; /* Approx. 
1GB, used to test for memory request errors */ #define TOO_MUCH_MEMORY 1000000000 /* The below pre-processor commands define the type used to store group arrays. We can't use #elif for metrowerks, so we use cascaded if statements */ #include /* minimum double we feel safe with, anything less will be considered underflow */ #define MIN_DOUBLE 10e-100 /* K&R says that there should be a plus in front of the number, but no machine we've seen actually uses one; we'll include it just in case. */ #define MAX_32BITS 2147483647 #define MAX_32BITS_PLUS +2147483647 /* If ints are 4 bytes, use them */ #if INT_MAX == MAX_32BITS typedef int group_type; #else #if INT_MAX == MAX_32BITS_PLUS typedef int group_type; #else /* Else, if longs are 4 bytes, use them */ #if LONG_MAX == MAX_32BITS typedef long group_type; #else #if LONG_MAX == MAX_32BITS_PLUS typedef long group_type; /* Default to longs */ #else typedef long group_type; #endif #endif #endif #endif #define maxuser 1000 /* maximum number of user-defined trees */ typedef Char **sequence; typedef enum { A, C, G, T, O } bases; typedef enum { alanine, arginine, asparagine, aspartic, cysteine, glutamine, glutamic, glycine, histidine, isoleucine, leucine, lysine, methionine, phenylalanine, proline, serine, threonine, tryptophan, tyrosine, valine } acids; /* for Pars */ typedef enum { zero = 0, one, two, three, four, five, six, seven } discbases; /* for Protpars */ typedef enum { ala, arg, asn, asp, cys, gln, glu, gly, his, ileu, leu, lys, met, phe, pro, ser1, ser2, thr, trp, tyr, val, del, stop, asx, glx, ser, unk, quest } aas; typedef double sitelike[(long)T - (long)A + 1]; /* used in dnaml, dnadist */ typedef double psitelike[(long)valine - (long)alanine + 1]; /* used in proml */ typedef long *baseptr; /* baseptr used in dnapars, dnacomp & dnapenny */ typedef long *baseptr2; /* baseptr used in dnamove */ typedef unsigned char *discbaseptr; /* discbaseptr used in pars */ typedef sitelike *ratelike; /* used in dnaml ... */ typedef psitelike *pratelike; /* used in proml */ typedef ratelike *phenotype; /* phenotype used in dnaml, dnamlk, dnadist */ typedef pratelike *pphenotype; /* phenotype used in proml */ typedef double *sitelike2; typedef sitelike2 *phenotype2; /* phenotype2 used in restml */ typedef double *phenotype3; /* for continuous char programs */ typedef double *vector; /* used in distance programs */ typedef long nucarray[(long)O - (long)A + 1]; typedef long discnucarray[(long)seven - (long)zero + 1]; typedef enum { nocollap, tocollap, undefined } collapstates; typedef enum { bottom, nonbottom, hslength, tip, iter, length, hsnolength, treewt, unittrwt } initops; typedef double **transmatrix; typedef transmatrix *transptr; /* transptr used in restml */ typedef long sitearray[3]; typedef sitearray *seqptr; /* seqptr used in protpars */ typedef struct node { struct node *next, *back; plotstring nayme; long naymlength, tipsabove, index; double times_in_tree; /* Previously known as cons_index */ double xcoord, ycoord; long long_xcoord, long_ycoord; /* for use in cons. 
*/ double oldlen, length, r, theta, oldtheta, width, depth, tipdist, lefttheta, righttheta; group_type *nodeset; /* used by accumulate -plc */ long ymin, ymax; /* used by printree -plc */ boolean haslength; /* haslength used in dnamlk */ boolean iter; /* iter used in dnaml, fitch & restml */ boolean initialized; /* initialized used in dnamlk & restml */ long branchnum; /* branchnum used in restml */ phenotype x; /* x used in dnaml, dnamlk, dnadist */ phenotype2 x2; /* x2 used in restml */ phenotype3 view; /* contml etc */ pphenotype protx; /* protx used in proml */ aas *seq; /* the sequence used in protpars */ seqptr siteset; /* temporary storage for aa's used in protpars*/ double v, deltav, ssq; /* ssq used only in contrast */ double bigv; /* bigv used in contml */ double tyme, oldtyme; /* used in dnamlk */ double t; /* time in kitsch */ boolean sametime; /* bookkeeps scrunched nodes in kitsch */ double weight; /* weight of node used by scrunch in kitsch */ boolean processed; /* used by evaluate in kitsch */ boolean deleted; /* true if node is deleted (retree) */ boolean hasname; /* true if tip has a name (retree) */ double beyond; /* distance beyond this node to most distant tip */ /* (retree) */ boolean deadend; /* true if no undeleted nodes beyond this node */ /* (retree) */ boolean onebranch; /* true if there is one undeleted node beyond */ /* this node (retree) */ struct node *onebranchnode; /* if there is, a pointer to that node (retree)*/ double onebranchlength; /* if there is, the distance from here to there*/ /* (retree) */ boolean onebranchhaslength; /* true if there is a valid combined length*/ /* from here to there (retree) */ collapstates collapse; /* used in dnapars & dnacomp */ boolean tip; boolean bottom; /* used in dnapars & dnacomp, disc char */ boolean visited; /* used in dnapars & dnacomp disc char */ baseptr base; /* the sequence in dnapars/comp/penny */ discbaseptr discbase; /* the sequence in pars */ baseptr2 base2; /* the sequence in dnamove */ baseptr oldbase; /* record previous sequence */ discbaseptr olddiscbase; /* record previous sequence */ long numdesc; /* number of immediate descendants */ nucarray *numnuc; /* bookkeeps number of nucleotides */ discnucarray *discnumnuc; /* bookkeeps number of nucleotides */ steptr numsteps; /* bookkeeps steps */ steptr oldnumsteps; /* record previous steps */ double sumsteps; /* bookkeeps sum of steps */ nucarray cumlengths; /* bookkeeps cummulative minimum lengths */ discnucarray disccumlengths; /* bookkeeps cummulative minimum lengths */ nucarray numreconst; /* bookkeeps number of reconstructions */ discnucarray discnumreconst; /* bookkeeps number of reconstructions */ vector d, w; /* for distance matrix programs */ double dist; /* dist used in fitch */ bitptr stateone, statezero; /* discrete char programs */ long maxpos; /* maxpos used in Clique */ Char state; /* state used in Dnamove, Dolmove & Move */ double* underflows; /* used to record underflow */ } node; typedef node **pointarray; /*** tree structure ***/ typedef struct tree { /* An array of pointers to nodes. Each tip node and ring of nodes has a * unique index starting from one. The nodep array contains pointers to each * one, starting from 0. In the case of internal nodes, the entries in nodep * point to the rootward node in the group. Since the trees are otherwise * entirely symmetrical, except at the root, this is the only way to resolve * parent, child, and sibling relationships. 
* * Indices in range [0, spp) point to tips, while indices [spp, nonodes) * point to fork nodes */ pointarray nodep; /* A pointer to the first node. Typically, root is used when the tree is rooted, * and points to an internal node with no back link. */ node *root; /* start is used when trees are unrooted. It points to an internal node whose * back link typically points to the outgroup leaf. */ node *start; /* In maximum likelihood programs, the most recent evaluation is stored here */ double likelihood; /* Branch transition matrices for restml */ transptr trans; /* all transition matrices */ long *freetrans; /* an array of indexes of free matrices */ long transindex; /* index of last valid entry in freetrans[] */ } tree; typedef void (*initptr)(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, char **); #ifndef OLDC /* function prototypes */ int filexists(char *); const char* get_command_name (const char *); void EOF_error(void); void getstryng(char *); /*void openfile(FILE **,const char *,const char *,const char *,const char *, const char **);*/ void cleerhome(void); void loopcount(long *, long); double randum(longer); void randumize(longer, long *); double normrand(longer); void uppercase(Char *); /*void initseed(long *, long *, longer);*/ /*void initjumble(long *, long *, longer, long *);*/ /*void initoutgroup(long *, long);*/ /*void initthreshold(double *);*/ /*void initcatn(long *);*/ /*void initcategs(long, double *);*/ /*void initprobcat(long, double *, double *);*/ double logfac (long); double halfroot(double (*func)(long , double), long, double, double); double hermite(long, double); void initlaguerrecat(long, double, double *, double *); void root_hermite(long, double *); void hermite_weight(long, double *, double *); void inithermitcat(long, double, double *, double *); void lgr(long, double, raterootarray); double glaguerre(long, double, double); void initgammacat(long, double, double *, double *); void inithowmany(long *, long); void inithowoften(long *); void initlambda(double *); void initfreqs(double *, double *, double *, double *); void initratio(double *); void initpower(double *); void initdatasets(long *); void justweights(long *); void initterminal(boolean *, boolean *); void initnumlines(long *); void initbestrees(bestelm *, long, boolean); void newline(FILE *, long, long, long); void inputnumbers(long *, long *, long *, long); void inputnumbersold(long *, long *, long *, long); void inputnumbers2(long *, long *, long n); void inputnumbers3(long *, long *); void samenumsp(long *, long); void samenumsp2(long); void readoptions(long *, const char *); void matchoptions(Char *, const char *); void inputweightsstr(AjPStr, long, steptr, boolean *); void inputweightsstrold(AjPStr, long, steptr, boolean *); void inputweightsstr2(AjPStr, long, long, long *, steptr, boolean *, const char *); void printweights(FILE *, long, long, steptr, const char *); void inputcategsstr(AjPStr, long, long, steptr, long, const char *); void printcategs(FILE *, long, steptr, const char *); void inputfactors(long, Char *, boolean *); void inputfactorsstr(AjPStr, long, Char *, boolean *); void printfactors(FILE *, long, Char *, const char *); void headings(long, const char *, const char *); void initname(long); void findtree(boolean *,long *,long,long *,bestelm *); void addtree(long,long *,boolean,long *,bestelm *); long findunrearranged(bestelm *, long, boolean); boolean torearrange(bestelm *, long); void reducebestrees(bestelm *, long *); 
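/* Editorial note (not part of the original header): the comments on struct
 * tree above describe how nodep is laid out -- indices [0, spp) are tips,
 * indices [spp, nonodes) are fork rings, each fork entry points at the
 * rootward node of its ring, and the ring is closed through the next
 * pointers.  The function below is only an illustrative sketch of walking
 * that layout; the guard macro and the function name are hypothetical and
 * are not used anywhere in PHYLIP. */
#ifdef PHYLIP_NODEP_EXAMPLE
static long example_count_ring_members(tree *t, long spp_, long nonodes_)
{
  long i, members = 0;

  for (i = spp_; i < nonodes_; i++) {      /* fork entries only */
    node *start_node = t->nodep[i];        /* rootward node of the ring */
    node *p = start_node;
    if (p == NULL)
      continue;
    do {                                   /* walk the ring via next */
      members++;
      p = p->next;
    } while (p != start_node);
  }
  return members;                          /* total nodes in all fork rings */
}
#endif /* PHYLIP_NODEP_EXAMPLE */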
void shellsort(double *, long *, long); void findch(Char, Char *, long); void findch2(Char, long *, long *, Char *); void findch3(Char, Char *, long, long); void processlength(double *,double *,Char *,boolean *,char **,long *); void writename(long, long, long *); void memerror(void); void odd_malloc(long); void gnu(node **, node **); void chuck(node **, node *); void zeronumnuc(node *, long); void zerodiscnumnuc(node *, long); void allocnontip(node *, long *, long); void allocdiscnontip(node *, long *, unsigned char *, long ); void allocnode(node **, long *, long); void allocdiscnode(node **, long *, unsigned char *, long ); void gnutreenode(node **, node **, long, long, long *); void gnudisctreenode(node **, node **, long , long, long *, unsigned char *); void setupnode(node *, long); node * pnode(tree *t, node *p); long count_sibs (node *); void inittrav (node *); void commentskipper(char **, long *); long countcomma(char *, long *); void hookup(node *, node *); void unhookup(node *, node *); void link_trees(long, long , long, pointarray); void allocate_nodep(pointarray *, char *, long *); void malloc_pheno(node *, long, long); void malloc_ppheno(node *, long, long); long take_name_from_tree (Char *, Char *, char **); void match_names_to_data (Char *, pointarray, node **, long); void addelement(node **, node *, Char *, long *, char **, pointarray, boolean *, boolean *, pointarray, long *, long *, boolean *, node **, initptr,boolean,long); void treeread (char **, node **, pointarray, boolean *, boolean *, pointarray, long *, boolean *, node **, initptr,boolean,long); void addelement2(node *, Char *, long *, char **, pointarray, boolean, double *, boolean *, long *, long *, long, boolean *,boolean, long); void treeread2 (char **, node **, pointarray, boolean, double *, boolean *, boolean *, long *,boolean,long); void exxit (int); void countup(long *loopcount, long maxcount); char gettc(FILE* file); void unroot_r(node* p,node ** nodep, long nonodes); void unroot(tree* t,long nonodes); void unroot_here(node* root, node** nodep, long nonodes); void clear_connections(tree *t, long nonodes); void init(int argc, char** argv); char **stringnames_new(void); void stringnames_delete(char **names); int fieldwidth_double(double val, unsigned int precision); void output_matrix_d(FILE *fp, double **matrix, unsigned long rows, unsigned long cols, char **row_head, char **col_head, int flags); void debugtree (tree *, FILE *); void debugtree2 (pointarray, long, FILE *); /* new functions for EMBOSS */ void inputnumbersseq(AjPSeqset seqset, long *spp, long *chars, long *nonodes, long n); void inputnumbersseq2(AjPSeqset seqset, long *, long *, long n); void inputnumbersfreq(AjPPhyloFreq, long *, long *, long *, long); void inputnumbersstate(AjPPhyloState, long *, long *, long *, long); void inputnumbers2seq(AjPPhyloDist, long *, long *, long n); void samenumspfreq(AjPPhyloFreq, long *, long); void samenumspstate(AjPPhyloState, long *, long); void samenumspseq(AjPSeqset, long *, long); void samenumspseq2(AjPPhyloDist, long); void initnameseq(AjPSeqset, long); void initnamedist(AjPPhyloDist, long); void initnamestate(AjPPhyloState, long); void initnamefreq(AjPPhyloFreq, long); void emboss_initcatn(long *categs); void emboss_initcategs(AjPFloat arrayvals, long categs, double *rate); void sgetch(Char *, long *, char **); void getch(Char *, long *, FILE *); void getch2(Char *, long *); void emboss_openfile(AjPFile outfile, FILE **fp, const char **perm); void emboss_initseed(long inseed, long *inseed0, longer seed); 
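/* Editorial note (not part of the original header): output_matrix_d(),
 * declared above, takes an OR-ed combination of the MAT_* flags defined
 * earlier in this header (MAT_HUMAN, MAT_MACHINE, MAT_LOWERTRI, ...).
 * The fragment below is only a hedged usage sketch; the guard macro and the
 * wrapper name are hypothetical, and the expected layout of row_head and
 * col_head should be checked against the definition of output_matrix_d()
 * in phylip.c. */
#ifdef PHYLIP_OUTPUT_MATRIX_EXAMPLE
static void example_write_distances(FILE *fp, double **d, char **names,
                                    unsigned long n)
{
  /* MAT_LOWERTRI = MAT_LOWER | MAT_MACHINE: number of columns printed
   * before the matrix, row headers padded to 10 characters, no column
   * header line, values above the diagonal suppressed. */
  output_matrix_d(fp, d, n, n, names, names, MAT_LOWERTRI);
}
#endif /* PHYLIP_OUTPUT_MATRIX_EXAMPLE */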
void emboss_initoutgroup(long *outgrno, long spp); void emboss_initcatn(long *categs); void emboss_initcategs(AjPFloat arrayvals, long categs, double *rate); double emboss_initprobcat(AjPFloat arrayvals, long categs, double *probcat); void emboss_printtree(node *p, char* title); #endif /* OLDC */ #endif /* _PHYLIP_H_ */ PHYLIPNEW-3.69.650/include/printree.h0000664000175000017500000000022610775447511013725 00000000000000#include #include "phylip.h" extern void mlk_printree(FILE *fp, tree *t); extern void mlk_describe(FILE *fp, tree *t, double fracchange); PHYLIPNEW-3.69.650/depcomp0000755000175000017500000005055212171071677011661 00000000000000#! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2012-03-27.16; # UTC # Copyright (C) 1999-2012 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. Environment variables: depmode Dependency tracking mode. source Source file read by 'PROGRAMS ARGS'. object Object file output by 'PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputting dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac # A tabulation character. tab=' ' # A newline character. nl=' ' if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi cygpath_u="cygpath -u -f -" if test "$depmode" = msvcmsys; then # This is just like msvisualcpp but w/o cygpath translation. 
# Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvisualcpp fi if test "$depmode" = msvc7msys; then # This is just like msvc7 but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvc7 fi if test "$depmode" = xlc; then # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations. gccflag=-qmakedep=gcc,-MF depmode=gcc fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). ## - Using -M directly means running the compiler twice (even worse ## than renaming). if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ## The second -e expression handles DOS-style file names with drive letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the "deleted header file" problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. tr ' ' "$nl" < "$tmpdepfile" | ## Some versions of gcc put a space before the ':'. On the theory ## that the space means something, we add a space to the output as ## well. hp depmode also adds that space, but also prefixes the VPATH ## to the object. Take care to not repeat it in the output. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? 
if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # the IRIX cc adds comments like '#:fec' to the end of the # dependency line. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ tr "$nl" ' ' >> "$depfile" echo >> "$depfile" # The second pass generates a dummy entry for each header file. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> "$depfile" else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; xlc) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. Also, the AIX compiler puts '$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then tmpdepfile1=$dir$base.u tmpdepfile2=$base.u tmpdepfile3=$dir.libs/$base.u "$@" -Wc,-M else tmpdepfile1=$dir$base.u tmpdepfile2=$dir$base.u tmpdepfile3=$dir$base.u "$@" -M fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then # Each line is of the form 'foo.o: dependent.h'. # Do two passes, one to just change these to # '$object: dependent.h' and one to simply 'dependent.h:'. sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; icc) # Intel's C compiler anf tcc (Tiny C Compiler) understand '-MD -MF file'. # However on # $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c # ICC 7.0 will fill foo.d with something like # foo.o: sub/foo.c # foo.o: sub/foo.h # which is wrong. We want # sub/foo.o: sub/foo.c # sub/foo.o: sub/foo.h # sub/foo.c: # sub/foo.h: # ICC 7.1 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using '\': # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... # tcc 0.9.26 (FIXME still under development at the moment of writing) # will emit a similar output, but also prepend the continuation lines # with horizontal tabulation characters. "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form 'foo.o: dependent.h', # or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'. 
# Do two passes, one to just change these to # '$object: dependent.h' and one to simply 'dependent.h:'. sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \ < "$tmpdepfile" > "$depfile" sed ' s/[ '"$tab"'][ '"$tab"']*/ /g s/^ *// s/ *\\*$// s/^[^:]*: *// /^$/d /:$/d s/$/ :/ ' < "$tmpdepfile" >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. # Much of this is similar to the tru64 case; see comments there. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" # Add 'dependent.h:' lines. sed -ne '2,${ s/^ *// s/ \\*$// s/$/:/ p }' "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in 'foo.d' instead, so we check for that too. # Subdirectories are respected. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then # With Tru64 cc, shared objects can also be used to make a # static library. This mechanism is used in libtool 1.4 series to # handle both shared and static libraries in a single compilation. # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. # # With libtool 1.5 this exception was removed, and libtool now # generates 2 separate objects for the 2 libraries. These two # compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 tmpdepfile2=$dir$base.o.d # libtool 1.5 tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.o.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d tmpdepfile4=$dir$base.d "$@" -MD fi stat=$? 
if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; msvc7) if test "$libtool" = yes; then showIncludes=-Wc,-showIncludes else showIncludes=-showIncludes fi "$@" $showIncludes > "$tmpdepfile" stat=$? grep -v '^Note: including file: ' "$tmpdepfile" if test "$stat" = 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The first sed program below extracts the file names and escapes # backslashes for cygpath. The second sed program outputs the file # name when reading, but also accumulates all include files in the # hold buffer in order to output them again at the end. This only # works with sed implementations that can handle large buffers. sed < "$tmpdepfile" -n ' /^Note: including file: *\(.*\)/ { s//\1/ s/\\/\\\\/g p }' | $cygpath_u | sort -u | sed -n ' s/ /\\ /g s/\(.*\)/'"$tab"'\1 \\/p s/.\(.*\) \\/\1:/ H $ { s/.*/'"$tab"'/ G p }' >> "$depfile" rm -f "$tmpdepfile" ;; msvc7msys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for ':' # in the target name. This is to cope with DOS-style filenames: # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. "$@" $dashmflag | sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" tr ' ' "$nl" < "$tmpdepfile" | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? # Remove any Libtool call if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # X makedepend shift cleared=no eat=no for arg do case $cleared in no) set ""; shift cleared=yes ;; esac if test $eat = yes; then eat=no continue fi case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. 
-arch) eat=yes ;; -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix=`echo "$object" | sed 's/^.*\././'` touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" # makedepend may prepend the VPATH from the source file name to the object. # No need to regex-escape $object, excess matching of '.' is harmless. sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi IFS=" " for arg do case "$arg" in -o) shift ;; $object) shift ;; "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E 2>/dev/null | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" echo "$tab" >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; msvcmsys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: PHYLIPNEW-3.69.650/m4/0002775000175000017500000000000012171071711010666 500000000000000PHYLIPNEW-3.69.650/m4/ltsugar.m40000644000175000017500000001042412171071672012534 00000000000000# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # # Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 6 ltsugar.m4 # This is to help aclocal find these macros, as it can't see m4_define. 
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) # lt_join(SEP, ARG1, [ARG2...]) # ----------------------------- # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their # associated separator. # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier # versions in m4sugar had bugs. m4_define([lt_join], [m4_if([$#], [1], [], [$#], [2], [[$2]], [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) m4_define([_lt_join], [m4_if([$#$2], [2], [], [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) # lt_car(LIST) # lt_cdr(LIST) # ------------ # Manipulate m4 lists. # These macros are necessary as long as will still need to support # Autoconf-2.59 which quotes differently. m4_define([lt_car], [[$1]]) m4_define([lt_cdr], [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], [$#], 1, [], [m4_dquote(m4_shift($@))])]) m4_define([lt_unquote], $1) # lt_append(MACRO-NAME, STRING, [SEPARATOR]) # ------------------------------------------ # Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. # Note that neither SEPARATOR nor STRING are expanded; they are appended # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). # No SEPARATOR is output if MACRO-NAME was previously undefined (different # than defined and empty). # # This macro is needed until we can rely on Autoconf 2.62, since earlier # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. m4_define([lt_append], [m4_define([$1], m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) # ---------------------------------------------------------- # Produce a SEP delimited list of all paired combinations of elements of # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list # has the form PREFIXmINFIXSUFFIXn. # Needed until we can rely on m4_combine added in Autoconf 2.62. m4_define([lt_combine], [m4_if(m4_eval([$# > 3]), [1], [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl [[m4_foreach([_Lt_prefix], [$2], [m4_foreach([_Lt_suffix], ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) # ----------------------------------------------------------------------- # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
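# Added usage sketch (the macro name 'flags' is hypothetical):
#   m4_define([flags], [-Wall])
#   lt_append([flags], [-g], [ ])                           => flags = '-Wall -g'
#   lt_if_append_uniq([flags], [-g], [ ], [added], [dup])   => expands to 'dup'
#   lt_if_append_uniq([flags], [-O2], [ ], [added], [dup])  => appends '-O2', expands 'added'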
m4_define([lt_if_append_uniq], [m4_ifdef([$1], [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], [lt_append([$1], [$2], [$3])$4], [$5])], [lt_append([$1], [$2], [$3])$4])]) # lt_dict_add(DICT, KEY, VALUE) # ----------------------------- m4_define([lt_dict_add], [m4_define([$1($2)], [$3])]) # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) # -------------------------------------------- m4_define([lt_dict_add_subkey], [m4_define([$1($2:$3)], [$4])]) # lt_dict_fetch(DICT, KEY, [SUBKEY]) # ---------------------------------- m4_define([lt_dict_fetch], [m4_ifval([$3], m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) # ----------------------------------------------------------------- m4_define([lt_if_dict_fetch], [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], [$5], [$6])]) # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) # -------------------------------------------------------------- m4_define([lt_dict_filter], [m4_if([$5], [], [], [lt_join(m4_quote(m4_default([$4], [[, ]])), lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl ]) PHYLIPNEW-3.69.650/m4/lf_x11.m40000664000175000017500000000547311546377050012163 00000000000000dnl Copyright (C) 1988 Eleftherios Gkioulekas dnl dnl This program is free software; you can redistribute it and/or modify dnl it under the terms of the GNU General Public License as published by dnl the Free Software Foundation; either version 2 of the License, or dnl (at your option) any later version. dnl dnl This program is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the dnl GNU General Public License for more details. dnl dnl You should have received a copy of the GNU General Public License dnl along with this program; if not, write to the Free Software dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. dnl dnl As a special exception to the GNU General Public License, if you dnl distribute this file as part of a program that contains a configuration dnl script generated by Autoconf, you may include it under the same dnl distribution terms that you use for the rest of that program. #----------------------------------------------------------------------- # This macro searches for Xlib and when it finds it it adds the # appropriate flags to CFLAGS and export the link sequence to # the variable XLIB. # In your configure.in file add: # LF_PATH_XLIB # In your Makefile.am add # program_LDADD = .... $(XLIB) #------------------------------------------------------------------------ # # Just added EMBOSS into LF_PATH_XLIB so that on the systems where # LF_PATH_XLIB exists there are no duplication errors. AC_DEFUN([LF_EMBOSS_PATH_XLIB],[ CFLAGS="$CFLAGS $X_CFLAGS" case $host_os in irix*) XLIB="-lX11 $X_EXTRA_LIBS" ;; *) XLIB="$X_LIBS -lX11 $X_EXTRA_LIBS" ;; esac AC_SUBST([XLIB]) AC_CHECK_HEADER(X11/Xlib.h, [ AC_DEFINE([PLD_xwin], [1], [Define to 1 if X11 support is available]) ], [ echo "" echo "X11 graphics have been selected but no X11 header files" echo "have been found." echo "" echo "This error usually happens on Linux/MacOSX distributions" echo "where the optional X11 development files have not been installed." 
echo "On Linux RPM systems this package is usually called something" echo "like xorg-x11-proto-devel whereas on Debian/Ubuntu it may" echo "be called x-dev. On MacOSX installation DVDs the X11 files" echo "can usually be found as an explicitly named optional" echo "installation." echo "" echo "After installing the X11 development files you should do a" echo "'make clean' and perform the configure stage again." echo "" echo "Alternatively, to install EMBOSS without X11 support, you can add" echo "the --without-x switch to the configure command." echo "" exit $? ]) ]) PHYLIPNEW-3.69.650/m4/pngdriver.m40000664000175000017500000000641211430325237013053 00000000000000dnl @synopsis CHECK_PNGDRIVER() dnl dnl This macro searches for an installed png/gd/zlib library. If nothing dnl was specified when calling configure, it searches first in /usr/local dnl and then in /usr. If the --with-pngdriver=DIR is specified, it will try dnl to find it in DIR/include/zlib.h and DIR/lib/libz.a. If --without-pngdriver dnl is specified, the library is not searched at all. dnl dnl It defines the symbol PLD_png if the librarys are found. You should dnl use autoheader to include a definition for this symbol in a config.h dnl file. dnl dnl Sources files should then use something like dnl dnl #ifdef PLD_png dnl #include dnl #endif /* PLD_png */ dnl dnl @author Ian Longden dnl Modified: Alan Bleasby. Corrected library order dnl AC_DEFUN([CHECK_PNGDRIVER], # # Handle user hints # [AC_MSG_CHECKING([if png driver is wanted]) AC_ARG_WITH([pngdriver], [AS_HELP_STRING([--with-pngdriver=@<:@DIR@:>@], [root directory path of png/gd/zlib installation (defaults to /usr)])], [if test "$withval" != no ; then AC_MSG_RESULT([yes]) ALT_HOME="$withval" else AC_MSG_RESULT([no]) fi], [ AC_MSG_RESULT([yes]) ALT_HOME=/usr ]) # # Locate png/gd/zlib, if wanted # if test -d "${ALT_HOME}" then # # Keep a copy if it fails # ALT_LDFLAGS="$LDFLAGS" ALT_CPPFLAGS="$CPPFLAGS" # # Set # LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib" CPPFLAGS="$CPPFLAGS -I$ALT_HOME/include" ICCHECK=0 case $host_os in solaris*) AC_CHECK_LIB(iconv, libiconv_close, ICCHECK=1, ICCHECK=0, -L${ALT_HOME}/lib -liconv) if test $ICCHECK = "1" ; then LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib -liconv" fi LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac # # Check for zlib in ALT_HOME # AC_CHECK_LIB(z, inflateEnd, CHECK=1, CHECK=0, -L${ALT_HOME}/lib -lz) # # # Check for png # if test $CHECK = "1" ; then AC_CHECK_LIB(png, png_destroy_read_struct, CHECK=1, CHECK=0 , -L${ALT_HOME}/lib -lz) fi # # Check for gd # if test $CHECK = "1"; then AC_CHECK_LIB(gd, gdImageCreateFromPng, CHECK=1, CHECK=0 , -L${ALT_HOME}/lib -lgd -lpng -lz -lm) if test $CHECK = "0"; then echo need to upgrade gd for png driver for plplot fi fi # # If everything found okay then proceed to include png driver in config. # if test $CHECK = "1" ; then LIBS="$LIBS -lgd -lpng -lz -lm" if test $ICCHECK = "1" ; then LIBS="$LIBS -liconv" fi case $host_os in solaris*) LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac AC_DEFINE([PLD_png], [1], [Define to 1 is PNG support is available]) AM_CONDITIONAL(AMPNG, true) echo PNG libraries found if test $ALT_HOME = "/usr" ; then LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" fi else # # If not okay then reset FLAGS. # AM_CONDITIONAL(AMPNG, false) LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" echo No png driver will be made due to librarys missing/old. fi # echo PNG STUFF FOLLOWS!!! 
# echo CHECK = $CHECK # echo LIBS = $LIBS # echo LDFLAGS = $LDFLAGS # echo CPPFLAGS = $CPPFLAGS else if test $withval != "no"; then echo "Directory $ALT_HOME does not exist" exit 0 fi fi ]) PHYLIPNEW-3.69.650/m4/postgresql.m40000664000175000017500000001334311732713445013266 00000000000000dnl -*- Autoconf -*- ##### http://autoconf-archive.cryp.to/ax_lib_postgresql.html # # SYNOPSIS # # AX_LIB_POSTGRESQL([MINIMUM-VERSION]) # # DESCRIPTION # # This macro provides tests of availability of PostgreSQL 'libpq' # library of particular version or newer. # # AX_LIB_POSTGRESQL macro takes only one argument which is optional. # If there is no required version passed, then macro does not run # version test. # # The --with-postgresql option takes one of three possible values: # # no - do not check for PostgreSQL client library # # yes - do check for PostgreSQL library in standard locations # (pg_config should be in the PATH) # # path - complete path to pg_config utility, use this option if # pg_config can't be found in the PATH # # This macro calls: # # AC_SUBST([POSTGRESQL_CFLAGS]) # AC_SUBST([POSTGRESQL_CPPFLAGS]) # AC_SUBST([POSTGRESQL_LDFLAGS]) # AC_SUBST([POSTGRESQL_VERSION]) # # And sets: # # HAVE_POSTGRESQL # # LAST MODIFICATION # # 2006-07-16 # 2010-05-14 MKS: Added POSTGRESQL_CPPFLAGS # 2011-06-21 AJB: Added workaround for Fedora pg_config oddity # 2011-08-01 MKS: Changed PG_CONFIG to POSTGRESQL_CONFIG # Made test constructs more portable # # COPYLEFT # # Copyright (c) 2006 Mateusz Loskot # # Copying and distribution of this file, with or without # modification, are permitted in any medium without royalty provided # the copyright notice and this notice are preserved. AC_DEFUN([AX_LIB_POSTGRESQL], [ POSTGRESQL_CFLAGS="" POSTGRESQL_CPPFLAGS="" POSTGRESQL_LDFLAGS="" POSTGRESQL_CONFIG="" POSTGRESQL_VERSION="" AC_ARG_WITH([postgresql], [AS_HELP_STRING([--with-postgresql@<:=@ARG@:>@], [use PostgreSQL library @<:@default=yes@:>@, optionally specify path to pg_config])], [ AS_IF([test "x${withval}" = "xno"], [want_postgresql="no"], [test "x${withval}" = "xyes"], [want_postgresql="yes"], [ want_postgresql="yes" POSTGRESQL_CONFIG="${withval}" ]) ], [want_postgresql="yes"]) dnl dnl Check PostgreSQL libraries (libpq) dnl AS_IF([test "x${want_postgresql}" = "xyes"], [ AS_IF([test -z "${POSTGRESQL_CONFIG}" -o test], [AC_PATH_PROG([POSTGRESQL_CONFIG], [pg_config], [no])]) AS_IF([test "x${POSTGRESQL_CONFIG}" != "xno"], [ AC_MSG_CHECKING([for PostgreSQL libraries]) POSTGRESQL_CFLAGS="-I`${POSTGRESQL_CONFIG} --includedir`" POSTGRESQL_CPPFLAGS="-I`${POSTGRESQL_CONFIG} --includedir`" POSTGRESQL_LDFLAGS="-L`${POSTGRESQL_CONFIG} --libdir` -lpq" POSTGRESQL_VERSION=`${POSTGRESQL_CONFIG} --version | sed -e 's#PostgreSQL ##'` dnl It isn't enough to just test for pg_config as Fedora dnl provides it in the postgresql RPM even though postgresql-devel may dnl not be installed EMBCPPFLAGS="${CPPFLAGS}" EMBLDFLAGS="${LDFLAGS}" CPPFLAGS="${POSTGRESQL_CPPFLAGS} ${EMBCPPFLAGS}" LDFLAGS="${POSTGRESQL_LDFLAGS} ${EMBLDFLAGS}" AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include #include "libpq-fe.h"]], [[PQconnectdb(NULL)]])], [havepostgresql="yes"], [havepostgresql="no"]) CPPFLAGS="${EMBCPPFLAGS}" LDFLAGS="${EMBLDFLAGS}" AS_IF([test "x${havepostgresql}" = "xyes"], [ AC_DEFINE([HAVE_POSTGRESQL], [1], [Define to 1 if PostgreSQL libraries are available.]) found_postgresql="yes" AC_MSG_RESULT([yes]) ], [ POSTGRESQL_CFLAGS="" POSTGRESQL_CPPFLAGS="" POSTGRESQL_LDFLAGS="" found_postgresql="no" AC_MSG_RESULT([no]) ]) ], [ found_postgresql="no" ]) 
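dnl Added illustrative note (example values are hypothetical): a typical
dnl configure.ac invocation is
dnl     AX_LIB_POSTGRESQL([8.4])
dnl and a user whose pg_config lives outside PATH would configure with
dnl     ./configure --with-postgresql=/opt/pgsql/bin/pg_config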
]) dnl dnl Check if required version of PostgreSQL is available dnl postgresql_version_req=ifelse([$1], [], [], [$1]) AS_IF([test "x${found_postgresql}" = "xyes" -a -n "${postgresql_version_req}"], [ AC_MSG_CHECKING([if PostgreSQL version is >= ${postgresql_version_req}]) dnl Decompose required version string of PostgreSQL dnl and calculate its number representation postgresql_version_req_major=`expr ${postgresql_version_req} : '\([[0-9]]*\)'` postgresql_version_req_minor=`expr ${postgresql_version_req} : '[[0-9]]*\.\([[0-9]]*\)'` postgresql_version_req_micro=`expr ${postgresql_version_req} : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` AS_IF([test "x${postgresql_version_req_micro}" = "x"], [postgresql_version_req_micro="0"]) postgresql_version_req_number=`expr ${postgresql_version_req_major} \* 1000000 \ \+ ${postgresql_version_req_minor} \* 1000 \ \+ ${postgresql_version_req_micro}` dnl Decompose version string of installed PostgreSQL dnl and calculate its number representation postgresql_version_major=`expr ${POSTGRESQL_VERSION} : '\([[0-9]]*\)'` postgresql_version_minor=`expr ${POSTGRESQL_VERSION} : '[[0-9]]*\.\([[0-9]]*\)'` postgresql_version_micro=`expr ${POSTGRESQL_VERSION} : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` AS_IF([test "x${postgresql_version_micro}" = "x"], [postgresql_version_micro="0"]) postgresql_version_number=`expr ${postgresql_version_major} \* 1000000 \ \+ ${postgresql_version_minor} \* 1000 \ \+ ${postgresql_version_micro}` postgresql_version_check=`expr ${postgresql_version_number} \>\= ${postgresql_version_req_number}` AS_IF([test "x${postgresql_version_check}" = "x1"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) ]) AC_SUBST([POSTGRESQL_CFLAGS]) AC_SUBST([POSTGRESQL_CPPFLAGS]) AC_SUBST([POSTGRESQL_LDFLAGS]) AC_SUBST([POSTGRESQL_VERSION]) ]) PHYLIPNEW-3.69.650/m4/libtool.m40000644000175000017500000105743212171071672012532 00000000000000# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. m4_define([_LT_COPYING], [dnl # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is part of GNU Libtool. # # GNU Libtool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. 
If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, or # obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ]) # serial 57 LT_INIT # LT_PREREQ(VERSION) # ------------------ # Complain and exit if this libtool version is less that VERSION. m4_defun([LT_PREREQ], [m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, [m4_default([$3], [m4_fatal([Libtool version $1 or higher is required], 63)])], [$2])]) # _LT_CHECK_BUILDDIR # ------------------ # Complain if the absolute build directory name contains unusual characters m4_defun([_LT_CHECK_BUILDDIR], [case `pwd` in *\ * | *\ *) AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; esac ]) # LT_INIT([OPTIONS]) # ------------------ AC_DEFUN([LT_INIT], [AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl AC_BEFORE([$0], [LT_LANG])dnl AC_BEFORE([$0], [LT_OUTPUT])dnl AC_BEFORE([$0], [LTDL_INIT])dnl m4_require([_LT_CHECK_BUILDDIR])dnl dnl Autoconf doesn't catch unexpanded LT_ macros by default: m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 dnl unless we require an AC_DEFUNed macro: AC_REQUIRE([LTOPTIONS_VERSION])dnl AC_REQUIRE([LTSUGAR_VERSION])dnl AC_REQUIRE([LTVERSION_VERSION])dnl AC_REQUIRE([LTOBSOLETE_VERSION])dnl m4_require([_LT_PROG_LTMAIN])dnl _LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) dnl Parse OPTIONS _LT_SET_OPTIONS([$0], [$1]) # This can be used to rebuild libtool when needed LIBTOOL_DEPS="$ltmain" # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' AC_SUBST(LIBTOOL)dnl _LT_SETUP # Only expand once: m4_define([LT_INIT]) ])# LT_INIT # Old names: AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_PROG_LIBTOOL], []) dnl AC_DEFUN([AM_PROG_LIBTOOL], []) # _LT_CC_BASENAME(CC) # ------------------- # Calculate cc_basename. Skip known compiler wrappers and cross-prefix. m4_defun([_LT_CC_BASENAME], [for cc_temp in $1""; do case $cc_temp in compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` ]) # _LT_FILEUTILS_DEFAULTS # ---------------------- # It is okay to use these file commands and assume they have been set # sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. 
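# Added note (illustrative): the defaults below use the POSIX
#   : ${CP="cp -f"}
# idiom, so CP, MV and RM keep any value already present in the
# environment and only fall back to 'cp -f', 'mv -f' and 'rm -f'
# when unset.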
m4_defun([_LT_FILEUTILS_DEFAULTS], [: ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} ])# _LT_FILEUTILS_DEFAULTS # _LT_SETUP # --------- m4_defun([_LT_SETUP], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl _LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl dnl _LT_DECL([], [host_alias], [0], [The host system])dnl _LT_DECL([], [host], [0])dnl _LT_DECL([], [host_os], [0])dnl dnl _LT_DECL([], [build_alias], [0], [The build system])dnl _LT_DECL([], [build], [0])dnl _LT_DECL([], [build_os], [0])dnl dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl dnl AC_REQUIRE([AC_PROG_LN_S])dnl test -z "$LN_S" && LN_S="ln -s" _LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl dnl AC_REQUIRE([LT_CMD_MAX_LEN])dnl _LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl _LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl m4_require([_LT_CMD_RELOAD])dnl m4_require([_LT_CHECK_MAGIC_METHOD])dnl m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl m4_require([_LT_CMD_OLD_ARCHIVE])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_WITH_SYSROOT])dnl _LT_CONFIG_LIBTOOL_INIT([ # See if we are running on zsh, and set the options which allow our # commands through without removal of \ escapes INIT. if test -n "\${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi ]) if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi _LT_CHECK_OBJDIR m4_require([_LT_TAG_COMPILER])dnl case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a `.a' archive for static linking (except MSVC, # which needs '.lib'). libext=a with_gnu_ld="$lt_cv_prog_gnu_ld" old_CC="$CC" old_CFLAGS="$CFLAGS" # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o _LT_CC_BASENAME([$compiler]) # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then _LT_PATH_MAGIC fi ;; esac # Use C for the default configuration in the libtool script LT_SUPPORTED_TAG([CC]) _LT_LANG_C_CONFIG _LT_LANG_DEFAULT_CONFIG _LT_CONFIG_COMMANDS ])# _LT_SETUP # _LT_PREPARE_SED_QUOTE_VARS # -------------------------- # Define a few sed substitution that help us do robust quoting. m4_defun([_LT_PREPARE_SED_QUOTE_VARS], [# Backslashify metacharacters that are still active within # double-quoted strings. sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\([["`\\]]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to delay expansion of an escaped single quote. 
delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' ]) # _LT_PROG_LTMAIN # --------------- # Note that this code is called both from `configure', and `config.status' # now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, # `config.status' has no value for ac_aux_dir unless we are using Automake, # so we pass a copy along to make sure it has a sensible value anyway. m4_defun([_LT_PROG_LTMAIN], [m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl _LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) ltmain="$ac_aux_dir/ltmain.sh" ])# _LT_PROG_LTMAIN ## ------------------------------------- ## ## Accumulate code for creating libtool. ## ## ------------------------------------- ## # So that we can recreate a full libtool script including additional # tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS # in macros and then make a single call at the end using the `libtool' # label. # _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) # ---------------------------------------- # Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL_INIT], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_INIT], [$1 ])])]) # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_INIT]) # _LT_CONFIG_LIBTOOL([COMMANDS]) # ------------------------------ # Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], [$1 ])])]) # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) # _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) # ----------------------------------------------------- m4_defun([_LT_CONFIG_SAVE_COMMANDS], [_LT_CONFIG_LIBTOOL([$1]) _LT_CONFIG_LIBTOOL_INIT([$2]) ]) # _LT_FORMAT_COMMENT([COMMENT]) # ----------------------------- # Add leading comment marks to the start of each line, and a trailing # full-stop to the whole comment if one is not present already. m4_define([_LT_FORMAT_COMMENT], [m4_ifval([$1], [ m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) )]) ## ------------------------ ## ## FIXME: Eliminate VARNAME ## ## ------------------------ ## # _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) # ------------------------------------------------------------------- # CONFIGNAME is the name given to the value in the libtool script. # VARNAME is the (base) name used in the configure script. # VALUE may be 0, 1 or 2 for a computed quote escaped value based on # VARNAME. Any other value will be used directly. 
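# Added usage sketch (values are illustrative): a declaration such as
#   _LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])
# registers LN_S in lt_decl_dict so that config.status later emits a
# single-quote-escaped
#   LN_S='ln -s'
# assignment into the generated libtool script.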
m4_define([_LT_DECL], [lt_if_append_uniq([lt_decl_varnames], [$2], [, ], [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], [m4_ifval([$1], [$1], [$2])]) lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) m4_ifval([$4], [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) lt_dict_add_subkey([lt_decl_dict], [$2], [tagged?], [m4_ifval([$5], [yes], [no])])]) ]) # _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) # -------------------------------------------------------- m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) # lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) # ------------------------------------------------ m4_define([lt_decl_tag_varnames], [_lt_decl_filter([tagged?], [yes], $@)]) # _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) # --------------------------------------------------------- m4_define([_lt_decl_filter], [m4_case([$#], [0], [m4_fatal([$0: too few arguments: $#])], [1], [m4_fatal([$0: too few arguments: $#: $1])], [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], [lt_dict_filter([lt_decl_dict], $@)])[]dnl ]) # lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) # -------------------------------------------------- m4_define([lt_decl_quote_varnames], [_lt_decl_filter([value], [1], $@)]) # lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) # --------------------------------------------------- m4_define([lt_decl_dquote_varnames], [_lt_decl_filter([value], [2], $@)]) # lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) # --------------------------------------------------- m4_define([lt_decl_varnames_tagged], [m4_assert([$# <= 2])dnl _$0(m4_quote(m4_default([$1], [[, ]])), m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) m4_define([_lt_decl_varnames_tagged], [m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) # lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) # ------------------------------------------------ m4_define([lt_decl_all_varnames], [_$0(m4_quote(m4_default([$1], [[, ]])), m4_if([$2], [], m4_quote(lt_decl_varnames), m4_quote(m4_shift($@))))[]dnl ]) m4_define([_lt_decl_all_varnames], [lt_join($@, lt_decl_varnames_tagged([$1], lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl ]) # _LT_CONFIG_STATUS_DECLARE([VARNAME]) # ------------------------------------ # Quote a variable value, and forward it to `config.status' so that its # declaration there will have the same value as in `configure'. VARNAME # must have a single quote delimited value for this to work. m4_define([_LT_CONFIG_STATUS_DECLARE], [$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) # _LT_CONFIG_STATUS_DECLARATIONS # ------------------------------ # We delimit libtool config variables with single quotes, so when # we write them to config.status, we have to be sure to quote all # embedded single quotes properly. 
In configure, this macro expands # each variable declared with _LT_DECL (and _LT_TAGDECL) into: # # ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], [m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) # _LT_LIBTOOL_TAGS # ---------------- # Output comment and list of tags supported by the script m4_defun([_LT_LIBTOOL_TAGS], [_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl available_tags="_LT_TAGS"dnl ]) # _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) # ----------------------------------- # Extract the dictionary values for VARNAME (optionally with TAG) and # expand to a commented shell variable setting: # # # Some comment about what VAR is for. # visible_name=$lt_internal_name m4_define([_LT_LIBTOOL_DECLARE], [_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [description])))[]dnl m4_pushdef([_libtool_name], m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), [0], [_libtool_name=[$]$1], [1], [_libtool_name=$lt_[]$1], [2], [_libtool_name=$lt_[]$1], [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl ]) # _LT_LIBTOOL_CONFIG_VARS # ----------------------- # Produce commented declarations of non-tagged libtool config variables # suitable for insertion in the LIBTOOL CONFIG section of the `libtool' # script. Tagged libtool config variables (even for the LIBTOOL CONFIG # section) are produced by _LT_LIBTOOL_TAG_VARS. m4_defun([_LT_LIBTOOL_CONFIG_VARS], [m4_foreach([_lt_var], m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) # _LT_LIBTOOL_TAG_VARS(TAG) # ------------------------- m4_define([_LT_LIBTOOL_TAG_VARS], [m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) # _LT_TAGVAR(VARNAME, [TAGNAME]) # ------------------------------ m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) # _LT_CONFIG_COMMANDS # ------------------- # Send accumulated output to $CONFIG_STATUS. Thanks to the lists of # variables for single and double quote escaping we saved from calls # to _LT_DECL, we can put quote escaped variables declarations # into `config.status', and then the shell code to quote escape them in # for loops in `config.status'. Finally, any additional code accumulated # from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. m4_defun([_LT_CONFIG_COMMANDS], [AC_PROVIDE_IFELSE([LT_OUTPUT], dnl If the libtool generation code has been placed in $CONFIG_LT, dnl instead of duplicating it all over again into config.status, dnl then we will have config.status run $CONFIG_LT later, so it dnl needs to know what name is stored there: [AC_CONFIG_COMMANDS([libtool], [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], dnl If the libtool generation code is destined for config.status, dnl expand the accumulated commands and init code now: [AC_CONFIG_COMMANDS([libtool], [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) ])#_LT_CONFIG_COMMANDS # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], [ # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. 
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' _LT_CONFIG_STATUS_DECLARATIONS LTCC='$LTCC' LTCFLAGS='$LTCFLAGS' compiler='$compiler_DEFAULT' # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$[]1 _LTECHO_EOF' } # Quote evaled strings. for var in lt_decl_all_varnames([[ \ ]], lt_decl_quote_varnames); do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[[\\\\\\\`\\"\\\$]]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done # Double-quote double-evaled strings. for var in lt_decl_all_varnames([[ \ ]], lt_decl_dquote_varnames); do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[[\\\\\\\`\\"\\\$]]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done _LT_OUTPUT_LIBTOOL_INIT ]) # _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) # ------------------------------------ # Generate a child script FILE with all initialization necessary to # reuse the environment learned by the parent script, and make the # file executable. If COMMENT is supplied, it is inserted after the # `#!' sequence but before initialization text begins. After this # macro, additional text can be appended to FILE to form the body of # the child script. The macro ends with non-zero status if the # file could not be fully written (such as if the disk is full). m4_ifdef([AS_INIT_GENERATED], [m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], [m4_defun([_LT_GENERATED_FILE_INIT], [m4_require([AS_PREPARE])]dnl [m4_pushdef([AS_MESSAGE_LOG_FD])]dnl [lt_write_fail=0 cat >$1 <<_ASEOF || lt_write_fail=1 #! $SHELL # Generated by $as_me. $2 SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$1 <<\_ASEOF || lt_write_fail=1 AS_SHELL_SANITIZE _AS_PREPARE exec AS_MESSAGE_FD>&1 _ASEOF test $lt_write_fail = 0 && chmod +x $1[]dnl m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT # LT_OUTPUT # --------- # This macro allows early generation of the libtool script (before # AC_OUTPUT is called), incase it is used in configure for compilation # tests. AC_DEFUN([LT_OUTPUT], [: ${CONFIG_LT=./config.lt} AC_MSG_NOTICE([creating $CONFIG_LT]) _LT_GENERATED_FILE_INIT(["$CONFIG_LT"], [# Run this file to recreate a libtool stub with the current configuration.]) cat >>"$CONFIG_LT" <<\_LTEOF lt_cl_silent=false exec AS_MESSAGE_LOG_FD>>config.log { echo AS_BOX([Running $as_me.]) } >&AS_MESSAGE_LOG_FD lt_cl_help="\ \`$as_me' creates a local libtool stub from the current configuration, for use in further configure time tests before the real libtool is generated. Usage: $[0] [[OPTIONS]] -h, --help print this help, then exit -V, --version print version number, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files Report bugs to ." lt_cl_version="\ m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) configured by $[0], generated by m4_PACKAGE_STRING. Copyright (C) 2011 Free Software Foundation, Inc. This config.lt script is free software; the Free Software Foundation gives unlimited permision to copy, distribute and modify it." 
while test $[#] != 0 do case $[1] in --version | --v* | -V ) echo "$lt_cl_version"; exit 0 ;; --help | --h* | -h ) echo "$lt_cl_help"; exit 0 ;; --debug | --d* | -d ) debug=: ;; --quiet | --q* | --silent | --s* | -q ) lt_cl_silent=: ;; -*) AC_MSG_ERROR([unrecognized option: $[1] Try \`$[0] --help' for more information.]) ;; *) AC_MSG_ERROR([unrecognized argument: $[1] Try \`$[0] --help' for more information.]) ;; esac shift done if $lt_cl_silent; then exec AS_MESSAGE_FD>/dev/null fi _LTEOF cat >>"$CONFIG_LT" <<_LTEOF _LT_OUTPUT_LIBTOOL_COMMANDS_INIT _LTEOF cat >>"$CONFIG_LT" <<\_LTEOF AC_MSG_NOTICE([creating $ofile]) _LT_OUTPUT_LIBTOOL_COMMANDS AS_EXIT(0) _LTEOF chmod +x "$CONFIG_LT" # configure is writing to config.log, but config.lt does its own redirection, # appending to config.log, which fails on DOS, as config.log is still kept # open by configure. Here we exec the FD to /dev/null, effectively closing # config.log, so it can be properly (re)opened and appended to by config.lt. lt_cl_success=: test "$silent" = yes && lt_config_lt_args="$lt_config_lt_args --quiet" exec AS_MESSAGE_LOG_FD>/dev/null $SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false exec AS_MESSAGE_LOG_FD>>config.log $lt_cl_success || AS_EXIT(1) ])# LT_OUTPUT # _LT_CONFIG(TAG) # --------------- # If TAG is the built-in tag, create an initial libtool script with a # default configuration from the untagged config vars. Otherwise add code # to config.status for appending the configuration named by TAG from the # matching tagged config vars. m4_defun([_LT_CONFIG], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl _LT_CONFIG_SAVE_COMMANDS([ m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl m4_if(_LT_TAG, [C], [ # See if we are running on zsh, and set the options which allow our # commands through without removal of \ escapes. if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi cfgfile="${ofile}T" trap "$RM \"$cfgfile\"; exit 1" 1 2 15 $RM "$cfgfile" cat <<_LT_EOF >> "$cfgfile" #! $SHELL # `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. # Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # NOTE: Changes made to this file will be lost: look at ltmain.sh. # _LT_COPYING _LT_LIBTOOL_TAGS # ### BEGIN LIBTOOL CONFIG _LT_LIBTOOL_CONFIG_VARS _LT_LIBTOOL_TAG_VARS # ### END LIBTOOL CONFIG _LT_EOF case $host_os in aix3*) cat <<\_LT_EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi _LT_EOF ;; esac _LT_PROG_LTMAIN # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? sed '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) _LT_PROG_REPLACE_SHELLFNS mv -f "$cfgfile" "$ofile" || (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" ], [cat <<_LT_EOF >> "$ofile" dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded dnl in a comment (ie after a #). 
# ### BEGIN LIBTOOL TAG CONFIG: $1 _LT_LIBTOOL_TAG_VARS(_LT_TAG) # ### END LIBTOOL TAG CONFIG: $1 _LT_EOF ])dnl /m4_if ], [m4_if([$1], [], [ PACKAGE='$PACKAGE' VERSION='$VERSION' TIMESTAMP='$TIMESTAMP' RM='$RM' ofile='$ofile'], []) ])dnl /_LT_CONFIG_SAVE_COMMANDS ])# _LT_CONFIG # LT_SUPPORTED_TAG(TAG) # --------------------- # Trace this macro to discover what tags are supported by the libtool # --tag option, using: # autoconf --trace 'LT_SUPPORTED_TAG:$1' AC_DEFUN([LT_SUPPORTED_TAG], []) # C support is built-in for now m4_define([_LT_LANG_C_enabled], []) m4_define([_LT_TAGS], []) # LT_LANG(LANG) # ------------- # Enable libtool support for the given language if not already enabled. AC_DEFUN([LT_LANG], [AC_BEFORE([$0], [LT_OUTPUT])dnl m4_case([$1], [C], [_LT_LANG(C)], [C++], [_LT_LANG(CXX)], [Go], [_LT_LANG(GO)], [Java], [_LT_LANG(GCJ)], [Fortran 77], [_LT_LANG(F77)], [Fortran], [_LT_LANG(FC)], [Windows Resource], [_LT_LANG(RC)], [m4_ifdef([_LT_LANG_]$1[_CONFIG], [_LT_LANG($1)], [m4_fatal([$0: unsupported language: "$1"])])])dnl ])# LT_LANG # _LT_LANG(LANGNAME) # ------------------ m4_defun([_LT_LANG], [m4_ifdef([_LT_LANG_]$1[_enabled], [], [LT_SUPPORTED_TAG([$1])dnl m4_append([_LT_TAGS], [$1 ])dnl m4_define([_LT_LANG_]$1[_enabled], [])dnl _LT_LANG_$1_CONFIG($1)])dnl ])# _LT_LANG m4_ifndef([AC_PROG_GO], [ ############################################################ # NOTE: This macro has been submitted for inclusion into # # GNU Autoconf as AC_PROG_GO. When it is available in # # a released version of Autoconf we should remove this # # macro and use it instead. # ############################################################ m4_defun([AC_PROG_GO], [AC_LANG_PUSH(Go)dnl AC_ARG_VAR([GOC], [Go compiler command])dnl AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl _AC_ARG_VAR_LDFLAGS()dnl AC_CHECK_TOOL(GOC, gccgo) if test -z "$GOC"; then if test -n "$ac_tool_prefix"; then AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) fi fi if test -z "$GOC"; then AC_CHECK_PROG(GOC, gccgo, gccgo, false) fi ])#m4_defun ])#m4_ifndef # _LT_LANG_DEFAULT_CONFIG # ----------------------- m4_defun([_LT_LANG_DEFAULT_CONFIG], [AC_PROVIDE_IFELSE([AC_PROG_CXX], [LT_LANG(CXX)], [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) AC_PROVIDE_IFELSE([AC_PROG_F77], [LT_LANG(F77)], [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) AC_PROVIDE_IFELSE([AC_PROG_FC], [LT_LANG(FC)], [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal dnl pulling things in needlessly. 
AC_PROVIDE_IFELSE([AC_PROG_GCJ], [LT_LANG(GCJ)], [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], [LT_LANG(GCJ)], [AC_PROVIDE_IFELSE([LT_PROG_GCJ], [LT_LANG(GCJ)], [m4_ifdef([AC_PROG_GCJ], [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) m4_ifdef([A][M_PROG_GCJ], [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) m4_ifdef([LT_PROG_GCJ], [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) AC_PROVIDE_IFELSE([AC_PROG_GO], [LT_LANG(GO)], [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) AC_PROVIDE_IFELSE([LT_PROG_RC], [LT_LANG(RC)], [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) ])# _LT_LANG_DEFAULT_CONFIG # Obsolete macros: AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_CXX], []) dnl AC_DEFUN([AC_LIBTOOL_F77], []) dnl AC_DEFUN([AC_LIBTOOL_FC], []) dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) dnl AC_DEFUN([AC_LIBTOOL_RC], []) # _LT_TAG_COMPILER # ---------------- m4_defun([_LT_TAG_COMPILER], [AC_REQUIRE([AC_PROG_CC])dnl _LT_DECL([LTCC], [CC], [1], [A C compiler])dnl _LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl _LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl _LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC ])# _LT_TAG_COMPILER # _LT_COMPILER_BOILERPLATE # ------------------------ # Check for compiler boilerplate output or warnings with # the simple compiler test code. m4_defun([_LT_COMPILER_BOILERPLATE], [m4_require([_LT_DECL_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ])# _LT_COMPILER_BOILERPLATE # _LT_LINKER_BOILERPLATE # ---------------------- # Check for linker boilerplate output or warnings with # the simple link test code. 
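# Added note (illustrative): the boilerplate captured here is later
# stripped from conftest.err by _LT_COMPILER_OPTION/_LT_LINKER_OPTION
# (via a diff against a conftest.exp file), so a toolchain that always
# chatters on stderr does not make every probed flag look unsupported.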
m4_defun([_LT_LINKER_BOILERPLATE], [m4_require([_LT_DECL_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* ])# _LT_LINKER_BOILERPLATE # _LT_REQUIRED_DARWIN_CHECKS # ------------------------- m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ case $host_os in rhapsody* | darwin*) AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) AC_CHECK_TOOL([LIPO], [lipo], [:]) AC_CHECK_TOOL([OTOOL], [otool], [:]) AC_CHECK_TOOL([OTOOL64], [otool64], [:]) _LT_DECL([], [DSYMUTIL], [1], [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) _LT_DECL([], [NMEDIT], [1], [Tool to change global to local symbols on Mac OS X]) _LT_DECL([], [LIPO], [1], [Tool to manipulate fat objects and archives on Mac OS X]) _LT_DECL([], [OTOOL], [1], [ldd/readelf like tool for Mach-O binaries on Mac OS X]) _LT_DECL([], [OTOOL64], [1], [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], [lt_cv_apple_cc_single_mod=no if test -z "${LT_MULTI_MODULE}"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE # non-empty at configure time, or by adding -multi_module to the # link flags. rm -rf libconftest.dylib* echo "int foo(void){return 1;}" > conftest.c echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c 2>conftest.err _lt_result=$? # If there is a non-empty error log, and "single_module" # appears in it, assume the flag caused a linker warning if test -s conftest.err && $GREP single_module conftest.err; then cat conftest.err >&AS_MESSAGE_LOG_FD # Otherwise, if the output was created with a 0 exit code from # the compiler, it worked. elif test -f libconftest.dylib && test $_lt_result -eq 0; then lt_cv_apple_cc_single_mod=yes else cat conftest.err >&AS_MESSAGE_LOG_FD fi rm -rf libconftest.dylib* rm -f conftest.* fi]) AC_CACHE_CHECK([for -exported_symbols_list linker flag], [lt_cv_ld_exported_symbols_list], [lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], [lt_cv_ld_exported_symbols_list=yes], [lt_cv_ld_exported_symbols_list=no]) LDFLAGS="$save_LDFLAGS" ]) AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], [lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD cat > conftest.c << _LT_EOF int main() { return 0;} _LT_EOF echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err _lt_result=$? 
if test -s conftest.err && $GREP force_load conftest.err; then cat conftest.err >&AS_MESSAGE_LOG_FD elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then lt_cv_ld_force_load=yes else cat conftest.err >&AS_MESSAGE_LOG_FD fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM ]) case $host_os in rhapsody* | darwin1.[[012]]) _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; darwin*) # darwin 5.x on # if running on 10.5 or later, the deployment target defaults # to the OS version, if on x86, and 10.4, the deployment # target defaults to 10.4. Don't you love it? case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; 10.[[012]]*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; esac ;; esac if test "$lt_cv_apple_cc_single_mod" = "yes"; then _lt_dar_single_mod='$single_module' fi if test "$lt_cv_ld_exported_symbols_list" = "yes"; then _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' else _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' fi if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then _lt_dsymutil='~$DSYMUTIL $lib || :' else _lt_dsymutil= fi ;; esac ]) # _LT_DARWIN_LINKER_FEATURES([TAG]) # --------------------------------- # Checks for linker and compiler features on darwin m4_defun([_LT_DARWIN_LINKER_FEATURES], [ m4_require([_LT_REQUIRED_DARWIN_CHECKS]) _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported if test "$lt_cv_ld_force_load" = "yes"; then _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) else _LT_TAGVAR(whole_archive_flag_spec, $1)='' fi _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" case $cc_basename in ifort*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test "$_lt_dar_can_shared" = "yes"; then output_verbose_link_cmd=func_echo_all _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" m4_if([$1], [CXX], [ if test "$lt_cv_apple_cc_single_mod" != "yes"; then _LT_TAGVAR(archive_cmds, 
$1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" fi ],[]) else _LT_TAGVAR(ld_shlibs, $1)=no fi ]) # _LT_SYS_MODULE_PATH_AIX([TAGNAME]) # ---------------------------------- # Links a minimal program and checks the executable # for the system default hardcoded library path. In most cases, # this is /usr/lib:/lib, but when the MPI compilers are used # the location of the communication and MPI libs are included too. # If we don't find anything, use the default library path according # to the aix ld manual. # Store the results from the different compilers for each TAGNAME. # Allow to override them for all tags through lt_cv_aix_libpath. m4_defun([_LT_SYS_MODULE_PATH_AIX], [m4_require([_LT_DECL_SED])dnl if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ lt_aix_libpath_sed='[ /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }]' _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi],[]) if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib" fi ]) aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) fi ])# _LT_SYS_MODULE_PATH_AIX # _LT_SHELL_INIT(ARG) # ------------------- m4_define([_LT_SHELL_INIT], [m4_divert_text([M4SH-INIT], [$1 ])])# _LT_SHELL_INIT # _LT_PROG_ECHO_BACKSLASH # ----------------------- # Find how we can fake an echo command that does not interpret backslash. # In particular, with Autoconf 2.60 or later we add some code to the start # of the generated configure script which will find a shell with a builtin # printf (which we can use as an echo command). m4_defun([_LT_PROG_ECHO_BACKSLASH], [ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO AC_MSG_CHECKING([how to print strings]) # Test print first, because it will be a builtin if present. if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $[]1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. 
func_echo_all () { $ECHO "$*" } case "$ECHO" in printf*) AC_MSG_RESULT([printf]) ;; print*) AC_MSG_RESULT([print -r]) ;; *) AC_MSG_RESULT([cat]) ;; esac m4_ifdef([_AS_DETECT_SUGGESTED], [_AS_DETECT_SUGGESTED([ test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test "X`printf %s $ECHO`" = "X$ECHO" \ || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) _LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) _LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) ])# _LT_PROG_ECHO_BACKSLASH # _LT_WITH_SYSROOT # ---------------- AC_DEFUN([_LT_WITH_SYSROOT], [AC_MSG_CHECKING([for sysroot]) AC_ARG_WITH([sysroot], [ --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified).], [], [with_sysroot=no]) dnl lt_sysroot will always be passed unquoted. We quote it here dnl in case the user passed a directory name. lt_sysroot= case ${with_sysroot} in #( yes) if test "$GCC" = yes; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) AC_MSG_RESULT([${with_sysroot}]) AC_MSG_ERROR([The sysroot must be an absolute path.]) ;; esac AC_MSG_RESULT([${lt_sysroot:-no}]) _LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl [dependent libraries, and in which our libraries should be installed.])]) # _LT_ENABLE_LOCK # --------------- m4_defun([_LT_ENABLE_LOCK], [AC_ARG_ENABLE([libtool-lock], [AS_HELP_STRING([--disable-libtool-lock], [avoid locking (might break parallel builds)])]) test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out which ABI we are using. 
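# Added note (illustrative): the probe below compiles a trivial object
# and lets /usr/bin/file classify it; on x86_64-*linux*, for example, a
# '32-bit' verdict switches the linker to 'ld -m elf_i386' and a
# '64-bit' one to 'ld -m elf_x86_64'.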
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; ppc*-*linux*|powerpc*-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*|s390*-*tpf*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -belf" AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, [AC_LANG_PUSH(C) AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) AC_LANG_POP]) if test x"$lt_cv_cc_needs_belf" != x"yes"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS="$SAVE_CFLAGS" fi ;; *-*solaris*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) case $host in i?86-*-solaris*) LD="${LD-ld} -m elf_x86_64" ;; sparc*-*-solaris*) LD="${LD-ld} -m elf64_sparc" ;; esac # GNU ld 2.21 introduced _sol2 emulations. Use them if available. if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then LD="${LD-ld}_sol2" fi ;; *) if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then LD="${LD-ld} -64" fi ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks="$enable_libtool_lock" ])# _LT_ENABLE_LOCK # _LT_PROG_AR # ----------- m4_defun([_LT_PROG_AR], [AC_CHECK_TOOLS(AR, [ar], false) : ${AR=ar} : ${AR_FLAGS=cru} _LT_DECL([], [AR], [1], [The archiver]) _LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], [lt_cv_ar_at_file=no AC_COMPILE_IFELSE([AC_LANG_PROGRAM], [echo conftest.$ac_objext > conftest.lst lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' AC_TRY_EVAL([lt_ar_try]) if test "$ac_status" -eq 0; then # Ensure the archiver fails upon bogus file names. rm -f conftest.$ac_objext libconftest.a AC_TRY_EVAL([lt_ar_try]) if test "$ac_status" -ne 0; then lt_cv_ar_at_file=@ fi fi rm -f conftest.* libconftest.a ]) ]) if test "x$lt_cv_ar_at_file" = xno; then archiver_list_spec= else archiver_list_spec=$lt_cv_ar_at_file fi _LT_DECL([], [archiver_list_spec], [1], [How to feed a file listing to the archiver]) ])# _LT_PROG_AR # _LT_CMD_OLD_ARCHIVE # ------------------- m4_defun([_LT_CMD_OLD_ARCHIVE], [_LT_PROG_AR AC_CHECK_TOOL(STRIP, strip, :) test -z "$STRIP" && STRIP=: _LT_DECL([], [STRIP], [1], [A symbol stripping program]) AC_CHECK_TOOL(RANLIB, ranlib, :) test -z "$RANLIB" && RANLIB=: _LT_DECL([], [RANLIB], [1], [Commands used to install an old-style archive]) # Determine commands to create old-style static archives. 
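# Added illustrative expansion (assuming AR=ar, AR_FLAGS=cru and a real
# ranlib is found): building libfoo.a from foo.o and bar.o then amounts to
#   ar cru libfoo.a foo.o bar.o
#   ranlib libfoo.a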
old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in darwin*) lock_old_archive_extraction=yes ;; *) lock_old_archive_extraction=no ;; esac _LT_DECL([], [old_postinstall_cmds], [2]) _LT_DECL([], [old_postuninstall_cmds], [2]) _LT_TAGDECL([], [old_archive_cmds], [2], [Commands used to build an old-style archive]) _LT_DECL([], [lock_old_archive_extraction], [0], [Whether to use a lock for old archive extraction]) ])# _LT_CMD_OLD_ARCHIVE # _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) # ---------------------------------------------------------------- # Check whether the given compiler option works AC_DEFUN([_LT_COMPILER_OPTION], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_SED])dnl AC_CACHE_CHECK([$1], [$2], [$2=no m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$3" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi fi $RM conftest* ]) if test x"[$]$2" = xyes; then m4_if([$5], , :, [$5]) else m4_if([$6], , :, [$6]) fi ])# _LT_COMPILER_OPTION # Old name: AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) # _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [ACTION-SUCCESS], [ACTION-FAILURE]) # ---------------------------------------------------- # Check whether the given linker option works AC_DEFUN([_LT_LINKER_OPTION], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_SED])dnl AC_CACHE_CHECK([$1], [$2], [$2=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $3" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. 
cat conftest.err 1>&AS_MESSAGE_LOG_FD $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi else $2=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" ]) if test x"[$]$2" = xyes; then m4_if([$4], , :, [$4]) else m4_if([$5], , :, [$5]) fi ])# _LT_LINKER_OPTION # Old name: AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) # LT_CMD_MAX_LEN #--------------- AC_DEFUN([LT_CMD_MAX_LEN], [AC_REQUIRE([AC_CANONICAL_HOST])dnl # find the maximum length of command line arguments AC_MSG_CHECKING([the maximum length of command line arguments]) AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl i=0 teststring="ABCD" case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. # Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; mint*) # On MiNT this can take a long time and run out of memory. lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; os2*) # The test takes a long time on OS/2. lt_cv_sys_max_cmd_len=8192 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. 
lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else # Make teststring a little bigger before we do anything with it. # a 1K string should be a reasonable start. for i in 1 2 3 4 5 6 7 8 ; do teststring=$teststring$teststring done SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} # If test is not a shell built-in, we'll probably end up computing a # maximum length that is only half of the actual maximum length, but # we can't tell. while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ = "X$teststring$teststring"; } >/dev/null 2>&1 && test $i != 17 # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done # Only check the string length outside the loop. lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` teststring= # Add a significant safety factor because C++ compilers can tack on # massive amounts of additional arguments before passing them to the # linker. It appears as though 1/2 is a usable value. lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac ]) if test -n $lt_cv_sys_max_cmd_len ; then AC_MSG_RESULT($lt_cv_sys_max_cmd_len) else AC_MSG_RESULT(none) fi max_cmd_len=$lt_cv_sys_max_cmd_len _LT_DECL([], [max_cmd_len], [0], [What is the maximum length of a command?]) ])# LT_CMD_MAX_LEN # Old name: AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) # _LT_HEADER_DLFCN # ---------------- m4_defun([_LT_HEADER_DLFCN], [AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl ])# _LT_HEADER_DLFCN # _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, # ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) # ---------------------------------------------------------------- m4_defun([_LT_TRY_DLOPEN_SELF], [m4_require([_LT_HEADER_DLFCN])dnl if test "$cross_compiling" = yes; then : [$4] else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF [#line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisbility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. 
*/ #if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; }] _LT_EOF if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) $1 ;; x$lt_dlneed_uscore) $2 ;; x$lt_dlunknown|x*) $3 ;; esac else : # compilation failed $3 fi fi rm -fr conftest* ])# _LT_TRY_DLOPEN_SELF # LT_SYS_DLOPEN_SELF # ------------------ AC_DEFUN([LT_SYS_DLOPEN_SELF], [m4_require([_LT_HEADER_DLFCN])dnl if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen="load_add_on" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32* | cegcc*) lt_cv_dlopen="LoadLibrary" lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen="dlopen" lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ lt_cv_dlopen="dyld" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ]) ;; *) AC_CHECK_FUNC([shl_load], [lt_cv_dlopen="shl_load"], [AC_CHECK_LIB([dld], [shl_load], [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], [AC_CHECK_FUNC([dlopen], [lt_cv_dlopen="dlopen"], [AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], [AC_CHECK_LIB([svld], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], [AC_CHECK_LIB([dld], [dld_link], [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) ]) ]) ]) ]) ]) ;; esac if test "x$lt_cv_dlopen" != xno; then enable_dlopen=yes else enable_dlopen=no fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS="$CPPFLAGS" test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS="$LDFLAGS" wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS="$LIBS" LIBS="$lt_cv_dlopen_libs $LIBS" AC_CACHE_CHECK([whether a program can dlopen itself], lt_cv_dlopen_self, [dnl _LT_TRY_DLOPEN_SELF( lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) ]) if test "x$lt_cv_dlopen_self" = xyes; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" AC_CACHE_CHECK([whether a statically linked program can dlopen itself], lt_cv_dlopen_self_static, [dnl _LT_TRY_DLOPEN_SELF( lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) ]) fi CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi _LT_DECL([dlopen_support], [enable_dlopen], [0], [Whether dlopen is supported]) _LT_DECL([dlopen_self], [enable_dlopen_self], [0], [Whether dlopen of programs is supported]) _LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], [Whether dlopen of statically linked programs is supported]) ])# 
LT_SYS_DLOPEN_SELF # Old name: AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) # _LT_COMPILER_C_O([TAGNAME]) # --------------------------- # Check to see if options -c and -o are simultaneously supported by compiler. # This macro does not hard code the compiler like AC_PROG_CC_C_O. m4_defun([_LT_COMPILER_C_O], [m4_require([_LT_DECL_SED])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_TAG_COMPILER])dnl AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes fi fi chmod u+w . 2>&AS_MESSAGE_LOG_FD $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. 
$RM -r conftest $RM conftest* ]) _LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], [Does compiler simultaneously support -c and -o options?]) ])# _LT_COMPILER_C_O # _LT_COMPILER_FILE_LOCKS([TAGNAME]) # ---------------------------------- # Check to see if we can do hard links to lock some files if needed m4_defun([_LT_COMPILER_FILE_LOCKS], [m4_require([_LT_ENABLE_LOCK])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl _LT_COMPILER_C_O([$1]) hard_links="nottested" if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user AC_MSG_CHECKING([if we can lock with hard links]) hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no AC_MSG_RESULT([$hard_links]) if test "$hard_links" = no; then AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) need_locks=warn fi else need_locks=no fi _LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) ])# _LT_COMPILER_FILE_LOCKS # _LT_CHECK_OBJDIR # ---------------- m4_defun([_LT_CHECK_OBJDIR], [AC_CACHE_CHECK([for objdir], [lt_cv_objdir], [rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null]) objdir=$lt_cv_objdir _LT_DECL([], [objdir], [0], [The name of the directory that contains temporary libtool files])dnl m4_pattern_allow([LT_OBJDIR])dnl AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/", [Define to the sub-directory in which libtool stores uninstalled libraries.]) ])# _LT_CHECK_OBJDIR # _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) # -------------------------------------- # Check hardcoding attributes. m4_defun([_LT_LINKER_HARDCODE_LIBPATH], [AC_MSG_CHECKING([how to hardcode library paths into programs]) _LT_TAGVAR(hardcode_action, $1)= if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || test -n "$_LT_TAGVAR(runpath_var, $1)" || test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then # We can hardcode non-existent directories. if test "$_LT_TAGVAR(hardcode_direct, $1)" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no && test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then # Linking always hardcodes the temporary library directory. _LT_TAGVAR(hardcode_action, $1)=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. _LT_TAGVAR(hardcode_action, $1)=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. 
_LT_TAGVAR(hardcode_action, $1)=unsupported fi AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) if test "$_LT_TAGVAR(hardcode_action, $1)" = relink || test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi _LT_TAGDECL([], [hardcode_action], [0], [How to hardcode a shared library path into an executable]) ])# _LT_LINKER_HARDCODE_LIBPATH # _LT_CMD_STRIPLIB # ---------------- m4_defun([_LT_CMD_STRIPLIB], [m4_require([_LT_DECL_EGREP]) striplib= old_striplib= AC_MSG_CHECKING([whether stripping libraries is possible]) if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" test -z "$striplib" && striplib="$STRIP --strip-unneeded" AC_MSG_RESULT([yes]) else # FIXME - insert some real tests, host_os isn't really good enough case $host_os in darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" old_striplib="$STRIP -S" AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) fi ;; *) AC_MSG_RESULT([no]) ;; esac fi _LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) _LT_DECL([], [striplib], [1]) ])# _LT_CMD_STRIPLIB # _LT_SYS_DYNAMIC_LINKER([TAG]) # ----------------------------- # PORTME Fill in your ld.so characteristics m4_defun([_LT_SYS_DYNAMIC_LINKER], [AC_REQUIRE([AC_CANONICAL_HOST])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_OBJDUMP])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl AC_MSG_CHECKING([dynamic linker characteristics]) m4_if([$1], [], [ if test "$GCC" = yes; then case $host_os in darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; *) lt_awk_arg="/^libraries:/" ;; esac case $host_os in mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;; *) lt_sed_strip_eq="s,=/,/,g" ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. 
lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[[lt_foo]]++; } if (lt_freq[[lt_foo]] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi]) library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix[[4-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[[01]] | aix4.[[01]].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. 
library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[[45]]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl*) # Native MSVC libname_spec='$name' soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' library_names_spec='${libname}.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. 
lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec="$LIB" if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC wrapper library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[[23]].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[[01]]* | freebsdelf3.[[01]]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=yes sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[[3-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], [lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], [lt_cv_shlibpath_overrides_runpath=yes])]) LDFLAGS=$save_LDFLAGS libdir=$save_libdir ]) shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Add ABI-specific directories to the system library path. sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[[89]] | openbsd2.[[89]].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac AC_MSG_RESULT([$dynamic_linker]) test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" fi if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" fi _LT_DECL([], [variables_saved_for_relink], [1], [Variables whose values should be saved in libtool wrapper scripts and restored at link time]) _LT_DECL([], [need_lib_prefix], [0], [Do we need the "lib" prefix for modules?]) _LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) _LT_DECL([], [version_type], [0], [Library versioning type]) _LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) _LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) _LT_DECL([], [shlibpath_overrides_runpath], [0], [Is shlibpath searched before the hard-coded library search path?]) _LT_DECL([], [libname_spec], [1], [Format of library name prefix]) _LT_DECL([], [library_names_spec], [1], [[List of archive names. First name is the real one, the rest are links. 
The last name is the one that the linker finds with -lNAME]]) _LT_DECL([], [soname_spec], [1], [[The coded name of the library, if different from the real name]]) _LT_DECL([], [install_override_mode], [1], [Permission mode override for installation of shared libraries]) _LT_DECL([], [postinstall_cmds], [2], [Command to use after installation of a shared archive]) _LT_DECL([], [postuninstall_cmds], [2], [Command to use after uninstallation of a shared archive]) _LT_DECL([], [finish_cmds], [2], [Commands used to finish a libtool library installation in a directory]) _LT_DECL([], [finish_eval], [1], [[As "finish_cmds", except a single script fragment to be evaled but not shown]]) _LT_DECL([], [hardcode_into_libs], [0], [Whether we should hardcode library paths into libraries]) _LT_DECL([], [sys_lib_search_path_spec], [2], [Compile-time system search path for libraries]) _LT_DECL([], [sys_lib_dlsearch_path_spec], [2], [Run-time system search path for libraries]) ])# _LT_SYS_DYNAMIC_LINKER # _LT_PATH_TOOL_PREFIX(TOOL) # -------------------------- # find a file program which can recognize shared library AC_DEFUN([_LT_PATH_TOOL_PREFIX], [m4_require([_LT_DECL_EGREP])dnl AC_MSG_CHECKING([for $1]) AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, [case $MAGIC_CMD in [[\\/*] | ?:[\\/]*]) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR dnl $ac_dummy forces splitting on constant user-supplied paths. dnl POSIX.2 word splitting is done only on the output of word expansions, dnl not every word. This closes a longstanding sh security hole. ac_dummy="m4_if([$2], , $PATH, [$2])" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$1; then lt_cv_path_MAGIC_CMD="$ac_dir/$1" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. 
Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac]) MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then AC_MSG_RESULT($MAGIC_CMD) else AC_MSG_RESULT(no) fi _LT_DECL([], [MAGIC_CMD], [0], [Used to examine libraries when file_magic_cmd begins with "file"])dnl ])# _LT_PATH_TOOL_PREFIX # Old name: AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) # _LT_PATH_MAGIC # -------------- # find a file program which can recognize a shared library m4_defun([_LT_PATH_MAGIC], [_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) else MAGIC_CMD=: fi fi ])# _LT_PATH_MAGIC # LT_PATH_LD # ---------- # find the pathname to the GNU or non-GNU linker AC_DEFUN([LT_PATH_LD], [AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PROG_ECHO_BACKSLASH])dnl AC_ARG_WITH([gnu-ld], [AS_HELP_STRING([--with-gnu-ld], [assume the C compiler uses GNU ld @<:@default=no@:>@])], [test "$withval" = no || with_gnu_ld=yes], [with_gnu_ld=no])dnl ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. AC_MSG_CHECKING([for ld used by $CC]) case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [[\\/]]* | ?:[[\\/]]*) re_direlt='/[[^/]][[^/]]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then AC_MSG_CHECKING([for GNU ld]) else AC_MSG_CHECKING([for non-GNU ld]) fi AC_CACHE_VAL(lt_cv_path_LD, [if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' 
lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[[3-9]]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) 
lt_cv_deplibs_check_method=pass_all ;; esac ]) file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` fi ;; esac fi file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown _LT_DECL([], [deplibs_check_method], [1], [Method to check whether dependent libraries are shared objects]) _LT_DECL([], [file_magic_cmd], [1], [Command to use when deplibs_check_method = "file_magic"]) _LT_DECL([], [file_magic_glob], [1], [How to find potential files when deplibs_check_method = "file_magic"]) _LT_DECL([], [want_nocaseglob], [1], [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) ])# _LT_CHECK_MAGIC_METHOD # LT_PATH_NM # ---------- # find the pathname to a BSD- or MS-compatible name lister AC_DEFUN([LT_PATH_NM], [AC_REQUIRE([AC_PROG_CC])dnl AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, [if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM="$NM" else lt_nm_to_check="${ac_tool_prefix}nm" if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. tmp_nm="$ac_dir/$lt_tmp_nm" if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then # Check to see if the nm accepts a BSD-compat flag. # Adding the `sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in */dev/null* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS="$lt_save_ifs" done : ${lt_cv_path_NM=no} fi]) if test "$lt_cv_path_NM" != "no"; then NM="$lt_cv_path_NM" else # Didn't find any BSD compatible name lister, look for dumpbin. if test -n "$DUMPBIN"; then : # Let the user override the test. 
else AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols" ;; *) DUMPBIN=: ;; esac fi AC_SUBST([DUMPBIN]) if test "$DUMPBIN" != ":"; then NM="$DUMPBIN" fi fi test -z "$NM" && NM=nm AC_SUBST([NM]) _LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], [lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&AS_MESSAGE_LOG_FD (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&AS_MESSAGE_LOG_FD (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) cat conftest.out >&AS_MESSAGE_LOG_FD if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi rm -f conftest*]) ])# LT_PATH_NM # Old names: AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_PROG_NM], []) dnl AC_DEFUN([AC_PROG_NM], []) # _LT_CHECK_SHAREDLIB_FROM_LINKLIB # -------------------------------- # how to determine the name of the shared library # associated with a specific link library. # -- PORTME fill in with the dynamic library characteristics m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], [m4_require([_LT_DECL_EGREP]) m4_require([_LT_DECL_OBJDUMP]) m4_require([_LT_DECL_DLLTOOL]) AC_CACHE_CHECK([how to associate runtime and link libraries], lt_cv_sharedlib_from_linklib_cmd, [lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh # decide which to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in *--identify-strict*) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib ;; *) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback ;; esac ;; *) # fallback: assume linklib IS sharedlib lt_cv_sharedlib_from_linklib_cmd="$ECHO" ;; esac ]) sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO _LT_DECL([], [sharedlib_from_linklib_cmd], [1], [Command to associate shared and link libraries]) ])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB # _LT_PATH_MANIFEST_TOOL # ---------------------- # locate the manifest tool m4_defun([_LT_PATH_MANIFEST_TOOL], [AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], [lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out cat conftest.err >&AS_MESSAGE_LOG_FD if $GREP 'Manifest Tool' conftest.out > /dev/null; then lt_cv_path_mainfest_tool=yes fi rm -f conftest*]) if test "x$lt_cv_path_mainfest_tool" != xyes; then MANIFEST_TOOL=: fi _LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl ])# _LT_PATH_MANIFEST_TOOL # LT_LIB_M # -------- # check for math library AC_DEFUN([LT_LIB_M], [AC_REQUIRE([AC_CANONICAL_HOST])dnl LIBM= case $host in *-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) # These system don't have libm, or don't need it ;; *-ncr-sysv4.3*) AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") ;; *) AC_CHECK_LIB(m, cos, LIBM="-lm") ;; esac AC_SUBST([LIBM]) ])# LT_LIB_M # Old name: AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_CHECK_LIBM], []) # _LT_COMPILER_NO_RTTI([TAGNAME]) # ------------------------------- m4_defun([_LT_COMPILER_NO_RTTI], [m4_require([_LT_TAG_COMPILER])dnl _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= if test "$GCC" = yes; then case $cc_basename in nvcc*) _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; *) _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; esac _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], lt_cv_prog_compiler_rtti_exceptions, [-fno-rtti -fno-exceptions], [], [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) fi _LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], [Compiler flag to turn off builtin functions]) ])# _LT_COMPILER_NO_RTTI # _LT_CMD_GLOBAL_SYMBOLS # ---------------------- m4_defun([_LT_CMD_GLOBAL_SYMBOLS], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([LT_PATH_NM])dnl AC_REQUIRE([LT_PATH_LD])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_TAG_COMPILER])dnl # Check for command to grab the raw symbol name followed by C symbol from nm. AC_MSG_CHECKING([command to parse $NM output from $compiler object]) AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [ # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[[BCDEGRST]]' # Regexp to match symbols that can be accessed directly from C. sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[[BCDT]]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[[ABCDGISTW]]' ;; hpux*) if test "$host_cpu" = ia64; then symcode='[[ABCDEGRST]]' fi ;; irix* | nonstopux*) symcode='[[BCDEGRST]]' ;; osf*) symcode='[[BCDEGQRST]]' ;; solaris*) symcode='[[BDRT]]' ;; sco3.2v5*) symcode='[[DT]]' ;; sysv4.2uw2*) symcode='[[DT]]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[[ABDT]]' ;; sysv4) symcode='[[DFNSTU]]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[[ABCDGIRSTW]]' ;; esac # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. 
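# Illustrative sketch (symbol names taken from the conftest program defined
# further below; the leading underscore depends on the platform's symbol
# prefix): the global_symbol_pipe emits one line per symbol in the form
# "<code> <raw name> <C name>", for example
#   T _nm_test_func nm_test_func
#   D _nm_test_var nm_test_var
# and the sed scripts below rewrite such lines into C declarations such as
#   extern int nm_test_func();
#   extern char nm_test_var;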
lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function # and D for any global variable. # Also find C++ and __fastcall symbols from MSVC++, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK ['"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\ " {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ " s[1]~/^[@?]/{print s[1], s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx]" else lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if AC_TRY_EVAL(ac_compile); then # Now try to grab the symbols. nlist=conftest.nm if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 con't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT@&t@_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT@&t@_DLSYM_CONST #else # define LT@&t@_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. 
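# A rough sketch of what this step appends (assuming the pipe handled the
# conftest object as intended): conftest.$ac_ext ends up with the extern
# declarations from above plus a name/address table along the lines of
#   { "@PROGRAM@", (void *) 0 },
#   {"nm_test_var", (void *) &nm_test_var},
#   {"nm_test_func", (void *) &nm_test_func},
#   {0, (void *) 0}
# which is then linked against conftstm.$ac_objext to confirm that the
# whole pipeline works.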
eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. */ LT@&t@_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[[]] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS="conftstm.$ac_objext" CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD fi else echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test "$pipe_works" = yes; then break else lt_cv_sys_global_symbol_pipe= fi done ]) if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then AC_MSG_RESULT(failed) else AC_MSG_RESULT(ok) fi # Response file support. if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then nm_file_list_spec='@' fi _LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], [Take the output of nm and produce a listing of raw symbols and C names]) _LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], [Transform the output of nm in a proper C declaration]) _LT_DECL([global_symbol_to_c_name_address], [lt_cv_sys_global_symbol_to_c_name_address], [1], [Transform the output of nm in a C name address pair]) _LT_DECL([global_symbol_to_c_name_address_lib_prefix], [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], [Transform the output of nm in a C name address pair when lib prefix is needed]) _LT_DECL([], [nm_file_list_spec], [1], [Specify filename containing input files for $NM]) ]) # _LT_CMD_GLOBAL_SYMBOLS # _LT_COMPILER_PIC([TAGNAME]) # --------------------------- m4_defun([_LT_COMPILER_PIC], [m4_require([_LT_TAG_COMPILER])dnl _LT_TAGVAR(lt_prog_compiler_wl, $1)= _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)= m4_if([$1], [CXX], [ # C++ specific cases for pic, static, wl, etc. if test "$GXX" = yes; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. 
if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac else case $host_os in aix[[4-9]]*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). 
m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; dgux*) case $cc_basename in ec++*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; ghcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; freebsd* | dragonfly*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' if test "$host_cpu" != ia64; then _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' fi ;; aCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # KAI C++ Compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; ecpc* ) # old Intel C++ for x86_64 which still supported -KPIC. _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; icpc* ) # Intel C++, used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; pgCC* | pgcpp*) # Portland Group C++ compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; cxx*) # Compaq C++ # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL 8.0, 9.0 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; esac ;; esac ;; lynxos*) ;; m88k*) ;; mvs*) case $cc_basename in cxx*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' ;; *) ;; esac ;; netbsd*) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' ;; RCC*) # Rational C++ 2.4.1 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; cxx*) # Digital/Compaq C++ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. 
_LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; *) ;; esac ;; psos*) ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' ;; *) ;; esac ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; lcc*) # Lucid _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; *) ;; esac ;; vxworks*) ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ], [ if test "$GCC" = yes; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; hpux9* | hpux10* | hpux11*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC (with -KPIC) is the default. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # Lahey Fortran 8.1. lf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' ;; nagfor*) # NAG Fortran compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; ccc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All Alpha code is PIC. 
_LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='' ;; *Sun\ F* | *Sun*Fortran*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' ;; *Intel*\ [[CF]]*Compiler*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; *Portland\ Group*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; esac ;; newsos6) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All OSF/1 code is PIC. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; rdos*) _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; solaris*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; *) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; esac ;; sunos4*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec ;then _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; unicos*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; uts4*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ]) case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" ;; esac AC_CACHE_CHECK([for $compiler option to produce PIC], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) _LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) # # Check to 
make sure the PIC flag actually works. # if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in "" | " "*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; esac], [_LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) fi _LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], [Additional compiler flags for building library objects]) _LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], [How to pass a linker flag through the compiler]) # # Check to make sure the static flag actually works. # wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" _LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), $lt_tmp_static_flag, [], [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) _LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], [Compiler flag to prevent dynamic linking]) ])# _LT_COMPILER_PIC # _LT_LINKER_SHLIBS([TAGNAME]) # ---------------------------- # See if the linker supports building shared libraries. m4_defun([_LT_LINKER_SHLIBS], [AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_TAG_COMPILER])dnl AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) m4_if([$1], [CXX], [ _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] case $host_os in aix[[4-9]]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global defined # symbols, whereas GNU nm marks them as "W". 
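# A rough illustration (the addresses and names here are made up): BSD-format
# nm output has the shape "<value> <letter> <name>", and the awk filter below
# keeps field 3 for T/D/B entries (plus W when GNU nm is used), e.g.
#   20000280 D foo    ->  foo kept for the export list
#   10000148 T .foo   ->  dropped, since names starting with a dot are skipped
# and the sorted, uniquified result is written to $export_symbols.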
if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi ;; pw32*) _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" ;; cygwin* | mingw* | cegcc*) case $cc_basename in cl*) _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] ;; esac ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac ], [ runpath_var= _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_cmds, $1)= _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(old_archive_from_new_cmds, $1)= _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= _LT_TAGVAR(thread_safe_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list _LT_TAGVAR(include_expsyms, $1)= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. dnl Note also adjust exclude_expsyms for C++ above. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. 
if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; esac _LT_TAGVAR(ld_shlibs, $1)=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test "$with_gnu_ld" = yes; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test "$lt_use_gnu_ld_interface" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi supports_anon_versioning=no case `$LD -v 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[[3-9]]*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. 
This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; interix[[3-9]]*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
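# Worked arithmetic for the expr below: 1342177280 is 0x50000000 and 262144
# is 256 KiB, so "RANDOM % 4096 / 2" selects one of 2048 slots and the
# resulting base is 256 KiB-aligned between 0x50000000 (slot 0) and
# 0x6FFC0000 (slot 2047), matching the range described above.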
_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test "$host_os" = linux-dietlibc; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test "$tmp_diet" = no then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 _LT_TAGVAR(whole_archive_flag_spec, $1)= tmp_sharedflag='--shared' ;; xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi case $cc_basename in xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath 
${wl}$libdir' _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. 
if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; sunos4*) _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then runpath_var= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. _LT_TAGVAR(hardcode_minus_L, $1)=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. _LT_TAGVAR(hardcode_direct, $1)=unsupported fi ;; aix[[4-9]]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global # defined symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. 
If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. _LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' if test "$GCC" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. _LT_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. 
_LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared libraries. _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; bsdi[[45]]*) _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl*) # Native MSVC _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. 
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC wrapper _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' # FIXME: Should let the user specify the lib program. _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; dgux*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
freebsd* | dragonfly*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; hpux9*) if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; hpux10*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes fi ;; hpux11*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) m4_if($1, [], [ # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) _LT_LINKER_OPTION([if $CC understands -b], _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) ;; esac fi if test "$with_gnu_ld" = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes 
_LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], [lt_cv_irix_exported_symbol], [save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" AC_LINK_IFELSE( [AC_LANG_SOURCE( [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], [C++], [[int foo (void) { return 0; }]], [Fortran 77], [[ subroutine foo end]], [Fortran], [[ subroutine foo end]])])], [lt_cv_irix_exported_symbol=yes], [lt_cv_irix_exported_symbol=no]) LDFLAGS="$save_LDFLAGS"]) if test "$lt_cv_irix_exported_symbol" = yes; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' fi else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes _LT_TAGVAR(link_all_deplibs, $1)=yes ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; newsos6) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *nto* | *qnx*) ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags 
${wl}-retain-symbols-file,$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' else case $host_os in openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' ;; esac fi else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; os2*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_separator, 
$1)=: ;; solaris*) _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' if test "$GCC" = yes; then wlarc='${wl}' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='${wl}' _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test "$GCC" = yes; then _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' fi ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4) case $host_vendor in sni) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. 
_LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' _LT_TAGVAR(hardcode_direct, $1)=no ;; motorola) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4.3*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes _LT_TAGVAR(ld_shlibs, $1)=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. 
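# For reference only (illustrative, assumed flags; nothing here is executed):
#   $CC -G -o libfoo.so ... -Wl,-z,text   fails only on impure text relocations
#   $CC -G -o libfoo.so ... -Wl,-z,defs   would also fail on every unresolved
#                                         symbol, which cannot work here since
#                                         -lc is intentionally left off the
#                                         link line, as explained above.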
_LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(ld_shlibs, $1)=no ;; esac if test x$host_vendor = xsni; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' ;; esac fi fi ]) AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no _LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld _LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl _LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl _LT_DECL([], [extract_expsyms_cmds], [2], [The commands to extract the exported symbol list from a shared archive]) # # Do we need to explicitly link libc? # case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in x|xyes) # Assume -lc should be added _LT_TAGVAR(archive_cmds_need_lc, $1)=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $_LT_TAGVAR(archive_cmds, $1) in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. AC_CACHE_CHECK([whether -lc should be explicitly linked in], [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), [$RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if AC_TRY_EVAL(ac_compile) 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) compiler_flags=-v linker_flags=-v verstring= output_objdir=. 
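# (Note: the conftest assignments above and below are dummy values; the probe
#  simply relinks the trivial test object with verbose flags and greps the
#  compiler driver output for a literal " -lc " to see whether libc is
#  already pulled in implicitly.)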
libname=conftest lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) _LT_TAGVAR(allow_undefined_flag, $1)= if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) then lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no else lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes fi _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* ]) _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) ;; esac fi ;; esac _LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], [Whether or not to add -lc for building shared libraries]) _LT_TAGDECL([allow_libtool_libs_with_static_runtimes], [enable_shared_with_static_runtimes], [0], [Whether or not to disallow shared libs when runtime libs are static]) _LT_TAGDECL([], [export_dynamic_flag_spec], [1], [Compiler flag to allow reflexive dlopens]) _LT_TAGDECL([], [whole_archive_flag_spec], [1], [Compiler flag to generate shared objects directly from archives]) _LT_TAGDECL([], [compiler_needs_object], [1], [Whether the compiler copes with passing no objects directly]) _LT_TAGDECL([], [old_archive_from_new_cmds], [2], [Create an old-style archive from a shared archive]) _LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], [Create a temporary old-style archive to link instead of a shared archive]) _LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) _LT_TAGDECL([], [archive_expsym_cmds], [2]) _LT_TAGDECL([], [module_cmds], [2], [Commands used to build a loadable module if different from building a shared archive.]) _LT_TAGDECL([], [module_expsym_cmds], [2]) _LT_TAGDECL([], [with_gnu_ld], [1], [Whether we are building with GNU ld or not]) _LT_TAGDECL([], [allow_undefined_flag], [1], [Flag that allows shared libraries with undefined symbols to be built]) _LT_TAGDECL([], [no_undefined_flag], [1], [Flag that enforces no undefined symbols]) _LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], [Flag to hardcode $libdir into a binary during linking. 
This must work even if $libdir does not exist]) _LT_TAGDECL([], [hardcode_libdir_separator], [1], [Whether we need a single "-rpath" flag with a separated argument]) _LT_TAGDECL([], [hardcode_direct], [0], [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_direct_absolute], [0], [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the resulting binary and the resulting library dependency is "absolute", i.e impossible to change by setting ${shlibpath_var} if the library is relocated]) _LT_TAGDECL([], [hardcode_minus_L], [0], [Set to "yes" if using the -LDIR flag during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_shlibpath_var], [0], [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_automatic], [0], [Set to "yes" if building a shared library automatically hardcodes DIR into the library and all subsequent libraries and executables linked against it]) _LT_TAGDECL([], [inherit_rpath], [0], [Set to yes if linker adds runtime paths of dependent libraries to runtime path list]) _LT_TAGDECL([], [link_all_deplibs], [0], [Whether libtool must link a program against all its dependency libraries]) _LT_TAGDECL([], [always_export_symbols], [0], [Set to "yes" if exported symbols are required]) _LT_TAGDECL([], [export_symbols_cmds], [2], [The commands to list exported symbols]) _LT_TAGDECL([], [exclude_expsyms], [1], [Symbols that should not be listed in the preloaded symbols]) _LT_TAGDECL([], [include_expsyms], [1], [Symbols that must always be exported]) _LT_TAGDECL([], [prelink_cmds], [2], [Commands necessary for linking programs (against libraries) with templates]) _LT_TAGDECL([], [postlink_cmds], [2], [Commands necessary for finishing linking programs]) _LT_TAGDECL([], [file_list_spec], [1], [Specify filename containing input files]) dnl FIXME: Not yet implemented dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], dnl [Compiler flag to generate thread safe objects]) ])# _LT_LINKER_SHLIBS # _LT_LANG_C_CONFIG([TAG]) # ------------------------ # Ensure that the configuration variables for a C compiler are suitably # defined. These variables are subsequently used by _LT_CONFIG to write # the compiler configuration to `libtool'. m4_defun([_LT_LANG_C_CONFIG], [m4_require([_LT_DECL_EGREP])dnl lt_save_CC="$CC" AC_LANG_PUSH(C) # Source file extension for C test sources. ac_ext=c # Object file extension for compiled C test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(){return(0);}' _LT_TAG_COMPILER # Save the default compiler, since it gets overwritten when the other # tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. compiler_DEFAULT=$CC # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... 
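# Illustration only: a package normally reaches this C configuration through
#   LT_INIT
# (or the obsolete AC_PROG_LIBTOOL name) in its configure.ac; that macro ends
# up invoking _LT_LANG_C_CONFIG and the other per-language tags as needed.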
if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) LT_SYS_DLOPEN_SELF _LT_CMD_STRIPLIB # Report which library types will actually be built AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. test "$enable_shared" = yes || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_CONFIG($1) fi AC_LANG_POP CC="$lt_save_CC" ])# _LT_LANG_C_CONFIG # _LT_LANG_CXX_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for a C++ compiler are suitably # defined. These variables are subsequently used by _LT_CONFIG to write # the compiler configuration to `libtool'. m4_defun([_LT_LANG_CXX_CONFIG], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then AC_PROG_CXXCPP else _lt_caught_CXX_error=yes fi AC_LANG_PUSH(C++) _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the CXX compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_caught_CXX_error" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. 
_LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} CFLAGS=$CXXFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then # We don't want -fno-exception when compiling C++ code, so set the # no_builtin_flag separately if test "$GXX" = yes; then _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' else _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= fi if test "$GXX" = yes; then # Set up default GNU C++ configuration LT_PATH_LD # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. if test "$with_gnu_ld" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='${wl}' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) _LT_TAGVAR(ld_shlibs, $1)=yes case $host_os in aix3*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aix[[4-9]]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. 
If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. _LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' if test "$GXX" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to # export. _LT_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an empty # executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. 
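# Sketch of what the following macro does (see its definition elsewhere in
# this file): it links a trivial conftest executable and extracts the default
# library search path recorded in its loader section, typically by piping
# the output of dump -H through sed; the result ends up in aix_libpath.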
_LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared # libraries. _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl*) # Native MSVC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ func_to_tool_file "$lt_outputfile"~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # g++ # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. 
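# Illustration of the export file handling used just below (made-up symbols):
# a plain symbol list such as
#   foo
#   bar
# is turned into a .def file by prepending the EXPORTS keyword, while a list
# that already starts with EXPORTS is copied to soname.def unchanged.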
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; freebsd2.*) # C++ shared libraries reported to be fairly broken before # switch to ELF _LT_TAGVAR(ld_shlibs, $1)=no ;; freebsd-elf*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; freebsd* | dragonfly*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions _LT_TAGVAR(ld_shlibs, $1)=yes ;; gnu*) ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; hpux9*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; hpux10*|hpux11*) if test $with_gnu_ld = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) ;; *) _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. ;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then if test $with_gnu_ld = no; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; interix[[3-9]]*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. 
# Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test "$GXX" = yes; then if test "$with_gnu_ld" = no; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' fi fi _LT_TAGVAR(link_all_deplibs, $1)=yes ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc* | ecpc* ) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. case `$CC -V 2>&1` in *"Version 7."*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; esac _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; pgCC* | pgcpp*) # Portland Group C++ compiler case `$CC -V` in *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ $RANLIB $oldlib' _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; *) # Version 6 and above use weak symbols _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname 
${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' ;; cxx*) # Compaq C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' ;; xl* | mpixl* | bgxl*) # IBM XL 8.0 on PPC, with GNU ld _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
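# Illustration (assumed file names): instead of the usual
#   ar cru libfoo.a a.o b.o ; ranlib libfoo.a
# static archives are built here as
#   CC -xar -o libfoo.a a.o b.o
# so the compiler can pull any pending template instantiations into the
# archive before it is written.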
_LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; m88k*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; *nto* | *qnx*) _LT_TAGVAR(ld_shlibs, $1)=yes ;; openbsd2*) # C++ shared libraries are fairly broken _LT_TAGVAR(ld_shlibs, $1)=no ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' fi output_verbose_link_cmd=func_echo_all else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Archives containing C++ object files must be created using # the KAI C++ compiler. 
case $host in osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; esac ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; cxx*) case $host in osf3*) _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' ;; *) _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ $RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' ;; esac _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' case $host in osf3*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(archive_cmds_need_lc,$1)=yes _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' if $CC --version | $GREP -v '^2\.7' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # g++ 2.7 appears to require `-G' NOT `-shared' on this # platform. 
_LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. 
_LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ '"$_LT_TAGVAR(old_archive_cmds, $1)" _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ '"$_LT_TAGVAR(reload_cmds, $1)" ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no _LT_TAGVAR(GCC, $1)="$GXX" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_SYS_HIDDEN_LIBDEPS($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld fi # test "$_lt_caught_CXX_error" != yes AC_LANG_POP ])# _LT_LANG_CXX_CONFIG # _LT_FUNC_STRIPNAME_CNF # ---------------------- # func_stripname_cnf prefix suffix name # strip PREFIX and SUFFIX off of NAME. # PREFIX and SUFFIX must not contain globbing or regex special # characters, hashes, percent signs, but SUFFIX may contain a leading # dot (in which case that matches only a dot). # # This function is identical to the (non-XSI) version of func_stripname, # except this one can be used by m4 code that may be executed by configure, # rather than the libtool script. m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl AC_REQUIRE([_LT_DECL_SED]) AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) func_stripname_cnf () { case ${2} in .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; esac } # func_stripname_cnf ])# _LT_FUNC_STRIPNAME_CNF # _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) # --------------------------------- # Figure out "hidden" library dependencies from verbose # compiler output when linking a shared library. # Parse the compiler output and extract the necessary # objects, libraries and library flags. 
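# Illustrative sketch only (not part of the original macro; the paths and
# library names below are hypothetical).  Given verbose link output such as
#
#   -L/usr/lib /usr/lib/crt1.o conftest.$objext -lstdc++ /usr/lib/crtn.o
#
# the parser below records "-L/usr/lib" in compiler_lib_search_path and
# /usr/lib/crt1.o in predep_objects (both precede the conftest sentinel
# object), then "-lstdc++" in postdeps and /usr/lib/crtn.o in
# postdep_objects (both follow it).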
m4_defun([_LT_SYS_HIDDEN_LIBDEPS], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl # Dependencies to place before and after the object being linked: _LT_TAGVAR(predep_objects, $1)= _LT_TAGVAR(postdep_objects, $1)= _LT_TAGVAR(predeps, $1)= _LT_TAGVAR(postdeps, $1)= _LT_TAGVAR(compiler_lib_search_path, $1)= dnl we can't use the lt_simple_compile_test_code here, dnl because it contains code intended for an executable, dnl not a library. It's possible we should let each dnl tag define a new lt_????_link_test_code variable, dnl but it's only used here... m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF int a; void foo (void) { a = 0; } _LT_EOF ], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF class Foo { public: Foo (void) { a = 0; } private: int a; }; _LT_EOF ], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF subroutine foo implicit none integer*4 a a=0 return end _LT_EOF ], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF subroutine foo implicit none integer a a=0 return end _LT_EOF ], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF public class foo { private int a; public void bar (void) { a = 0; } }; _LT_EOF ], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF package foo func foo() { } _LT_EOF ]) _lt_libdeps_save_CFLAGS=$CFLAGS case "$CC $CFLAGS " in #( *\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; *\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; *\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; esac dnl Parse the compiler output and extract the necessary dnl objects, libraries and library flags. if AC_TRY_EVAL(ac_compile); then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no for p in `eval "$output_verbose_link_cmd"`; do case ${prev}${p} in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test $p = "-L" || test $p = "-R"; then prev=$p continue fi # Expand the sysroot to ease extracting the directories later. if test -z "$prev"; then case $p in -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; esac fi case $p in =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; esac if test "$pre_test_object_deps_done" = no; then case ${prev} in -L | -R) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}" else _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$_LT_TAGVAR(postdeps, $1)"; then _LT_TAGVAR(postdeps, $1)="${prev}${p}" else _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}" fi fi prev= ;; *.lto.$objext) ;; # Ignore GCC LTO objects *.$objext) # This assumes that the test object file only shows up # once in the compiler output. 
if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test "$pre_test_object_deps_done" = no; then if test -z "$_LT_TAGVAR(predep_objects, $1)"; then _LT_TAGVAR(predep_objects, $1)="$p" else _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" fi else if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then _LT_TAGVAR(postdep_objects, $1)="$p" else _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. rm -f a.out a.exe else echo "libtool.m4: error: problem compiling $1 test program" fi $RM -f confest.$objext CFLAGS=$_lt_libdeps_save_CFLAGS # PORTME: override above test on systems where it is broken m4_if([$1], [CXX], [case $host_os in interix[[3-9]]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. _LT_TAGVAR(predep_objects,$1)= _LT_TAGVAR(postdep_objects,$1)= _LT_TAGVAR(postdeps,$1)= ;; linux*) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac if test "$solaris_use_stlport4" != yes; then _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' fi ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac # Adding this requires a known-good setup of shared libraries for # Sun compiler versions before 5.6, else PIC objects from an old # archive will be linked into the output, leading to subtle bugs. if test "$solaris_use_stlport4" != yes; then _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' fi ;; esac ;; esac ]) case " $_LT_TAGVAR(postdeps, $1) " in *" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; esac _LT_TAGVAR(compiler_lib_search_dirs, $1)= if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` fi _LT_TAGDECL([], [compiler_lib_search_dirs], [1], [The directories searched by this compiler when creating a shared library]) _LT_TAGDECL([], [predep_objects], [1], [Dependencies to place before and after the objects being linked to create a shared library]) _LT_TAGDECL([], [postdep_objects], [1]) _LT_TAGDECL([], [predeps], [1]) _LT_TAGDECL([], [postdeps], [1]) _LT_TAGDECL([], [compiler_lib_search_path], [1], [The library search path used internally by the compiler when linking a shared library]) ])# _LT_SYS_HIDDEN_LIBDEPS # _LT_LANG_F77_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for a Fortran 77 compiler are # suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. 
m4_defun([_LT_LANG_F77_CONFIG], [AC_LANG_PUSH(Fortran 77) if test -z "$F77" || test "X$F77" = "Xno"; then _lt_disable_F77=yes fi _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for f77 test sources. ac_ext=f # Object file extension for compiled f77 test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the F77 compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_disable_F77" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC="$CC" lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${F77-"f77"} CFLAGS=$FFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) GCC=$G77 if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. test "$enable_shared" = yes || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)="$G77" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... 
_LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC="$lt_save_CC" CFLAGS="$lt_save_CFLAGS" fi # test "$_lt_disable_F77" != yes AC_LANG_POP ])# _LT_LANG_F77_CONFIG # _LT_LANG_FC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for a Fortran compiler are # suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_FC_CONFIG], [AC_LANG_PUSH(Fortran) if test -z "$FC" || test "X$FC" = "Xno"; then _lt_disable_FC=yes fi _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for fc test sources. ac_ext=${ac_fc_srcext-f} # Object file extension for compiled fc test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the FC compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_disable_FC" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC="$CC" lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${FC-"f95"} CFLAGS=$FCFLAGS compiler=$CC GCC=$ac_cv_fc_compiler_gnu _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. 
test "$enable_shared" = yes || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_SYS_HIDDEN_LIBDEPS($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test "$_lt_disable_FC" != yes AC_LANG_POP ])# _LT_LANG_FC_CONFIG # _LT_LANG_GCJ_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Java Compiler compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_GCJ_CONFIG], [AC_REQUIRE([LT_PROG_GCJ])dnl AC_LANG_SAVE # Source file extension for Java test sources. ac_ext=java # Object file extension for compiled Java test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="class foo {}" # Code to be used in simple link tests lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GCJ-"gcj"} CFLAGS=$GCJFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)="$LD" _LT_CC_BASENAME([$compiler]) # GCJ did not exist at the time GCC didn't implicitly link libc in. _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GCJ_CONFIG # _LT_LANG_GO_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Go compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_GO_CONFIG], [AC_REQUIRE([LT_PROG_GO])dnl AC_LANG_SAVE # Source file extension for Go test sources. ac_ext=go # Object file extension for compiled Go test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="package main; func main() { }" # Code to be used in simple link tests lt_simple_link_test_code='package main; func main() { }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. 
lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GOC-"gccgo"} CFLAGS=$GOFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)="$LD" _LT_CC_BASENAME([$compiler]) # Go did not exist at the time GCC didn't implicitly link libc in. _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GO_CONFIG # _LT_LANG_RC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for the Windows resource compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_RC_CONFIG], [AC_REQUIRE([LT_PROG_RC])dnl AC_LANG_SAVE # Source file extension for RC test sources. ac_ext=rc # Object file extension for compiled RC test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' # Code to be used in simple link tests lt_simple_link_test_code="$lt_simple_compile_test_code" # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC="$CC" lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC= CC=${RC-"windres"} CFLAGS= compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes if test -n "$compiler"; then : _LT_CONFIG($1) fi GCC=$lt_save_GCC AC_LANG_RESTORE CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_RC_CONFIG # LT_PROG_GCJ # ----------- AC_DEFUN([LT_PROG_GCJ], [m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], [AC_CHECK_TOOL(GCJ, gcj,) test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" AC_SUBST(GCJFLAGS)])])[]dnl ]) # Old name: AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_GCJ], []) # LT_PROG_GO # ---------- AC_DEFUN([LT_PROG_GO], [AC_CHECK_TOOL(GOC, gccgo,) ]) # LT_PROG_RC # ---------- AC_DEFUN([LT_PROG_RC], [AC_CHECK_TOOL(RC, windres,) ]) # Old name: AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_RC], []) # _LT_DECL_EGREP # -------------- # If we don't have a new enough Autoconf to choose the best grep # available, choose the one first in the user's PATH. m4_defun([_LT_DECL_EGREP], [AC_REQUIRE([AC_PROG_EGREP])dnl AC_REQUIRE([AC_PROG_FGREP])dnl test -z "$GREP" && GREP=grep _LT_DECL([], [GREP], [1], [A grep program that handles long lines]) _LT_DECL([], [EGREP], [1], [An ERE matcher]) _LT_DECL([], [FGREP], [1], [A literal string matcher]) dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too AC_SUBST([GREP]) ]) # _LT_DECL_OBJDUMP # -------------- # If we don't have a new enough Autoconf to choose the best objdump # available, choose the one first in the user's PATH. 
m4_defun([_LT_DECL_OBJDUMP], [AC_CHECK_TOOL(OBJDUMP, objdump, false) test -z "$OBJDUMP" && OBJDUMP=objdump _LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) AC_SUBST([OBJDUMP]) ]) # _LT_DECL_DLLTOOL # ---------------- # Ensure DLLTOOL variable is set. m4_defun([_LT_DECL_DLLTOOL], [AC_CHECK_TOOL(DLLTOOL, dlltool, false) test -z "$DLLTOOL" && DLLTOOL=dlltool _LT_DECL([], [DLLTOOL], [1], [DLL creation program]) AC_SUBST([DLLTOOL]) ]) # _LT_DECL_SED # ------------ # Check for a fully-functional sed program, that truncates # as few characters as possible. Prefer GNU sed if found. m4_defun([_LT_DECL_SED], [AC_PROG_SED test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" _LT_DECL([], [SED], [1], [A sed program that does not truncate output]) _LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], [Sed that helps us avoid accidentally triggering echo(1) options like -n]) ])# _LT_DECL_SED m4_ifndef([AC_PROG_SED], [ ############################################################ # NOTE: This macro has been submitted for inclusion into # # GNU Autoconf as AC_PROG_SED. When it is available in # # a released version of Autoconf we should remove this # # macro and use it instead. # ############################################################ m4_defun([AC_PROG_SED], [AC_MSG_CHECKING([for a sed that does not truncate output]) AC_CACHE_VAL(lt_cv_path_SED, [# Loop through the user's path and test for sed and gsed. # Then use that list of sed's as ones to test for truncation. as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for lt_ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" fi done done done IFS=$as_save_IFS lt_ac_max=0 lt_ac_count=0 # Add /usr/xpg4/bin/sed as it is typically found on Solaris # along with /bin/sed that truncates output. for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do test ! -f $lt_ac_sed && continue cat /dev/null > conftest.in lt_ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >conftest.in # Check for GNU sed and select it if it is found. if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then lt_cv_path_SED=$lt_ac_sed break fi while true; do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo >>conftest.nl $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break cmp -s conftest.out conftest.nl || break # 10000 chars as input seems more than enough test $lt_ac_count -gt 10 && break lt_ac_count=`expr $lt_ac_count + 1` if test $lt_ac_count -gt $lt_ac_max; then lt_ac_max=$lt_ac_count lt_cv_path_SED=$lt_ac_sed fi done done ]) SED=$lt_cv_path_SED AC_SUBST([SED]) AC_MSG_RESULT([$SED]) ])#AC_PROG_SED ])#m4_ifndef # Old name: AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_SED], []) # _LT_CHECK_SHELL_FEATURES # ------------------------ # Find out whether the shell is Bourne or XSI compatible, # or has some other useful features. 
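# Illustrative note (not part of the original file): the "XSI constructs"
# probed below are parameter expansions and shell arithmetic such as
#
#   _lt_dummy="a/b/c"
#   echo "${_lt_dummy##*/}"    # -> c      (basename without a subshell)
#   echo "${_lt_dummy%/*}"     # -> a/b    (dirname without a subshell)
#   echo "$(( 1 + 1 ))"        # -> 2
#
# plus, separately, the "+=" append operator.  When the running shell
# supports them, _LT_PROG_REPLACE_SHELLFNS below swaps the portable func_*
# implementations for these faster built-in forms.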
m4_defun([_LT_CHECK_SHELL_FEATURES], [AC_MSG_CHECKING([whether the shell understands some XSI constructs]) # Try some XSI features xsi_shell=no ( _lt_dummy="a/b/c" test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ = c,a/b,b/c, \ && eval 'test $(( 1 + 1 )) -eq 2 \ && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ && xsi_shell=yes AC_MSG_RESULT([$xsi_shell]) _LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) AC_MSG_CHECKING([whether the shell understands "+="]) lt_shell_append=no ( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ >/dev/null 2>&1 \ && lt_shell_append=yes AC_MSG_RESULT([$lt_shell_append]) _LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi _LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr \015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac _LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl _LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl ])# _LT_CHECK_SHELL_FEATURES # _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) # ------------------------------------------------------ # In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and # '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. m4_defun([_LT_PROG_FUNCTION_REPLACE], [dnl { sed -e '/^$1 ()$/,/^} # $1 /c\ $1 ()\ {\ m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) } # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: ]) # _LT_PROG_REPLACE_SHELLFNS # ------------------------- # Replace existing portable implementations of several shell functions with # equivalent extended shell implementations where those features are available.. m4_defun([_LT_PROG_REPLACE_SHELLFNS], [if test x"$xsi_shell" = xyes; then _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl case ${1} in */*) func_dirname_result="${1%/*}${2}" ;; * ) func_dirname_result="${3}" ;; esac]) _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl func_basename_result="${1##*/}"]) _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl case ${1} in */*) func_dirname_result="${1%/*}${2}" ;; * ) func_dirname_result="${3}" ;; esac func_basename_result="${1##*/}"]) _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are # positional parameters, so assign one to ordinary parameter first. 
func_stripname_result=${3} func_stripname_result=${func_stripname_result#"${1}"} func_stripname_result=${func_stripname_result%"${2}"}]) _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl func_split_long_opt_name=${1%%=*} func_split_long_opt_arg=${1#*=}]) _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl func_split_short_opt_arg=${1#??} func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl case ${1} in *.lo) func_lo2o_result=${1%.lo}.${objext} ;; *) func_lo2o_result=${1} ;; esac]) _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) fi if test x"$lt_shell_append" = xyes; then _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl func_quote_for_eval "${2}" dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) # Save a `func_append' function call where possible by direct use of '+=' sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: else # Save a `func_append' function call even when '+=' is not available sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: fi if test x"$_lt_function_replace_fail" = x":"; then AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) fi ]) # _LT_PATH_CONVERSION_FUNCTIONS # ----------------------------- # Determine which file name conversion functions should be used by # func_to_host_file (and, implicitly, by func_to_host_path). These are needed # for certain cross-compile configurations and native mingw. m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_MSG_CHECKING([how to convert $build file names to $host format]) AC_CACHE_VAL(lt_cv_to_host_file_cmd, [case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac ]) to_host_file_cmd=$lt_cv_to_host_file_cmd AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) _LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], [0], [convert $build file names to $host format])dnl AC_MSG_CHECKING([how to convert $build file names to toolchain format]) AC_CACHE_VAL(lt_cv_to_tool_file_cmd, [#assume ordinary cross tools, or native build. 
lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac ]) to_tool_file_cmd=$lt_cv_to_tool_file_cmd AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) _LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], [0], [convert $build files to toolchain format])dnl ])# _LT_PATH_CONVERSION_FUNCTIONS PHYLIPNEW-3.69.650/m4/java.m40000664000175000017500000001505511732713220011775 00000000000000dnl -*- Autoconf -*- dnl @synopsis CHECK_JAVA() dnl dnl Need to specify --with-java and --with-javaos dnl @author Alan Bleasby dnl dnl This macro calls: dnl dnl AC_SUBST([JAVA_CFLAGS]) dnl AC_SUBST([JAVA_CPPFLAGS]) dnl AC_SUBST([JAVA_LDFLAGS]) dnl dnl AM_CONDITIONAL([JAVA_BUILD], ...) dnl dnl And sets: dnl dnl AC_DEFINE([HAVE_JAVA], ...) dnl dnl AC_PATH_PROG([ANT], ...) dnl AC_PATH_PROG([JAR], ...) dnl AC_PATH_PROG([JAVA], ...) dnl AC_PATH_PROG([JAVAC], ...) AC_DEFUN([CHECK_JAVA], [ JAVA_CFLAGS="" JAVA_CPPFLAGS="" JAVA_LDFLAGS="" have_java="yes" auth_java="" AC_MSG_CHECKING([for Java JNI]) AC_ARG_WITH([java], [AS_HELP_STRING([--with-java@<:@=ARG@:>@], [root directory path of Java installation])], [ AC_MSG_RESULT([${withval}]) AS_IF([test "x${withval}" = "xno"], [have_java="no"]) ], [ AC_MSG_RESULT([no]) have_java="no" ]) AS_IF([test "x${have_java}" = "xyes"], [ # If specified, the Java JNI include directory has to exist. AS_IF([test -d ${with_java}], [AS_VAR_SET([JAVA_CPPFLAGS], ["-I${withval}"])], [ have_java="no" AC_MSG_ERROR([Java include directory ${withval} does not exist]) ]) ]) AC_MSG_CHECKING([for Java JNI OS]) AC_ARG_WITH([javaos], [AS_HELP_STRING([--with-javaos@<:@=ARG@:>@], [root directory path of Java OS include])], [ AC_MSG_RESULT([${withval}]) AS_IF([test "x${withval}" != "xno"], [ # If specified, the Java JNI OS include directory has to exist. 
AS_IF([test "x${have_java}" = "xyes" && test -d ${withval}], [AS_VAR_APPEND([JAVA_CPPFLAGS], [" -I${withval}"])], [ have_java="no" AC_MSG_ERROR([Java OS include directory ${withval} does not exist]) ]) ]) ], [ AC_MSG_RESULT([no]) ]) # Authorisation type AC_MSG_CHECKING([for authorisation type]) AC_ARG_WITH([auth], [AS_HELP_STRING([--with-auth@<:@=ARG@:>@], [authorisation mechanism for Jemboss server @<:@default=PAM@:>@])], [ AS_IF([test "x${withval}" != "xno"], [ AC_MSG_RESULT([yes]) AS_CASE([${withval}], [yes], [ auth_java="PAM" AC_CHECK_LIB([pam], [main], [AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpam"])]) ], [pam], [ auth_java="PAM" AC_CHECK_LIB([pam], [main], [AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpam"])]) ], [shadow], [ auth_java="N_SHADOW" AC_CHECK_LIB([crypy], [main], [AS_VAR_APPEND([JAVA_LDFLAGS], [" -lcrypt"])]) ], [rshadow], [ auth_java="R_SHADOW" AC_CHECK_LIB([crypy], [main], [AS_VAR_APPEND([JAVA_LDFLAGS], [" -lcrypt"])]) ], [noshadow], [auth_java="NO_SHADOW"], [rnoshadow], [auth_java="RNO_SHADOW"], [aixshadow], [auth_java="AIX_SHADOW"], [hpuxshadow], [auth_java="HPUX_SHADOW"]) ], [AC_MSG_RESULT([no])]) ], [AC_MSG_RESULT([no])]) AS_IF([test -n "${auth_java}"], [AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D${auth_java}"])], [AS_VAR_APPEND([JAVA_CPPFLAGS], [" -DNO_AUTH"])]) # Threading type AC_MSG_CHECKING([for threading type]) AC_ARG_WITH([thread], [AS_HELP_STRING([--with-thread@<:@=ARG@:>@], [thread type @<:@default=linux@:>@])], [ AS_IF([test "x${withval}" != "xno"], [ AC_MSG_RESULT([yes]) AS_CASE([${withval}], [yes], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_REENTRANT"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [freebsd], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_THREAD_SAFE"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -pthread"]) # AS_VAR_APPEND([LIBS], [" -lc_r"]) ], [linux], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_REENTRANT"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [solaris], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_POSIX_C_SOURCE=199506L"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [macos], [ # AS_VAR_APPEND([JAVA_CPPFLAGS], [""]) # AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [hpux], [ AS_VAR_APPEND([JAVA_CFLAGS], [" -Ae +z"]) AS_VAR_APPEND([JAVA CPPFLAGS], [" -DNATIVE -D_POSIX_C_SOURCE=199506L"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [irix], [ # AS_VAR_APPEND([JAVA_CFLAGS], [""]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [aix], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_REENTRANT"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) AS_VAR_APPEND([LIBS], [" -lpthread"]) ], [osf], [ AS_VAR_APPEND([JAVA_CPPFLAGS], [" -D_REENTRANT -D_OSF_SOURCE"]) AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) AS_VAR_APPEND([LIBS], [" -lpthread"]) ]) ], [AC_MSG_RESULT([no])]) ], [AC_MSG_RESULT([no])]) # Test for programs ant, jar, java and javac. 
AS_IF([test "x${have_java}" = "xyes"], [ AC_PATH_PROG([ANT], [ant], [no]) AS_IF([test "x${ANT}" = "xno"], [have_java="no"]) AC_PATH_PROG([JAR], [jar], [no]) AS_IF([test "x${JAR}" = "xno"], [have_java="no"]) AC_PATH_PROG([JAVA], [java], [no]) AS_IF([test "x${JAVA}" = "xno"], [have_java="no"]) AC_PATH_PROG([JAVAC], [javac], [no]) AS_IF([test "x${JAVAC}" = "xno"], [have_java="no"]) ]) AS_IF([test "x${have_java}" = "xyes"], [ AC_DEFINE([HAVE_JAVA], [1], [Define to 1 if the Java Native Interface (JNI) is available.]) ### FIXME: Append -DDEBIAN for the moment. # Debian uses PAM service "ssh" instead of "login", see ajjava.c # This could use AC_DEFINE() if no better option was avialable. # Ultimately, this should be configurable via server configuration # files. AS_IF([test -f "/etc/debian_release" || test -f /etc/debian_version], [AS_VAR_APPEND([JAVA_CPPFLAGS], [" -DDEBIAN"])]) ]) AC_ARG_VAR([ANT], [Path to the Apache Ant make tool]) AC_ARG_VAR([JAR], [Path to the Java archive tool]) AC_ARG_VAR([JAVA], [Path to the Java application launcher]) AC_ARG_VAR([JAVAC], [Path to the Java compiler]) AC_SUBST([JAVA_CFLAGS]) AC_SUBST([JAVA_CPPFLAGS]) AC_SUBST([JAVA_LDFLAGS]) AM_CONDITIONAL([JAVA_BUILD], [test "x${have_java}" = "xyes"]) ]) PHYLIPNEW-3.69.650/m4/ltversion.m40000644000175000017500000000126212171071672013100 00000000000000# ltversion.m4 -- version numbers -*- Autoconf -*- # # Copyright (C) 2004 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # @configure_input@ # serial 3337 ltversion.m4 # This file is part of GNU Libtool m4_define([LT_PACKAGE_VERSION], [2.4.2]) m4_define([LT_PACKAGE_REVISION], [1.3337]) AC_DEFUN([LTVERSION_VERSION], [macro_version='2.4.2' macro_revision='1.3337' _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) _LT_DECL(, macro_revision, 0) ]) PHYLIPNEW-3.69.650/m4/general.m40000664000175000017500000000125311374607601012473 00000000000000AC_DEFUN([CHECK_GENERAL], # # Handle general setup e.g. documentation directory # [AC_MSG_CHECKING(if docroot is given) AC_ARG_WITH([docroot], [AS_HELP_STRING([--with-docroot=DIR], [root directory path of documentation (defaults to none)])], [if test "$withval" != no ; then AC_MSG_RESULT(yes) CPPFLAGS="$CPPFLAGS -DDOC_ROOT=\\\"$withval\\\"" fi], [ AC_MSG_RESULT(no) ]) ] # GCC profiling [AC_MSG_CHECKING(if gcc profiling is selected) AC_ARG_WITH([gccprofile], [AS_HELP_STRING([--with-gccprofile], [selects profiling])], [if test "$withval" != no ; then AC_MSG_RESULT(yes) CFLAGS="$CFLAGS -g -pg" LDFLAGS="$LDFLAGS -pg" fi], [ AC_MSG_RESULT(no) ]) ] ) PHYLIPNEW-3.69.650/m4/mysql.m40000664000175000017500000001266311732713347012235 00000000000000dnl -*- Autoconf -*- ##### http://autoconf-archive.cryp.to/ax_lib_mysql.html # # SYNOPSIS # # AX_LIB_MYSQL([MINIMUM-VERSION]) # # DESCRIPTION # # This macro provides tests of availability of MySQL 'libmysqlclient' # library of particular version or newer. # # AX_LIB_MYSQL macro takes only one argument which is optional. # If there is no required version passed, then macro does not run # version test. 
# # The --with-mysql option takes one of three possible values: # # no - do not check for MySQL client library # # yes - do check for MySQL library in standard locations # (mysql_config should be in the PATH) # # path - complete path to mysql_config utility, use this option if # mysql_config can't be found in the PATH # # This macro calls: # # AC_SUBST([MYSQL_CFLAGS]) # AC_SUBST([MYSQL_CPPFLAGS]) # AC_SUBST([MYSQL_LDFLAGS]) # AC_SUBST([MYSQL_VERSION]) # # And sets: # # HAVE_MYSQL # # LAST MODIFICATION # # 2006-07-16 # 2007-01-09 MKS: mysql_config --cflags may set gcc -fomit-frame-pointers, # which prevents gdb from displaying stack traces. # Changed mysql_config --cflags to mysql_config --include # 2009-09-23 AJB: Checking for availability of both, include files and # library files. # 2010-06-14 MKS: Added MYSQL_CPPFLAGS # 2011-08-01 MKS: Made test constructs more portable # # COPYLEFT # # Copyright (c) 2006 Mateusz Loskot # # Copying and distribution of this file, with or without # modification, are permitted in any medium without royalty provided # the copyright notice and this notice are preserved. AC_DEFUN([AX_LIB_MYSQL], [ MYSQL_CFLAGS="" MYSQL_CPPFLAGS="" MYSQL_LDFLAGS="" MYSQL_CONFIG="" MYSQL_VERSION="" AC_ARG_WITH([mysql], [AS_HELP_STRING([--with-mysql@<:@=ARG@:>@], [use MySQL client library @<:@default=yes@:>@, optionally specify path to mysql_config])], [ AS_IF([test "x${withval}" = "xno"], [want_mysql="no"], [test "x${withval}" = "xyes"], [want_mysql="yes"], [ want_mysql="yes" MYSQL_CONFIG="${withval}" ]) ], [want_mysql="yes"]) dnl dnl Check MySQL libraries (libmysqlclient) dnl AS_IF([test "x${want_mysql}" = "xyes"], [ AS_IF([test -z "${MYSQL_CONFIG}" -o test], [AC_PATH_PROG([MYSQL_CONFIG], [mysql_config], [no])]) AS_IF([test "x${MYSQL_CONFIG}" != "xno"], [ AC_MSG_CHECKING([for MySQL libraries]) MYSQL_CFLAGS="`${MYSQL_CONFIG} --cflags`" MYSQL_CPPFLAGS="`${MYSQL_CONFIG} --include`" MYSQL_LDFLAGS="`${MYSQL_CONFIG} --libs`" MYSQL_VERSION=`${MYSQL_CONFIG} --version` dnl It isn't enough to just test for mysql_config as Fedora dnl provides it in the mysql RPM even though mysql-devel may dnl not be installed EMBCPPFLAGS="${CPPFLAGS}" EMBLDFLAGS="${LDFLAGS}" CPPFLAGS="${MYSQL_CPPFLAGS} ${EMBCPPFLAGS}" LDFLAGS="${MYSQL_LDFLAGS} ${EMBLDFLAGS}" AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include #include "mysql.h"]], [[mysql_info(NULL)]])], [havemysql="yes"], [havemysql="no"]) CPPFLAGS="${EMBCPPFLAGS}" LDFLAGS="${EMBLDFLAGS}" AS_IF([test "x${havemysql}" = "xyes"], [ AC_DEFINE([HAVE_MYSQL], [1], [Define to 1 if MySQL libraries are available.]) found_mysql="yes" AC_MSG_RESULT([yes]) ], [ MYSQL_CFLAGS="" MYSQL_CPPFLAGS="" MYSQL_LDFLAGS="" found_mysql="no" AC_MSG_RESULT([no]) ]) ], [ found_mysql="no" ]) ]) dnl dnl Check if required version of MySQL is available dnl mysql_version_req=ifelse([$1], [], [], [$1]) AS_IF([test "x${found_mysql}" = "xyes" -a -n "${mysql_version_req}"], [ AC_MSG_CHECKING([if MySQL version is >= ${mysql_version_req}]) dnl Decompose required version string of MySQL dnl and calculate its number representation mysql_version_req_major=`expr ${mysql_version_req} : '\([[0-9]]*\)'` mysql_version_req_minor=`expr ${mysql_version_req} : '[[0-9]]*\.\([[0-9]]*\)'` mysql_version_req_micro=`expr ${mysql_version_req} : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` AS_IF([test "x${mysql_version_req_micro}" = "x"], [mysql_version_req_micro="0"]) mysql_version_req_number=`expr ${mysql_version_req_major} \* 1000000 \ \+ ${mysql_version_req_minor} \* 1000 \ \+ ${mysql_version_req_micro}` dnl Decompose 
version string of installed MySQL dnl and calculate its number representation mysql_version_major=`expr ${MYSQL_VERSION} : '\([[0-9]]*\)'` mysql_version_minor=`expr ${MYSQL_VERSION} : '[[0-9]]*\.\([[0-9]]*\)'` mysql_version_micro=`expr ${MYSQL_VERSION} : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` AS_IF([test "x${mysql_version_micro}" = "x"], [mysql_version_micro="0"]) mysql_version_number=`expr ${mysql_version_major} \* 1000000 \ \+ ${mysql_version_minor} \* 1000 \ \+ ${mysql_version_micro}` mysql_version_check=`expr ${mysql_version_number} \>\= ${mysql_version_req_number}` AS_IF([test "x${mysql_version_check}" = "x1"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) ]) AC_SUBST([MYSQL_CFLAGS]) AC_SUBST([MYSQL_CPPFLAGS]) AC_SUBST([MYSQL_LDFLAGS]) AC_SUBST([MYSQL_VERSION]) ]) PHYLIPNEW-3.69.650/m4/hpdf.m40000664000175000017500000000353011430325237011772 00000000000000dnl @synopsis CHECK_HPDF() dnl dnl This macro searches for an installed libhpdf (libharu) library. If nothing dnl was specified when calling configure, it first searches in /usr/local dnl and then in /usr. If the --with-hpdf=DIR is specified, it will try dnl to find it in DIR/include and DIR/lib. dnl dnl It defines the symbol PLD_pdf if the library is found. dnl AC_DEFUN([CHECK_HPDF], # # Handle user hints # [AC_MSG_CHECKING([whether to look for pdf support]) AC_ARG_WITH([hpdf], [AS_HELP_STRING([--with-hpdf=DIR], [root directory path of hpdf installation @<:@defaults to /usr@:>@])], [if test "$withval" != no ; then AC_MSG_RESULT(yes) ALT_HOME="$withval" else AC_MSG_RESULT([no]) fi], [ AC_MSG_RESULT([yes]) ALT_HOME=/usr ]) # # Locate hpdf # if test -d "${ALT_HOME}" then # # Keep a copy if it fails # ALT_LDFLAGS="$LDFLAGS" ALT_CPPFLAGS="$CPPFLAGS" # # Set # LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib" CPPFLAGS="$CPPFLAGS -I$ALT_HOME/include" # # Check for libharu in ALT_HOME # AC_CHECK_LIB(hpdf, HPDF_New, CHECK=1, CHECK=0, -L${ALT_HOME}/lib) # # # If everything found okay then proceed to include png driver in config. # if test $CHECK = "1" ; then LIBS="$LIBS -lhpdf" case $host_os in solaris*) LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac AC_DEFINE([PLD_pdf], [1], [Define to 1 if PDF support is available]) AM_CONDITIONAL(AMPDF, true) echo PDF support found if test $ALT_HOME = "/usr" ; then LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" fi else # # If not okay then reset FLAGS. # AM_CONDITIONAL(AMPDF, false) LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" echo "No pdf support (libhpdf) found." fi else if test $withval != "no"; then echo "Directory $ALT_HOME does not exist" exit 0 fi fi ]) PHYLIPNEW-3.69.650/m4/lt~obsolete.m40000644000175000017500000001375612171071672013440 00000000000000# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # # Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004. # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 5 lt~obsolete.m4 # These exist entirely to fool aclocal when bootstrapping libtool. # # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) # which have later been changed to m4_define as they aren't part of the # exported API, or moved to Autoconf or Automake where they belong. # # The trouble is, aclocal is a bit thick. 
It'll see the old AC_DEFUN # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us # using a macro with the same name in our local m4/libtool.m4 it'll # pull the old libtool.m4 in (it doesn't see our shiny new m4_define # and doesn't know about Autoconf macros at all.) # # So we provide this file, which has a silly filename so it's always # included after everything else. This provides aclocal with the # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything # because those macros already exist, or will be overwritten later. # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. # # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. # Yes, that means every name once taken will need to remain here until # we give up compatibility with versions before 1.7, at which point # we need to keep only those names which we still refer to. # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) m4_ifndef([_LT_AC_CHECK_DLFCN], 
[AC_DEFUN([_LT_AC_CHECK_DLFCN])]) m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) PHYLIPNEW-3.69.650/m4/sgi.m40000664000175000017500000000234311430325237011634 00000000000000AC_DEFUN([CHECK_SGI], # # Handle SGI compiler flags # [AC_MSG_CHECKING([for sgiabi]) AC_ARG_WITH([sgiabi], [AS_HELP_STRING([--with-sgiabi=@<:@ARG@:>@], [SGI compiler flags @<:@default=no@:>@])], [if test "$withval" != no ; then AC_MSG_RESULT([yes]) case $host_os in irix*) if test "$withval" = n32m3 ; then CFLAGS="-n32 -mips3 $CFLAGS" LD="/usr/bin/ld -n32 -mips3 -IPA -L/usr/lib32" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib32 $LDFLAGS" fi fi if test "$withval" = n32m4 ; then CFLAGS="-n32 -mips4 $CFLAGS" LD="/usr/bin/ld -n32 -mips4 -IPA -L/usr/lib32" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib32 $LDFLAGS" fi fi if test "$withval" = 64m3 ; then CFLAGS="-64 -mips3 $CFLAGS" LD="/usr/bin/ld -64 -mips3 -IPA -L/usr/lib64" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib64 $LDFLAGS" fi fi if test "$withval" = 64m4 ; then CFLAGS="-64 -mips4 $CFLAGS" LD="/usr/bin/ld -64 -mips4 -IPA -L/usr/lib64" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib64 $LDFLAGS" fi fi ;; esac fi], [ AC_MSG_RESULT([no]) ]) ] ) PHYLIPNEW-3.69.650/m4/ltoptions.m40000644000175000017500000003007312171071672013110 00000000000000# Helper functions for option handling. -*- Autoconf -*- # # Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, # Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 7 ltoptions.m4 # This is to help aclocal find these macros, as it can't see m4_define. 
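# A hedged aside (not part of libtool): the stub pattern used by
# lt~obsolete.m4 above, and by the LTOPTIONS_VERSION marker that follows,
# boils down to handing aclocal an empty AC_DEFUN for any name that is
# really provided with m4_define elsewhere.  MY_PRIVATE_MACRO is a
# hypothetical name used only for illustration.
m4_ifndef([MY_PRIVATE_MACRO],
          [AC_DEFUN([MY_PRIVATE_MACRO])])
# If a genuine m4_define([MY_PRIVATE_MACRO], [...]) has already been read,
# the m4_ifndef guard fails and the empty stub never overrides it; aclocal,
# which only scans for AC_DEFUN, is satisfied either way.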
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) # ------------------------------------------ m4_define([_LT_MANGLE_OPTION], [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) # --------------------------------------- # Set option OPTION-NAME for macro MACRO-NAME, and if there is a # matching handler defined, dispatch to it. Other OPTION-NAMEs are # saved as a flag. m4_define([_LT_SET_OPTION], [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), _LT_MANGLE_DEFUN([$1], [$2]), [m4_warning([Unknown $1 option `$2'])])[]dnl ]) # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) # ------------------------------------------------------------ # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. m4_define([_LT_IF_OPTION], [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) # ------------------------------------------------------- # Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME # are set. m4_define([_LT_UNLESS_OPTIONS], [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), [m4_define([$0_found])])])[]dnl m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 ])[]dnl ]) # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) # ---------------------------------------- # OPTION-LIST is a space-separated list of Libtool options associated # with MACRO-NAME. If any OPTION has a matching handler declared with # LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about # the unknown option and exit. m4_defun([_LT_SET_OPTIONS], [# Set options m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [_LT_SET_OPTION([$1], _LT_Option)]) m4_if([$1],[LT_INIT],[ dnl dnl Simply set some default values (i.e off) if boolean options were not dnl specified: _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no ]) _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no ]) dnl dnl If no reference was made to various pairs of opposing options, then dnl we run the default mode handler for the pair. For example, if neither dnl `shared' nor `disable-shared' was passed, we enable building of shared dnl archives by default: _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], [_LT_ENABLE_FAST_INSTALL]) ]) ])# _LT_SET_OPTIONS ## --------------------------------- ## ## Macros to handle LT_INIT options. 
## ## --------------------------------- ## # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) # ----------------------------------------- m4_define([_LT_MANGLE_DEFUN], [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) # ----------------------------------------------- m4_define([LT_OPTION_DEFINE], [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl ])# LT_OPTION_DEFINE # dlopen # ------ LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes ]) AU_DEFUN([AC_LIBTOOL_DLOPEN], [_LT_SET_OPTION([LT_INIT], [dlopen]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `dlopen' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) # win32-dll # --------- # Declare package support for building win32 dll's. LT_OPTION_DEFINE([LT_INIT], [win32-dll], [enable_win32_dll=yes case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) AC_CHECK_TOOL(AS, as, false) AC_CHECK_TOOL(DLLTOOL, dlltool, false) AC_CHECK_TOOL(OBJDUMP, objdump, false) ;; esac test -z "$AS" && AS=as _LT_DECL([], [AS], [1], [Assembler program])dnl test -z "$DLLTOOL" && DLLTOOL=dlltool _LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl test -z "$OBJDUMP" && OBJDUMP=objdump _LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl ])# win32-dll AU_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_REQUIRE([AC_CANONICAL_HOST])dnl _LT_SET_OPTION([LT_INIT], [win32-dll]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `win32-dll' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) # _LT_ENABLE_SHARED([DEFAULT]) # ---------------------------- # implement the --enable-shared flag, and supports the `shared' and # `disable-shared' LT_INIT options. # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. m4_define([_LT_ENABLE_SHARED], [m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([shared], [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS="$lt_save_ifs" ;; esac], [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) _LT_DECL([build_libtool_libs], [enable_shared], [0], [Whether or not to build shared libraries]) ])# _LT_ENABLE_SHARED LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) # Old names: AC_DEFUN([AC_ENABLE_SHARED], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) ]) AC_DEFUN([AC_DISABLE_SHARED], [_LT_SET_OPTION([LT_INIT], [disable-shared]) ]) AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_ENABLE_SHARED], []) dnl AC_DEFUN([AM_DISABLE_SHARED], []) # _LT_ENABLE_STATIC([DEFAULT]) # ---------------------------- # implement the --enable-static flag, and support the `static' and # `disable-static' LT_INIT options. # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
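# Hedged sketch (illustrative values, not from this package) of the
# per-package matching that _LT_ENABLE_SHARED above and _LT_ENABLE_STATIC
# below both perform when the user passes --enable-shared=PKGS or
# --enable-static=PKGS: the value is split on the usual list separators and
# each entry is compared against the current package name.
#   p=mypkg
#   enableval="otherpkg,mypkg"
#   enable_static=no
#   lt_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:},"
#   for pkg in $enableval; do
#     IFS="$lt_save_ifs"
#     test "X$pkg" = "X$p" && enable_static=yes
#   done
#   IFS="$lt_save_ifs"
#   # enable_static ends up "yes" because mypkg appears in the list.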
m4_define([_LT_ENABLE_STATIC], [m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([static], [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS="$lt_save_ifs" ;; esac], [enable_static=]_LT_ENABLE_STATIC_DEFAULT) _LT_DECL([build_old_libs], [enable_static], [0], [Whether or not to build static libraries]) ])# _LT_ENABLE_STATIC LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) # Old names: AC_DEFUN([AC_ENABLE_STATIC], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) ]) AC_DEFUN([AC_DISABLE_STATIC], [_LT_SET_OPTION([LT_INIT], [disable-static]) ]) AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_ENABLE_STATIC], []) dnl AC_DEFUN([AM_DISABLE_STATIC], []) # _LT_ENABLE_FAST_INSTALL([DEFAULT]) # ---------------------------------- # implement the --enable-fast-install flag, and support the `fast-install' # and `disable-fast-install' LT_INIT options. # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. m4_define([_LT_ENABLE_FAST_INSTALL], [m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([fast-install], [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS="$lt_save_ifs" ;; esac], [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) _LT_DECL([fast_install], [enable_fast_install], [0], [Whether or not to optimize for fast installation])dnl ])# _LT_ENABLE_FAST_INSTALL LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) # Old names: AU_DEFUN([AC_ENABLE_FAST_INSTALL], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `fast-install' option into LT_INIT's first parameter.]) ]) AU_DEFUN([AC_DISABLE_FAST_INSTALL], [_LT_SET_OPTION([LT_INIT], [disable-fast-install]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `disable-fast-install' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) # _LT_WITH_PIC([MODE]) # -------------------- # implement the --with-pic flag, and support the `pic-only' and `no-pic' # LT_INIT options. # MODE is either `yes' or `no'. If omitted, it defaults to `both'. 
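# Hedged configure.ac usage sketch (a hypothetical package, not this one)
# showing how the LT_INIT options handled in this file are requested.  Per
# _LT_SET_OPTIONS above, boolean options that are not listed fall back to
# their defaults: dlopen and win32-dll default to "no", while the
# shared/static/fast-install pairs default to "yes" through their
# _LT_ENABLE_* handlers.
#   AC_INIT([example], [1.0])
#   AM_INIT_AUTOMAKE
#   LT_INIT([dlopen disable-static pic-only])
#   # -> enable_dlopen=yes, enable_static=no, enable_shared=yes,
#   #    enable_fast_install=yes; pic-only asks for PIC-only objects
#   #    via _LT_WITH_PIC, defined next.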
m4_define([_LT_WITH_PIC], [AC_ARG_WITH([pic], [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], [lt_p=${PACKAGE-default} case $withval in yes|no) pic_mode=$withval ;; *) pic_mode=default # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for lt_pkg in $withval; do IFS="$lt_save_ifs" if test "X$lt_pkg" = "X$lt_p"; then pic_mode=yes fi done IFS="$lt_save_ifs" ;; esac], [pic_mode=default]) test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) _LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl ])# _LT_WITH_PIC LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) # Old name: AU_DEFUN([AC_LIBTOOL_PICMODE], [_LT_SET_OPTION([LT_INIT], [pic-only]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `pic-only' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) ## ----------------- ## ## LTDL_INIT Options ## ## ----------------- ## m4_define([_LTDL_MODE], []) LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], [m4_define([_LTDL_MODE], [nonrecursive])]) LT_OPTION_DEFINE([LTDL_INIT], [recursive], [m4_define([_LTDL_MODE], [recursive])]) LT_OPTION_DEFINE([LTDL_INIT], [subproject], [m4_define([_LTDL_MODE], [subproject])]) m4_define([_LTDL_TYPE], []) LT_OPTION_DEFINE([LTDL_INIT], [installable], [m4_define([_LTDL_TYPE], [installable])]) LT_OPTION_DEFINE([LTDL_INIT], [convenience], [m4_define([_LTDL_TYPE], [convenience])]) PHYLIPNEW-3.69.650/test/0002775000175000017500000000000012171071713011327 500000000000000PHYLIPNEW-3.69.650/test/qatest.dat0000664000175000017500000006052311616234343013251 00000000000000####################################### # PHYLIP 3.6 ####################################### ID fclique-ex AB phylipnew AA fclique IN ../../data/evolution/clique.dat IN FI clique.fclique FC = 31 FP /^Characters: \( 1 2 3 6\)\n/ FP /^ 2 1 3 6\n/ FI clique.treefile FC = 1 FP /^\(\(\(Delta,Epsilon\),Gamma\),Alpha,Beta\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 7 FP 3 /^./ // ID fclique-all AB phylipnew AA fclique CL -ancfile ../../data/evolution/clique.ancestral CL -factorfile ../../data/evolution/clique.factors CL -weights ../../data/evolution/clique.weights IN ../../data/evolution/clique.dat IN FI clique.fclique FC = 35 FP /^ 11223 4\n/ FP /^Actual Characters: \( 1 4\)\n/ FP /^Binary Characters: \( 1 2 6\)\n/ FP 1 /Tree and binary characters:/ FI clique.treefile FC = 1 FP /^\(\(\(\(Alpha,Beta\),Gamma\),Epsilon\),Delta\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 7 FP 3 /^./ // ID fconsense-ex AB phylipnew AA fconsense IN ../../data/evolution/consense.dat IN FI consense.fconsense FC = 77 FP /^ 10\. 
C\n/ FP /^\.\.\*\.\*\.\.\.\.\.\s+4\.00\n/ FI consense.treefile FC = 2 FP /^E:9\.00\):9\.00,B:9\.00\):9\.00,A:9\.00\);\n/ FP /\(\(H:9\.00,J:9\.00\):4\.00,D:9\.00\)/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 7 FP 3 /^./ // ID fcontml-ex AB phylipnew AA fcontml CL -printdata IN ../../data/evolution/contml.dat IN IN FI contml.fcontml FC = 67 FP /^Ln Likelihood = 38\.7191[0-9]\n/ FP /^ 2 Chinese 0\.00208822 \( -0\.00960622, 0\.02017433\)\n/ FI contml.treefile FC = 2 FP /\(African:0\.09693444,\(Australian:0\.05247405,\(American:0\.03806240,Chinese:0\.00208822\):0\.00945315\):0\.02252816,/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 14 FP 9 /^./ // ID fcontrast-ex AB phylipnew AA fcontrast IN ../../data/evolution/contrast.dat IN ../../data/evolution/contrast.tree IN FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI contrast.fcontrast FC = 19 FP /^ 3\.9423 1\.7028\n/ FP /^ 1\.0000 0\.4319\n/ FP /^ 1\.0000 0\.6566\n/ FI stdout FC = 5 FP 2 /^./ // ID fdiscboot-ex AB phylipnew AA fdiscboot CL -seed 3 IN ../../data/evolution/discboot.dat IN IN IN IN FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 12 /^./ FI discboot.ancfile FZ = 0 FI discboot.factfile FZ = 0 FI discboot.mixfile FZ = 0 FI discboot.fdiscboot FC = 600 FP 100 /^Alpha/ FP 10 /^Alpha 111100\n/ // ID fdnacomp-ex AB phylipnew AA fdnacomp CL -ancseq -stepbox -printdata IN ../../data/evolution/dnacomp.dat IN IN IN FI dnacomp.fdnacomp FC = 62 FP /total number of compatible sites is 11\.0\n/ FP / 4 Epsilon maybe GGGATCTCGG CCC\n/ FP / 0[|] 2 1 3 2 0 2 1 1 1\n/ FP / 0 [!] YYNYYYYYY\n/ FP /One most parsimonious tree found:\n/ FI dnacomp.treefile FC = 1 FP /\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 15 FP 11 /^./ // ID fdnacomp-ex2 AB phylipnew AA fdnacomp IN ../../data/evolution/dnacomp.dat IN ../../data/evolution/dnacomptree.dat IN IN FI dnacomp.fdnacomp FC = 23 FP /^User-defined tree:\n/ FP /^total number of compatible sites is 11\.0\n/ FI dnacomp.treefile FC = 1 FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 4 FP 2 /^./ // ID fdnadist-ex AB phylipnew AA fdnadist IN ../../data/evolution/dnadist.dat IN IN FI dnadist.fdnadist FC = 6 FP / 5\n/ FP /^Alpha 0\.000000 0\.303900 0\.857544 1\.158927 1\.542899\n/ FI stderr FC = 8 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 11 FP 8 /^./ // ID fdnainvar-ex AB phylipnew AA fdnainvar CL -printdata IN ../../data/evolution/dnainvar.dat IN IN FI dnainvar.fdnainvar FC = 120 FP /^ AAAG 2\n/ FP /^ 1113 3\n/ FP /^ III: \(\(Alpha,Delta\),\(Beta,Gamma\)\)\n/ FP /^ III 0 - 0 = 0 1\.0000 no\n/ FP /^ Quadratic invariant = 4\.0\n/ FP 3 /^ Quadratic invariant =/ FP / Tree III: 5\.0\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID fdnaml-ex AB phylipnew AA fdnaml CL -printdata CL -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" CL -gammatype h CL -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" CL -hmmprobabilities "0.522 0.399 0.076 0.0036 0.000023" CL -lambda 1.5 CL -weight "0111111111110" IN ../../data/evolution/dnaml.dat IN IN FI dnaml.fdnaml FC = 91 FP /^ 1 0\.264 0\.522\n/ FP /^ 2 2\.000\n/ FP /^Ln Likelihood = -57\.87892\n/ FP /^ 1 Alpha 0\.26766 \( zero, 0\.80513\) \*\n/ FP /^ 1132121111 211\n/ FI dnaml.treefile FC = 2 FP 
/^\(\(\(Epsilon:0\.00006,Delta:0\.27319\):7\.59821,Beta:0\.00006\):0\.04687,\n/ FP /^Gamma:0\.95677,Alpha:0\.26766\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 53 FP 45 /^./ // ID fdnaml-ex2 AB phylipnew AA fdnaml CL -printdata CL -njumble 3 -seed 3 IN ../../data/evolution/dnaml.dat IN IN FI dnaml.fdnaml FC = 57 FP /^ A 0\.24615\n/ FP /^Ln Likelihood = -72\.25088\n/ FP /^ 1 Epsilon 0\.00006 \( zero, 0\.34299\)\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 67 FP 57 /^./ FI dnaml.treefile FC = 2 FP /^Gamma:1\.01651,Alpha:0\.20745\);\n/ // ID fdnamlk-ex AB phylipnew AA fdnamlk CL -printdata CL -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" CL -gammatype h CL -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" CL -hmmprobabilities "0.522 0.399 0.076 0.0036 0.000023" CL -lambda 1.5 CL -weight "0111111111110" IN ../../data/evolution/dnaml.dat IN IN CC likelihood value differs on Linux. FI dnaml.fdnamlk FC = 90 FP /^ 1 0\.264 0\.522\n/ FP /^ 2 2\.000\n/ FP /^Ln Likelihood = -57\.98242\n/ FP /^ 2 Gamma 4\.15060 0\.55971\n/ FP /^ 1132121111 211\n/ FI dnaml.treefile FC = 2 FP /^\(\(Epsilon:0\.13456,Delta:0\.13456\):4\.01604,\(Gamma:0\.55971,\n/ FP /^\(Beta:0\.15731,Alpha:0\.15731\):0\.40240\):3\.59089\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 14 FP 9 /^./ // ID fdnamove-ex AB phylipnew AA fdnamove IN ../../data/evolution/dnamove.dat IN IN Q IN Y FI dnamove.treefile FC = 1 FP /^\(Epsilon,\(Delta,\(Gamma,\(Beta,Alpha\)\)\)\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 20 FP 14 /^./ // ID fdnapars-ex AB phylipnew AA fdnapars IN ../../data/evolution/dnapars.dat IN IN FI dnapars.fdnapars FC = 30 FP /^requires a total of 19\.000\n/ FP /^ 3 Epsilon 0\.096154\n/ FI dnapars.treefile FC = 2 FP /^\(\(\(Epsilon:0\.09615,Delta:0\.13462\):0\.48718,Gamma:0\.27564\):0\.21795,\n/ FP /^Beta:0\.07692,Alpha:0\.17308\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 21 FP 15 /^./ // ID fdnapenny-ex AB phylipnew AA fdnapenny IN ../../data/evolution/dnapenny.dat IN FI dnapenny.fdnapenny FC = 207 FP /^ 9 trees in all found\n/ FP /^ 1 \+-----4 \+--Epsilon \n/ FI dnapenny.treefile FC = 9 FP /^\(Alpha1,\(\(Alpha2,\(\(Epsilon,Delta\),\(Gamma2,Gamma1\)\)\),\(Beta2,Beta1\)\)\)\[0\.1111\];\n/ FP 3 /\(Gamma2,Gamma1\)/ FP 6 /\(Epsilon,Delta\),/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 18 FP 14 /^./ // ID fdollop-ex AB phylipnew AA fdollop IN ../../data/evolution/dollop.dat IN IN FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI dollop.fdollop FC = 24 FP /^One most parsimonious tree found:\n/ FP 1 /^requires a total of 3\.000\n/ FI dollop.treefile FC = 1 FP /^\(Delta,\(Epsilon,\(Gamma,\(Beta,Alpha\)\)\)\);\n/ FI stdout FC = 19 FP 13 /^./ // ID fdolmove-ex AB phylipnew AA fdolmove IN ../../data/evolution/dolmove.dat IN IN Q IN Y FI dolmove.treefile FC = 1 FP /^\(Epsilon,\(Delta,\(Gamma,\(Beta,Alpha\)\)\)\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 24 FP 15 /^./ // ID fdolpenny-ex AB phylipnew AA fdolpenny IN ../../data/evolution/dolpenny.dat IN FI dolpenny.fdolpenny FC = 63 FP /^ 3 trees in all found\n/ FP 1 /^ \+--6 \+--5 \n/ FI dolpenny.treefile FC = 3 FP /^\(Delta,\(Epsilon,\(Gamma1,\(Alpha2,\(\(Beta2,Beta1\),Alpha1\)\)\)\)\)\[0\.3333\];\n/ FP /^\(Delta,\(Epsilon,\(Gamma1,\(\(Beta2,Beta1\),\(Alpha2,Alpha1\)\)\)\)\)\[0\.3333\];\n/ FP 
/^\(Delta,\(Epsilon,\(Gamma1,\(\(\(Beta2,Beta1\),Alpha2\),Alpha1\)\)\)\)\[0\.3333\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 13 FP 9 /^./ // ID fdrawgram-ex AB phylipnew AA fdrawgram CL -previewer n IN ../../data/evolution/drawgram.tree IN FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 12 FP 8 /^./ FI drawgram.fdrawgram FC = 76 FP /\%\!PS-Adobe-2\.0\n/ FP 5 /show\n/ // ID fdrawtree-ex AB phylipnew AA fdrawtree CL -previewer n IN ../../data/evolution/drawgram.tree IN FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 12 FP 8 /^./ FI drawgram.fdrawtree FC = 67 FP /\%\!PS-Adobe-2\.0\n/ FP 5 /show\n/ // ID ffactor-ex AB phylipnew AA ffactor CC output fails to match documentation, also reports failure IN ../../data/evolution/factor.dat IN FI factor.ffactor FC = 5 FP /^ +4 +5\n/ FP /^Alpha CA00\#\n/ FP /^Beta BB01\%\n/ FP /^Gamma AB12\#\n/ FP /^Epsilon CA01\$\n/ FI factor.factor FC = 0 FI factor.ancestor FC = 0 FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID ffitch-ex AB phylipnew AA ffitch IN ../../data/evolution/fitch.dat IN IN FI fitch.ffitch FC = 50 FP /^ 7 Populations\n/ FP /^Sum of squares = 0\.01375\n/ FP /^Average percent standard deviation = 1\.85418\n/ FP /^ 1 Mouse 0\.76985\n/ FP /^ 1 2 0\.41983\n/ FP /^ 2 Gibbon 0\.35537\n/ FI fitch.treefile FC = 2 FP /^\(Mouse:0\.76985,\(\(\(\(Human:0\.11449,Chimp:0\.15471\):0\.03695,\n/ FP /^Gorilla:0\.15680\):0\.02121,Orang:0\.29209\):0\.04986,Gibbon:0\.35537\):0\.41983,Bovine:0\.91675\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 15 FP 11 /^./ // ID ffreqboot-ex AB phylipnew AA ffreqboot CL -seed 3 IN ../../data/evolution/freqboot.dat IN FI freqboot.ffreqboot FC = 1700 FP 100 /^European/ FP 57 /^European 0\.28680 / FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 12 /^./ // ID fgendist-ex AB phylipnew AA fgendist IN ../../data/evolution/gendist.dat IN FI gendist.fgendist FC = 6 FP /^ 5\n/ FP /^Chinese 0\.080749 0\.234698 0\.000000 0\.053879 0\.063275\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 11 FP 8 /^./ // ID fkitsch-ex AB phylipnew AA fkitsch IN ../../data/evolution/kitsch.dat IN IN FI kitsch.fkitsch FC = 49 FP /^ 7 Populations\n/ FP /^Sum of squares = 0\.107\n/ FP /^Average percent standard deviation = 5\.16213\n/ FP /^ 6 Human 0\.13460 0\.81285\n/ FP /^ 5 6 0\.02836 0\.67825\n/ FP /^ 6 Chimp 0\.13460 0\.81285\n/ FI kitsch.treefile FC = 3 FP /^\(\(\(\(\(\(Human:0\.13460,Chimp:0\.13460\):0\.02836,Gorilla:0\.16296\):0\.07638,\n/ FP /^Orang:0\.23933\):0\.06639,Gibbon:0\.30572\):0\.42923,Mouse:0\.73495\):0\.07790,\n/ FP /^Bovine:0\.81285\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 19 FP 14 /^./ // ID fmix-ex AB phylipnew AA fmix IN ../../data/evolution/mix.dat IN IN FI mix.fmix FC = 86 FP 4 /^requires a total of 9\.000\n/ FP /^ 4 trees in all found\n/ FP 2 /^--1 \! 
\+--Delta +\n/ FI mix.treefile FC = 4 FP /^\(\(\(Epsilon,Gamma\),\(Delta,Beta\)\),Alpha\)\[0\.2500\];\n/ FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.2500\];\n/ FP /^\(\(Epsilon,\(Gamma,\(Delta,Beta\)\)\),Alpha\)\[0\.2500\];\n/ FP /^\(\(Gamma,\(Epsilon,\(Delta,Beta\)\)\),Alpha\)\[0\.2500\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 11 /^./ // ID fmix-ex2 AB phylipnew AA fmix CL -printdata -ancfile ../../data/evolution/mixancfile.dat IN ../../data/evolution/mix.dat IN IN FI mix.fmix FC = 46 FP /^ Ancestral states:\n/ FP /^ 001\?\? 1\n/ FP /^One most parsimonious tree found:\n/ FP /^requires a total of 8\.000\n/ FI mix.treefile FC = 1 FP /^\(Delta,\(Epsilon,\(Gamma,\(Beta,Alpha\)\)\)\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 11 /^./ // ID fmove-ex AB phylipnew AA fmove IN ../../data/evolution/move.dat IN IN Q IN Y FI move.treefile FC = 1 FP /^\(Epsilon,\(Delta,\(Gamma,\(Beta,Alpha\)\)\)\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 23 FP 15 /^./ // ID fneighbor-ex AB phylipnew AA fneighbor IN ../../data/evolution/neighbor.dat IN FI neighbor.fneighbor FC = 43 FP /^ 7 Populations\n/ FP /^ \! \! \+--------Gorilla \n/ FP /^ 1 Mouse 0\.76891\n/ FP /^ 1 2 0\.42027\n/ FP /^ 2 Gibbon 0\.35793\n/ FI neighbor.treefile FC = 2 FP /^\(Mouse:0\.76891,\(Gibbon:0\.35793,\(Orang:0\.28469,\(Gorilla:0\.15393,\n/ FP /^\(Chimp:0\.15168,Human:0\.11752\):0\.03982\):0\.02696\):0\.04648\):0\.42027,Bovine:0\.91769\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 15 FP 9 /^./ // ID fpars-ex AB phylipnew AA fpars IN ../../data/evolution/pars.dat IN IN FI pars.fpars FC = 30 FP /^One most parsimonious tree found:\n/ FP /^requires a total of 8\.000\n/ FP /^ 3 Epsilon 0\.00\n/ FP /^ 3 Delta 3\.00\n/ FI pars.treefile FC = 1 FP /^\(\(\(Epsilon:0\.00,Delta:3\.00\):2\.00,Gamma:0\.00\):1\.00,Beta:2\.00,Alpha:0\.00\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 21 FP 15 /^./ // ID fpenny-ex AB phylipnew AA fpenny IN ../../data/evolution/penny.dat IN FI penny.fpenny FC = 70 FP /^ 3 trees in all found\n/ FP 3 /^ remember: this is an unrooted tree\!\n/ FP /^ \! 
\+-----Alpha2 \n/ FI penny.treefile FC = 3 FP /^\(Alpha1,\(\(Alpha2,\(\(Epsilon,Delta\),Gamma1\)\),\(Beta2,Beta1\)\)\)\[0\.3333\];\n/ FP /^\(Alpha1,\(Alpha2,\(\(\(Epsilon,Delta\),Gamma1\),\(Beta2,Beta1\)\)\)\)\[0\.3333\];\n/ FP /^\(Alpha1,\(\(Alpha2,\(Beta2,Beta1\)\),\(\(Epsilon,Delta\),Gamma1\)\)\)\[0\.3333\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 12 /^./ // ID fproml-ex AB phylipnew AA fproml IN ../../data/evolution/proml.dat IN IN FI proml.fproml FC = 36 FP /^Ln Likelihood = -131\.55052\n/ FP /^ 1------2 \+------------Delta \n/ FP /^ 1 +Alpha +0\.31006 +\( +zero, +0\.66806\) \*\*\n/ FI proml.treefile FC = 2 FP /^\(Beta:0\.00010,\(\(Epsilon:0\.00010,Delta:0\.41176\):1\.00907,\n/ FP /^Gamma:0\.68569\):0\.22206,Alpha:0\.31006\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 14 FP 9 /^./ // ID fpromlk-ex AB phylipnew AA fpromlk IN ../../data/evolution/promlk.dat IN IN FI promlk.fpromlk FC = 38 FP /^Ln Likelihood = -134\.70332\n/ FP /^ root 3 \n/ FP /^ 3 4 0\.66464 0\.66464\n/ FP /^ 4 Epsilon 0.85971 0.19507\n/ FI promlk.treefile FC = 2 FP /^\(\(Epsilon:0\.19507,Delta:0\.19507\):0\.66464,\(Gamma:0\.48551,\n/ FP /^\(Beta:0\.15763,Alpha:0\.15763\):0\.32788\):0\.37420\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 14 FP 9 /^./ // ID fprotdist-ex AB phylipnew AA fprotdist IN ../../data/evolution/protdist.dat IN FI protdist.fprotdist FC = 6 FP /^Gamma 0.628142 0.377406 0.000000 0.979550 0.866781\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 12 FP 8 /^./ // ID fprotpars-ex AB phylipnew AA fprotpars IN ../../data/evolution/protpars.dat IN IN FI protpars.fprotpars FC = 61 FP /^ 3 trees in all found\n/ FP 1 /^ \! \+--Beta \n/ FP 2 /^ \! 
+\+-+Beta +\n/ FI protpars.treefile FC = 3 FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.3333\];\n/ FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.3333\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.3333\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 19 FP 13 /^./ // ID fprotpars-ex2 AB phylipnew AA fprotpars CL -njumble 3 -seed 3 CL -printdata CL -ancseq CL -whichcode m CL -stepbox CL -outgrno 2 CL -dothreshold -threshold 3 IN ../../data/evolution/protpars.dat IN IN FI protpars.fprotpars FC = 137 FP /^ 3 trees in all found\n/ FP 3 /^requires a total of 14\.000\n/ FI protpars.treefile FC = 3 FP /^\(Beta,\(Gamma,\(\(Epsilon,Delta\),Alpha\)\)\)\[0\.3333\];\n/ FP /^\(Beta,\(\(\(Epsilon,Delta\),Gamma\),Alpha\)\)\[0\.3333\];\n/ FP /^\(Beta,\(\(Epsilon,Delta\),\(Gamma,Alpha\)\)\)\[0\.3333\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 43 FP 31 /^./ // ID fprotpars-ex3 AB phylipnew AA fprotpars CL -njumble 3 -seed 3 IN ../../data/evolution/protpars2.dat IN IN FI protpars2.fprotpars FC = 435 FP /^Data set # 1:\n/ FP /^ 3 trees in all found\n/ FP 3 /^requires a total of 25\.000\n/ FP /^Data set # 2:\n/ FP /^ 15 trees in all found\n/ FP 15 /^requires a total of 14\.000\n/ FP /^Data set # 3:\n/ FP /^ 5 trees in all found\n/ FP 5 /^requires a total of 24\.000\n/ FI protpars2.treefile FC = 23 FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.3333\];\n/ FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.3333\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.3333\];\n/ FP /^\(\(Gamma,\(Delta,\(Epsilon,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,Gamma\),\(Delta,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(Gamma,\(Delta,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Gamma,\(Epsilon,\(Delta,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Delta,Gamma\),\(Epsilon,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Delta,\(Epsilon,Gamma\)\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(\(Delta,Gamma\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,\(Delta,Gamma\)\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(Gamma,\(Epsilon,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(\(Epsilon,Gamma\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(Epsilon,\(Gamma,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(Delta,\(Gamma,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Gamma,\(Delta,\(Epsilon,Beta\)\)\),Alpha\)\[0\.2000\];\n/ FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.2000\];\n/ FP /^\(\(Gamma,\(Epsilon,\(Delta,Beta\)\)\),Alpha\)\[0\.2000\];\n/ FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.2000\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.2000\];\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 129 FP 94 /^./ // ID fprotpars-ex4 AB phylipnew AA fprotpars CL -option IN ../../data/evolution/protpars.dat IN IN ../../data/evolution/protparswts.dat IN IN IN IN IN IN IN IN IN IN IN IN FI protpars.fprotpars FC = 342 FP /^Weights set # 1:\n/ FP /^ 3 trees in all found\n/ FP 3 /^requires a total of 14\.000\n/ FP /^Weights set # 2:\n/ FP /^ 15 trees in all found\n/ FP 15 /^requires a total of 2\.000\n/ FI protpars.treefile FC = 18 FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.3333\];\n/ FP 
/^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.3333\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.3333\];\n/ FP /^\(\(Gamma,\(Delta,\(Epsilon,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,Gamma\),\(Delta,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Gamma,\(\(Epsilon,Delta\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(Gamma,\(Delta,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Gamma,\(Epsilon,\(Delta,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Delta,Gamma\),\(Epsilon,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Delta,\(Epsilon,Gamma\)\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(\(Epsilon,Delta\),Gamma\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(\(Delta,Gamma\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,\(Delta,Gamma\)\),Beta\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(Gamma,\(Epsilon,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(\(Epsilon,Gamma\),Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(\(Epsilon,Delta\),\(Gamma,Beta\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Delta,\(Epsilon,\(Gamma,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FP /^\(\(Epsilon,\(Delta,\(Gamma,Beta\)\)\),Alpha\)\[0\.0667\];\n/ FI stderr FC = 8 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 41 FP 27 /^./ // ID frestboot-ex AB phylipnew AA frestboot CL -seed 3 IN ../../data/evolution/restboot.dat IN FI restboot.frestboot FC = 600 FP 100 /^Gamma/ FP 3 /^Gamma \-\+\-\-\-\+\+\+\+\+\ \+\+\+/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 12 /^./ // ID frestdist-ex AB phylipnew AA frestdist IN ../../data/evolution/restdist.dat IN FI restdist.frestdist FC = 6 ## This is what we get from a fixed frestdist ## phylip-3.6b restdist has the same valgrind error ## Need to check current release to see if it is fixed FP /^Gamma 0\.107681 0\.107681 0\.000000 0\.192466 0\.207319\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 14 FP 9 /^./ // ID frestml-ex AB phylipnew AA frestml IN ../../data/evolution/restml.dat IN IN FI restml.frestml FC = 41 FP /^Ln Likelihood = -40\.47082\n/ FP /^ 1 Gamma 0\.10794 \( 0\.01144, 0\.21872\) \*\*\n/ FI restml.treefile FC = 2 FP /^\(Gamma:0\.10794,\(Beta:0\.00100,\(Epsilon:0\.00022,\n/ FP /^Delta:0\.01451\):0\.05878\):0\.01244,Alpha:0\.01244\);\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 15 FP 10 /^./ // ID fretree-ex AB phylipnew AA fretree CC First prompt only needed until it can read CC the number of species from the input tree IN 10 IN ../../data/evolution/retree.dat IN IN Q IN Y IN U FI retree.treefile FC = 2 FP /^\(\(\(\(\(\(\(Human,Chimp\),Gorilla\),Orang\),Gibbon\),\(Barbary_Ma,\(Crab-e\._Ma,\n/ FP /^\(Rhesus_Mac,Jpn_Macaq\)\)\)\),Squir\._Mon\),\(\(Tarsier,Lemur\),Bovine\),Mouse\);\n/ FI stderr FC = 4 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 27 FP 23 /^./ // ID fseqboot-ex AB phylipnew AA fseqboot CL -seed 3 IN ../../data/evolution/seqboot.dat IN FI seqboot.fseqboot FC = 600 FP 7 /^Alpha AAAAAA\n/ FP 14 /^Beta AAACCC\n/ FP 21 /^\S+ +AAACCC\n/ FP 100 /^Alpha/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 16 FP 12 /^./ // ID fseqbootall-ex AB phylipnew AA fseqbootall CL -seed 3 IN ../../data/evolution/seqboot.dat IN FI seqboot.fseqbootall FC = 600 FP 7 /^Alpha AAAAAA\n/ FP 14 /^Beta AAACCC\n/ FP 21 /^\S+ +AAACCC\n/ FP 100 /^Alpha/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 23 FP 18 /^./ // ID ftreedist-ex AB phylipnew AA ftreedist IN ../../data/evolution/treedist.dat IN FI 
treedist.ftreedist FC = 11 FP /Trees 3 and 4: 3\.162278e\-01/ FP 6 /Trees \d+ and \d+:\s+\d+/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID ftreedist-ex2 AB phylipnew AA ftreedist CL -dtype s IN ../../data/evolution/treedist2.dat IN FI treedist2.ftreedist FC = 11 FP /Trees 1 and 2: 4/ FP 6 /Trees \d+ and \d+:\s+\d+/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID ftreedist-sparse AB phylipnew AA ftreedist CL -style s IN ../../data/evolution/treedist.dat IN IN FI treedist.ftreedist FC = 6 FP /3 4 3\.162278e\-01/ FP 6 /\d+\s+\d+\s+\d+/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID ftreedist-listall AB phylipnew AA ftreedist CL -dtype s IN ../../data/evolution/treedist.dat IN FI treedist.ftreedist FC = 11 FP /Trees 11 and 12: 10\n/ FP 6 /Trees \d+ and \d+:\s+\d+/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 5 FP 2 /^./ // ID ftreedistpair-ex AB phylipnew AA ftreedistpair CL -style s IN ../../data/evolution/treedist.dat IN ../../data/evolution/treedist.dat IN FI treedist.ftreedistpair FC = 288 FP /^1 14 2\.000000e-01\n/ FP /^2 13 2\.000000e-01\n/ FI stderr FC = 2 FP 0 /Warning: / FP 0 /Error: / FP 0 /Died: / FI stdout FC = 2 FP 1 /^./ // PHYLIPNEW-3.69.650/aclocal.m40000664000175000017500000010504512171071674012141 00000000000000# generated automatically by aclocal 1.12.2 -*- Autoconf -*- # Copyright (C) 1996-2012 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, [m4_warning([this file was generated for autoconf 2.69. You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) # Copyright (C) 2002-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 8 # AM_AUTOMAKE_VERSION(VERSION) # ---------------------------- # Automake X.Y traces this macro to ensure aclocal.m4 has been # generated from the m4 files accompanying Automake X.Y. # (This private macro should not be called outside this file.) AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.12' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. m4_if([$1], [1.12.2], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) # _AM_AUTOCONF_VERSION(VERSION) # ----------------------------- # aclocal traces this macro to find the Autoconf version. # This is a private macro too. Using m4_define simplifies # the logic in aclocal, which can simply ignore this definition. 
m4_define([_AM_AUTOCONF_VERSION], []) # AM_SET_CURRENT_AUTOMAKE_VERSION # ------------------------------- # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], [AM_AUTOMAKE_VERSION([1.12.2])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # AM_AUX_DIR_EXPAND -*- Autoconf -*- # Copyright (C) 2001-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 2 # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets # $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to # '$srcdir', '$srcdir/..', or '$srcdir/../..'. # # Of course, Automake must honor this variable whenever it calls a # tool from the auxiliary directory. The problem is that $srcdir (and # therefore $ac_aux_dir as well) can be either absolute or relative, # depending on how configure is run. This is pretty annoying, since # it makes $ac_aux_dir quite unusable in subdirectories: in the top # source directory, any form will work fine, but in subdirectories a # relative path needs to be adjusted first. # # $ac_aux_dir/missing # fails when called from a subdirectory if $ac_aux_dir is relative # $top_srcdir/$ac_aux_dir/missing # fails if $ac_aux_dir is absolute, # fails when called from a subdirectory in a VPATH build with # a relative $ac_aux_dir # # The reason of the latter failure is that $top_srcdir and $ac_aux_dir # are both prefixed by $srcdir. In an in-source build this is usually # harmless because $srcdir is '.', but things will broke when you # start a VPATH build or use an absolute $srcdir. # # So we could use something similar to $top_srcdir/$ac_aux_dir/missing, # iff we strip the leading $srcdir from $ac_aux_dir. That would be: # am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` # and then we would define $MISSING as # MISSING="\${SHELL} $am_aux_dir/missing" # This will work as long as MISSING is not called from configure, because # unfortunately $(top_srcdir) has no meaning in configure. # However there are other variables, like CC, which are often used in # configure, and could therefore not use this "fixed" $ac_aux_dir. # # Another solution, used here, is to always expand $ac_aux_dir to an # absolute PATH. The drawback is that using absolute paths prevent a # configured tree to be moved without reconfiguration. AC_DEFUN([AM_AUX_DIR_EXPAND], [dnl Rely on autoconf to set up CDPATH properly. AC_PREREQ([2.50])dnl # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` ]) # AM_CONDITIONAL -*- Autoconf -*- # Copyright (C) 1997-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 10 # AM_CONDITIONAL(NAME, SHELL-CONDITION) # ------------------------------------- # Define a conditional. 
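# Hedged usage sketch for the macro defined just below (the names here are
# examples, not taken from this package's build files):
#   configure.ac:
#     AM_CONDITIONAL([ENABLE_PDF], [test "x$have_hpdf" = xyes])
#   Makefile.am:
#     if ENABLE_PDF
#     noinst_PROGRAMS = pdfdemo
#     endif
# Note that AM_CONDITIONAL must be reached on every configure code path;
# the AC_CONFIG_COMMANDS_PRE check in the definition below errors out when
# a conditional was only invoked conditionally.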
AC_DEFUN([AM_CONDITIONAL], [AC_PREREQ([2.52])dnl m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl AC_SUBST([$1_TRUE])dnl AC_SUBST([$1_FALSE])dnl _AM_SUBST_NOTMAKE([$1_TRUE])dnl _AM_SUBST_NOTMAKE([$1_FALSE])dnl m4_define([_AM_COND_VALUE_$1], [$2])dnl if $2; then $1_TRUE= $1_FALSE='#' else $1_TRUE='#' $1_FALSE= fi AC_CONFIG_COMMANDS_PRE( [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then AC_MSG_ERROR([[conditional "$1" was never defined. Usually this means the macro was only invoked conditionally.]]) fi])]) # Copyright (C) 1999-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 17 # There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be # written in clear, in which case automake, when reading aclocal.m4, # will think it sees a *use*, and therefore will trigger all it's # C support machinery. Also note that it means that autoscan, seeing # CC etc. in the Makefile, will ask for an AC_PROG_CC use... # _AM_DEPENDENCIES(NAME) # ---------------------- # See how the compiler implements dependency checking. # NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". # We try a few techniques and use that to set a single cache variable. # # We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was # modified to invoke _AM_DEPENDENCIES(CC); we would have a circular # dependency, and given that the user is not expected to run this macro, # just rely on AC_PROG_CC. AC_DEFUN([_AM_DEPENDENCIES], [AC_REQUIRE([AM_SET_DEPDIR])dnl AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl AC_REQUIRE([AM_MAKE_INCLUDE])dnl AC_REQUIRE([AM_DEP_TRACK])dnl m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], [$1], [CXX], [depcc="$CXX" am_compiler_list=], [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], [$1], [UPC], [depcc="$UPC" am_compiler_list=], [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], [depcc="$$1" am_compiler_list=]) AC_CACHE_CHECK([dependency style of $depcc], [am_cv_$1_dependencies_compiler_type], [if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. 
mkdir sub am_cv_$1_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` fi am__universal=false m4_case([$1], [CC], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac], [CXX], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac]) for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_$1_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_$1_dependencies_compiler_type=none fi ]) AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) AM_CONDITIONAL([am__fastdep$1], [ test "x$enable_dependency_tracking" != xno \ && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) ]) # AM_SET_DEPDIR # ------------- # Choose a directory name for dependency files. # This macro is AC_REQUIREd in _AM_DEPENDENCIES. 
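# Hedged, much-simplified illustration of what the _AM_DEPENDENCIES probe
# above is testing: can the compiler write a makefile-fragment dependency
# file as a side effect of compilation (the style depcomp labels "gcc3")?
# The real probe drives depcomp itself and walks every known mode; this
# stand-alone, GCC-flavoured check is for orientation only.
#   cat > conftest.c <<'EOF'
#   #include "conftest1.h"
#   int main (void) { return 0; }
#   EOF
#   echo '/* dummy */' > conftest1.h
#   if ${CC-cc} -MD -MF conftest.Po -c conftest.c >/dev/null 2>&1 &&
#      grep conftest1.h conftest.Po >/dev/null 2>&1; then
#     echo "side-effect dependency output works"
#   fi
#   rm -f conftest.c conftest1.h conftest.Po conftest.${OBJEXT-o}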
AC_DEFUN([AM_SET_DEPDIR], [AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl ]) # AM_DEP_TRACK # ------------ AC_DEFUN([AM_DEP_TRACK], [AC_ARG_ENABLE([dependency-tracking], [dnl AS_HELP_STRING( [--enable-dependency-tracking], [do not reject slow dependency extractors]) AS_HELP_STRING( [--disable-dependency-tracking], [speeds up one-time build])]) if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) AC_SUBST([AMDEPBACKSLASH])dnl _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl AC_SUBST([am__nodep])dnl _AM_SUBST_NOTMAKE([am__nodep])dnl ]) # Generate code to set up dependency tracking. -*- Autoconf -*- # Copyright (C) 1999-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 6 # _AM_OUTPUT_DEPENDENCY_COMMANDS # ------------------------------ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], [{ # Autoconf 2.62 quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. # We used to match only the files named 'Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`AS_DIRNAME("$mf")` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running 'make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`AS_DIRNAME(["$file"])` AS_MKDIR_P([$dirpart/$fdir]) # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ])# _AM_OUTPUT_DEPENDENCY_COMMANDS # AM_OUTPUT_DEPENDENCY_COMMANDS # ----------------------------- # This macro should only be invoked once -- use via AC_REQUIRE. # # This code is only required when automatic dependency tracking # is enabled. FIXME. This creates each '.P' file that we will # need in order to bootstrap the dependency handling code. AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], [AC_CONFIG_COMMANDS([depfiles], [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) ]) # Do all the work for Automake. 
-*- Autoconf -*- # Copyright (C) 1996-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 19 # This macro actually does too much. Some checks are only needed if # your package does certain things. But this isn't really a big deal. # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) # AM_INIT_AUTOMAKE([OPTIONS]) # ----------------------------------------------- # The call with PACKAGE and VERSION arguments is the old style # call (pre autoconf-2.50), which is being phased out. PACKAGE # and VERSION should now be passed to AC_INIT and removed from # the call to AM_INIT_AUTOMAKE. # We support both call styles for the transition. After # the next Automake release, Autoconf can make the AC_INIT # arguments mandatory, and then we can depend on a new Autoconf # release and drop the old call support. AC_DEFUN([AM_INIT_AUTOMAKE], [AC_PREREQ([2.62])dnl dnl Autoconf wants to disallow AM_ names. We explicitly allow dnl the ones we care about. m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl AC_REQUIRE([AC_PROG_INSTALL])dnl if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl # test to see if srcdir already configured if test -f $srcdir/config.status; then AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi AC_SUBST([CYGPATH_W]) # Define the identity of the package. dnl Distinguish between old-style and new-style calls. m4_ifval([$2], [AC_DIAGNOSE([obsolete], [$0: two- and three-arguments forms are deprecated. For more info, see: http://www.gnu.org/software/automake/manual/automake.html#Modernize-AM_INIT_AUTOMAKE-invocation]) m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl AC_SUBST([PACKAGE], [$1])dnl AC_SUBST([VERSION], [$2])], [_AM_SET_OPTIONS([$1])dnl dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. m4_if( m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), [ok:ok],, [m4_fatal([AC_INIT should be called with package and version arguments])])dnl AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl _AM_IF_OPTION([no-define],, [AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl # Some tools Automake needs. AC_REQUIRE([AM_SANITY_CHECK])dnl AC_REQUIRE([AC_ARG_PROGRAM])dnl AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) AM_MISSING_PROG([AUTOCONF], [autoconf]) AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) AM_MISSING_PROG([AUTOHEADER], [autoheader]) AM_MISSING_PROG([MAKEINFO], [makeinfo]) AC_REQUIRE([AM_PROG_INSTALL_SH])dnl AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl AC_REQUIRE([AC_PROG_MKDIR_P])dnl # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # AC_SUBST([mkdir_p], ['$(MKDIR_P)']) # We need awk for the "check" target. The system "awk" is bad on # some platforms. 
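# Hedged aside on the two AM_INIT_AUTOMAKE call styles described above
# (package name and version shown here are examples only):
#   old, deprecated style:   AM_INIT_AUTOMAKE([mypkg], [1.0])
#   current style:           AC_INIT([mypkg], [1.0])
#                            AM_INIT_AUTOMAKE([foreign])
# In the current style AM_INIT_AUTOMAKE receives only Automake options,
# and PACKAGE/VERSION are taken from the AC_INIT arguments.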
AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl _AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], [_AM_PROG_TAR([v7])])]) _AM_IF_OPTION([no-dependencies],, [AC_PROVIDE_IFELSE([AC_PROG_CC], [_AM_DEPENDENCIES([CC])], [m4_define([AC_PROG_CC], m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl AC_PROVIDE_IFELSE([AC_PROG_CXX], [_AM_DEPENDENCIES([CXX])], [m4_define([AC_PROG_CXX], m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJC], [_AM_DEPENDENCIES([OBJC])], [m4_define([AC_PROG_OBJC], m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl dnl Support for Objective C++ was only introduced in Autoconf 2.65, dnl but we still cater to Autoconf 2.62. m4_ifdef([AC_PROG_OBJCXX], [AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], [_AM_DEPENDENCIES([OBJCXX])], [m4_define([AC_PROG_OBJCXX], m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])])dnl ]) _AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl dnl The 'parallel-tests' driver may need to know about EXEEXT, so add the dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro dnl is hooked onto _AC_COMPILER_EXEEXT early, see below. AC_CONFIG_COMMANDS_PRE(dnl [m4_provide_if([_AM_COMPILER_EXEEXT], [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl ]) dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl mangled by Autoconf and run in a shell conditional statement. m4_define([_AC_COMPILER_EXEEXT], m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) # When config.status generates a header, we must update the stamp-h file. # This file resides in the same directory as the config header # that is generated. The stamp files are numbered to have different names. # Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the # loop where config.status creates the headers, so we can generate # our stamp files there. AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], [# Compute $1's index in $config_headers. _am_arg=$1 _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) # Copyright (C) 2001-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 8 # AM_PROG_INSTALL_SH # ------------------ # Define $install_sh. AC_DEFUN([AM_PROG_INSTALL_SH], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi AC_SUBST([install_sh])]) # Copyright (C) 2003-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 2 # Check whether the underlying file-system supports filenames # with a leading dot. For instance MS-DOS doesn't. AC_DEFUN([AM_SET_LEADING_DOT], [rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. 
else am__leading_dot=_ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 5 # AM_MAKE_INCLUDE() # ----------------- # Check to see how make treats includes. AC_DEFUN([AM_MAKE_INCLUDE], [am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. AC_MSG_CHECKING([for style of include used by $am_make]) am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from 'make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi AC_SUBST([am__include]) AC_SUBST([am__quote]) AC_MSG_RESULT([$_am_result]) rm -f confinc confmf ]) # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Copyright (C) 1997-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 7 # AM_MISSING_PROG(NAME, PROGRAM) # ------------------------------ AC_DEFUN([AM_MISSING_PROG], [AC_REQUIRE([AM_MISSING_HAS_RUN]) $1=${$1-"${am_missing_run}$2"} AC_SUBST($1)]) # AM_MISSING_HAS_RUN # ------------------ # Define MISSING if not defined so far and test if it supports --run. # If it does, set am_missing_run to use it, otherwise, to nothing. AC_DEFUN([AM_MISSING_HAS_RUN], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([missing])dnl if test x"${MISSING+set}" != xset; then case $am_aux_dir in *\ * | *\ *) MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; *) MISSING="\${SHELL} $am_aux_dir/missing" ;; esac fi # Use eval to expand $SHELL if eval "$MISSING --run true"; then am_missing_run="$MISSING --run " else am_missing_run= AC_MSG_WARN(['missing' script is too old or missing]) fi ]) # Helper functions for option handling. -*- Autoconf -*- # Copyright (C) 2001-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 6 # _AM_MANGLE_OPTION(NAME) # ----------------------- AC_DEFUN([_AM_MANGLE_OPTION], [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) # _AM_SET_OPTION(NAME) # -------------------- # Set option NAME. Presently that only means defining a flag for this option. AC_DEFUN([_AM_SET_OPTION], [m4_define(_AM_MANGLE_OPTION([$1]), [1])]) # _AM_SET_OPTIONS(OPTIONS) # ------------------------ # OPTIONS is a space-separated list of Automake options. AC_DEFUN([_AM_SET_OPTIONS], [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) # _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) # ------------------------------------------- # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 
AC_DEFUN([_AM_IF_OPTION], [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) # Check to make sure that the build environment is sane. -*- Autoconf -*- # Copyright (C) 1996-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 9 # AM_SANITY_CHECK # --------------- AC_DEFUN([AM_SANITY_CHECK], [AC_MSG_CHECKING([whether build environment is sane]) # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[[\\\"\#\$\&\'\`$am_lf]]*) AC_MSG_ERROR([unsafe absolute working directory name]);; esac case $srcdir in *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$[*]" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$[*]" != "X $srcdir/configure conftest.file" \ && test "$[*]" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken alias in your environment]) fi if test "$[2]" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$[2]" = conftest.file ) then # Ok. : else AC_MSG_ERROR([newly created file is older than distributed files! Check your system clock]) fi AC_MSG_RESULT([yes]) # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi AC_CONFIG_COMMANDS_PRE( [AC_MSG_CHECKING([that generated files are newer than configure]) if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi AC_MSG_RESULT([done])]) rm -f conftest.file ]) # Copyright (C) 2001-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 2 # AM_PROG_INSTALL_STRIP # --------------------- # One issue with vendor 'install' (even GNU) is that you can't # specify the program used to strip binaries. This is especially # annoying in cross-compiling environments, where the build's strip # is unlikely to handle the host's binaries. # Fortunately install-sh will honor a STRIPPROG variable, so we # always use install-sh in "make install-strip", and initialize # STRIPPROG with the value of the STRIP variable (set by the user). AC_DEFUN([AM_PROG_INSTALL_STRIP], [AC_REQUIRE([AM_PROG_INSTALL_SH])dnl # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". 
However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. if test "$cross_compiling" != no; then AC_CHECK_TOOL([STRIP], [strip], :) fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" AC_SUBST([INSTALL_STRIP_PROGRAM])]) # Copyright (C) 2006-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 3 # _AM_SUBST_NOTMAKE(VARIABLE) # --------------------------- # Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. # This macro is traced by Automake. AC_DEFUN([_AM_SUBST_NOTMAKE]) # AM_SUBST_NOTMAKE(VARIABLE) # -------------------------- # Public sister of _AM_SUBST_NOTMAKE. AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) # Check how to create a tarball. -*- Autoconf -*- # Copyright (C) 2004-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 3 # _AM_PROG_TAR(FORMAT) # -------------------- # Check how to create a tarball in format FORMAT. # FORMAT should be one of 'v7', 'ustar', or 'pax'. # # Substitute a variable $(am__tar) that is a command # writing to stdout a FORMAT-tarball containing the directory # $tardir. # tardir=directory && $(am__tar) > result.tar # # Substitute a variable $(am__untar) that extract such # a tarball read from stdin. # $(am__untar) < result.tar AC_DEFUN([_AM_PROG_TAR], [# Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AC_SUBST([AMTAR], ['$${TAR-tar}']) m4_if([$1], [v7], [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], [m4_case([$1], [ustar],, [pax],, [m4_fatal([Unknown tar format])]) AC_MSG_CHECKING([how to create a $1 tar archive]) # Loop over all known methods to create a tar archive until one works. _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' _am_tools=${am_cv_prog_tar_$1-$_am_tools} # Do not fold the above two line into one, because Tru64 sh and # Solaris sh will not grok spaces in the rhs of '-'. for _am_tool in $_am_tools do case $_am_tool in gnutar) for _am_tar in tar gnutar gtar; do AM_RUN_LOG([$_am_tar --version]) && break done am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' am__untar="$_am_tar -xf -" ;; plaintar) # Must skip GNU tar: if it does not support --format= it doesn't create # ustar tarball either. (tar --version) >/dev/null 2>&1 && continue am__tar='tar chf - "$$tardir"' am__tar_='tar chf - "$tardir"' am__untar='tar xf -' ;; pax) am__tar='pax -L -x $1 -w "$$tardir"' am__tar_='pax -L -x $1 -w "$tardir"' am__untar='pax -r' ;; cpio) am__tar='find "$$tardir" -print | cpio -o -H $1 -L' am__tar_='find "$tardir" -print | cpio -o -H $1 -L' am__untar='cpio -i -H $1 -d' ;; none) am__tar=false am__tar_=false am__untar=false ;; esac # If the value was cached, stop now. We just wanted to have am__tar # and am__untar set. 
test -n "${am_cv_prog_tar_$1}" && break # tar/untar a dummy directory, and stop if the command works rm -rf conftest.dir mkdir conftest.dir echo GrepMe > conftest.dir/file AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) rm -rf conftest.dir if test -s conftest.tar; then AM_RUN_LOG([$am__untar /dev/null 2>&1 && break fi done rm -rf conftest.dir AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) AC_MSG_RESULT([$am_cv_prog_tar_$1])]) AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR m4_include([m4/general.m4]) m4_include([m4/hpdf.m4]) m4_include([m4/java.m4]) m4_include([m4/lf_x11.m4]) m4_include([m4/libtool.m4]) m4_include([m4/ltoptions.m4]) m4_include([m4/ltsugar.m4]) m4_include([m4/ltversion.m4]) m4_include([m4/lt~obsolete.m4]) m4_include([m4/mysql.m4]) m4_include([m4/pngdriver.m4]) m4_include([m4/postgresql.m4]) m4_include([m4/sgi.m4]) PHYLIPNEW-3.69.650/missing0000755000175000017500000002370312171071677011701 00000000000000#! /bin/sh # Common stub for a few missing GNU programs while installing. scriptversion=2012-01-06.18; # UTC # Copyright (C) 1996-2012 Free Software Foundation, Inc. # Originally by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. if test $# -eq 0; then echo 1>&2 "Try '$0 --help' for more information" exit 1 fi run=: sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' sed_minuso='s/.* -o \([^ ]*\).*/\1/p' # In the cases where this matters, 'missing' is being run in the # srcdir already. if test -f configure.ac; then configure_ac=configure.ac else configure_ac=configure.in fi msg="missing on your system" case $1 in --run) # Try to run requested program, and just exit if it succeeds. run= shift "$@" && exit 0 # Exit code 63 means version mismatch. This often happens # when the user try to use an ancient version of a tool on # a file that requires a minimum version. In this case we # we should proceed has if the program had been absent, or # if --run hadn't been passed. if test $? = 63; then run=: msg="probably too old" fi ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Handle 'PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an error status if there is no known handling for PROGRAM. 
Options: -h, --help display this help and exit -v, --version output version information and exit --run try to run the given command, and emulate it if it fails Supported PROGRAM values: aclocal touch file 'aclocal.m4' autoconf touch file 'configure' autoheader touch file 'config.h.in' autom4te touch the output file, or create a stub one automake touch all 'Makefile.in' files bison create 'y.tab.[ch]', if possible, from existing .[ch] flex create 'lex.yy.c', if possible, from existing .c help2man touch the output file lex create 'lex.yy.c', if possible, from existing .c makeinfo touch the output file yacc create 'y.tab.[ch]', if possible, from existing .[ch] Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and 'g' are ignored when checking the name. Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? ;; -*) echo 1>&2 "$0: Unknown '$1' option" echo 1>&2 "Try '$0 --help' for more information" exit 1 ;; esac # normalize program name to check for. program=`echo "$1" | sed ' s/^gnu-//; t s/^gnu//; t s/^g//; t'` # Now exit if we have it, but it failed. Also exit now if we # don't have it and --version was passed (most likely to detect # the program). This is about non-GNU programs, so use $1 not # $program. case $1 in lex*|yacc*) # Not GNU programs, they don't have --version. ;; *) if test -z "$run" && ($1 --version) > /dev/null 2>&1; then # We have it, but it failed. exit 1 elif test "x$2" = "x--version" || test "x$2" = "x--help"; then # Could not run --version or --help. This is probably someone # running '$TOOL --version' or '$TOOL --help' to check whether # $TOOL exists and not knowing $TOOL uses missing. exit 1 fi ;; esac # If it does not exist, or fails to run (possibly an outdated version), # try to emulate it. case $program in aclocal*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified 'acinclude.m4' or '${configure_ac}'. You might want to install the Automake and Perl packages. Grab them from any GNU archive site." touch aclocal.m4 ;; autoconf*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified '${configure_ac}'. You might want to install the Autoconf and GNU m4 packages. Grab them from any GNU archive site." touch configure ;; autoheader*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified 'acconfig.h' or '${configure_ac}'. You might want to install the Autoconf and GNU m4 packages. Grab them from any GNU archive site." files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` test -z "$files" && files="config.h" touch_files= for f in $files; do case $f in *:*) touch_files="$touch_files "`echo "$f" | sed -e 's/^[^:]*://' -e 's/:.*//'`;; *) touch_files="$touch_files $f.in";; esac done touch $touch_files ;; automake*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified 'Makefile.am', 'acinclude.m4' or '${configure_ac}'. You might want to install the Automake and Perl packages. Grab them from any GNU archive site." find . -type f -name Makefile.am -print | sed 's/\.am$/.in/' | while read f; do touch "$f"; done ;; autom4te*) echo 1>&2 "\ WARNING: '$1' is needed, but is $msg. You might have modified some files without having the proper tools for further handling them. You can get '$1' as part of Autoconf from any GNU archive site." 
file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo "#! /bin/sh" echo "# Created by GNU Automake missing as a replacement of" echo "# $ $@" echo "exit 0" chmod +x $file exit 1 fi ;; bison*|yacc*) echo 1>&2 "\ WARNING: '$1' $msg. You should only need it if you modified a '.y' file. You may need the Bison package in order for those modifications to take effect. You can get Bison from any GNU archive site." rm -f y.tab.c y.tab.h if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.y) SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.c fi SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.h fi ;; esac fi if test ! -f y.tab.h; then echo >y.tab.h fi if test ! -f y.tab.c; then echo 'main() { return 0; }' >y.tab.c fi ;; lex*|flex*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified a '.l' file. You may need the Flex package in order for those modifications to take effect. You can get Flex from any GNU archive site." rm -f lex.yy.c if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.l) SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" lex.yy.c fi ;; esac fi if test ! -f lex.yy.c; then echo 'main() { return 0; }' >lex.yy.c fi ;; help2man*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified a dependency of a manual page. You may need the Help2man package in order for those modifications to take effect. You can get Help2man from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo ".ab help2man is required to generate this page" exit $? fi ;; makeinfo*) echo 1>&2 "\ WARNING: '$1' is $msg. You should only need it if you modified a '.texi' or '.texinfo' file, or any other file indirectly affecting the aspect of the manual. The spurious call might also be the consequence of using a buggy 'make' (AIX, DU, IRIX). You might want to install the Texinfo package or the GNU make package. Grab either from any GNU archive site." # The file to touch is that specified with -o ... file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -z "$file"; then # ... or it is the one specified with @setfilename ... infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` file=`sed -n ' /^@setfilename/{ s/.* \([^ ]*\) *$/\1/ p q }' $infile` # ... or it is derived from the source name (dir/f.texi becomes f.info) test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info fi # If the file does not exist, the user really needs makeinfo; # let's fail without touching anything. test -f $file || exit 1 touch $file ;; *) echo 1>&2 "\ WARNING: '$1' is needed, and is $msg. You might have modified some files without having the proper tools for further handling them. Check the 'README' file, it often tells you about the needed prerequisites for installing this package. You may also peek at any GNU archive site, in case some other package would contain this missing '$1' program." 
exit 1 ;; esac exit 0 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: PHYLIPNEW-3.69.650/Makefile.am0000664000175000017500000000127111614226107012323 00000000000000# ACLOCAL_AMFLAGS = -I m4 SUBDIRS = src emboss_acd data emboss_doc EXTRA_DIST = depcomp ltmain.sh install-sh config.sub config.guess # tar to pick up the other directories # then remove any CVS subdirectories dist-hook: tar cBf - emboss_acd | ( cd $(distdir); tar xBf - ; cd emboss_acd; rm -rf CVS ) tar cBf - emboss_doc | ( cd $(distdir); tar xBf - ; cd emboss_doc; rm -rf CVS; rm -rf master) tar cBf - doc | ( cd $(distdir); tar xBf - ; cd doc; rm -rf CVS ) tar cBf - include | ( cd $(distdir); tar xBf - ; cd include; rm -rf CVS ) tar cBf - data | ( cd $(distdir); tar xBf - ; cd data; rm -rf CVS ) tar cBf - test | ( cd $(distdir); tar xBf - ; cd test; rm -rf CVS) PHYLIPNEW-3.69.650/emboss_doc/0002775000175000017500000000000012171071712012464 500000000000000PHYLIPNEW-3.69.650/emboss_doc/text/0002775000175000017500000000000012171071711013447 500000000000000PHYLIPNEW-3.69.650/emboss_doc/text/fdiscboot.txt0000664000175000017500000004757512171064331016124 00000000000000 fdiscboot Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Bootstrapped discrete sites algorithm Description Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it from between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development Algorithm SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format. To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. 
Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis. This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way, some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are:

* The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.

* The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.

* Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Kuensch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3.

* Partial block-bootstrapping.
Similar to partial bootstrapping except sampling blocks rather than single characters.

* Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters.

* Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.

* Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).

* Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).

* Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.

* Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below.
Here is a table to links to those other XML alignment formats: Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there. MSAML M http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects. BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format Usage Here is a sample session with fdiscboot % fdiscboot -seed 3 Bootstrapped discrete sites algorithm Input file: discboot.dat Phylip seqboot_disc program output file [discboot.fdiscboot]: Phylip ancestor data output file (optional) [discboot.ancfile]: Phylip mix data output file (optional) [discboot.mixfile]: Phylip factor data output file (optional) [discboot.factfile]: completed replicate number 10 completed replicate number 20 completed replicate number 30 completed replicate number 40 completed replicate number 50 completed replicate number 60 completed replicate number 70 completed replicate number 80 completed replicate number 90 completed replicate number 100 Output written to file "discboot.fdiscboot" Done. Go to the input files for this example Go to the output files for this example Command line arguments Bootstrapped discrete sites algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates (no help text) discretestates value [-outfile] outfile [*.fdiscboot] Phylip seqboot_disc program output file [-outancfile] outfile [*.fdiscboot] Phylip ancestor data output file (optional) [-outmixfile] outfile [*.fdiscboot] Phylip mix data output file (optional) [-outfactfile] outfile [*.fdiscboot] Phylip factor data output file (optional) Additional (Optional) qualifiers (* if not always prompted): -mixfile properties File of mixtures -ancfile properties File of ancestors -weights properties Weights file -factorfile properties Factors file -test menu [b] Choose test (Values: b (Bootstrap); j (Jackknife); c (Permute species for each character); o (Permute character order); s (Permute within species); r (Rewrite data)) * -regular toggle [N] Altered sampling fraction * -fracsample float [100.0] Samples as percentage of sites (Number from 0.100 to 100.000) * -morphseqtype menu [p] Output format (Values: p (PHYLIP); n (NEXUS)) * -blocksize integer [1] Block size for bootstraping (Integer 1 or more) * -reps integer [100] How many replicates (Integer 1 or more) * -justweights menu [d] Write out datasets or just weights (Values: d (Datasets); w (Weights)) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -printdata boolean [N] Print out the data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outancfile" associated qualifiers -odirectory3 string Output directory "-outmixfile" associated qualifiers -odirectory4 string Output directory "-outfactfile" associated qualifiers -odirectory5 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional 
values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdiscboot reads discrete character data Input files for usage example File: discboot.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Output file format fdiscboot writes a bootstrap multiple set of discrete character data Output files for usage example File: discboot.ancfile File: discboot.factfile File: discboot.mixfile File: discboot.fdiscboot 5 6 Alpha 111001 Beta 111000 Gamma 100001 Delta 000110 Epsilon 000111 5 6 Alpha 111011 Beta 111000 Gamma 100011 Delta 000100 Epsilon 000111 5 6 Alpha 111110 Beta 111000 Gamma 110110 Delta 000001 Epsilon 000110 5 6 Alpha 000001 Beta 000000 Gamma 000001 Delta 111110 Epsilon 111111 5 6 Alpha 111100 Beta 111000 Gamma 110100 Delta 000011 Epsilon 000100 5 6 Alpha 111100 Beta 100000 Gamma 111100 Delta 000011 Epsilon 011100 5 6 Alpha 110011 Beta 110000 Gamma 100011 Delta 001100 Epsilon 001111 5 6 Alpha 111100 Beta 100000 Gamma 111100 Delta 000011 Epsilon 011100 5 6 Alpha 110100 [Part of this file has been deleted for brevity] Gamma 101111 Delta 000000 Epsilon 001111 5 6 Alpha 110110 Beta 110000 Gamma 110110 Delta 001001 Epsilon 001110 5 6 Alpha 110111 Beta 110000 Gamma 000111 Delta 001000 Epsilon 001111 5 6 Alpha 101111 Beta 100000 Gamma 001111 Delta 010000 Epsilon 011111 5 6 Alpha 011111 Beta 000000 Gamma 011111 Delta 100000 Epsilon 111111 5 6 Alpha 011000 Beta 000000 Gamma 011000 Delta 100111 Epsilon 111000 5 6 Alpha 101100 Beta 100000 Gamma 101100 Delta 010011 Epsilon 011100 5 6 Alpha 111111 Beta 111110 Gamma 100001 Delta 000000 Epsilon 000001 5 6 Alpha 110110 Beta 110000 Gamma 000110 Delta 001001 Epsilon 001110 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
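As an illustration of the resampling behind the replicates shown above (this is only a hypothetical sketch, not the fdiscboot/SEQBOOT source code), the following C program builds one bootstrap replicate from the discboot.dat example by drawing site columns at random with replacement, so the replicate has the same number of characters as the original but some columns are repeated and others dropped. The use of srand()/rand() and the fixed seed is purely illustrative; the real program uses its own random number generator driven by the -seed value.

#include <stdio.h>
#include <stdlib.h>

#define NSPECIES 5
#define NSITES   6

int main(void)
{
    /* Example matrix copied from discboot.dat above. */
    const char *name[NSPECIES] = { "Alpha", "Beta", "Gamma", "Delta", "Epsilon" };
    const char *data[NSPECIES] = { "110110", "110000", "100110", "001001", "001110" };
    int pick[NSITES];
    int i, j;

    srand(3);   /* illustrative fixed seed only; fdiscboot uses its own generator */

    /* Draw NSITES column indices with replacement: some columns appear
       more than once, others not at all. */
    for (j = 0; j < NSITES; j++)
        pick[j] = rand() % NSITES;

    /* Print one replicate in the same "header, then name and characters"
       layout as the output file shown above. */
    printf("    %d    %d\n", NSPECIES, NSITES);
    for (i = 0; i < NSPECIES; i++) {
        printf("%-10s", name[i]);
        for (j = 0; j < NSITES; j++)
            putchar(data[i][pick[j]]);
        putchar('\n');
    }
    return 0;
}

A delete-half-jackknife replicate would instead keep a randomly chosen half of the columns, each used at most once.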
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. 
Comments None PHYLIPNEW-3.69.650/emboss_doc/text/CVS/0002775000175000017500000000000012171064331014102 500000000000000PHYLIPNEW-3.69.650/emboss_doc/text/CVS/Entries0000664000175000017500000000361012171064331015354 00000000000000/.cvsignore/1.1/Thu Jan 21 17:05:10 2010/-kk/ /Makefile.am/1.2/Mon Jan 26 14:45:07 2009// /fclique.txt/1.21/Mon Jul 15 21:25:45 2013// /fconsense.txt/1.22/Mon Jul 15 21:25:45 2013// /fcontml.txt/1.23/Mon Jul 15 21:25:45 2013// /fcontrast.txt/1.22/Mon Jul 15 21:25:45 2013// /fdiscboot.txt/1.21/Mon Jul 15 21:25:45 2013// /fdnacomp.txt/1.27/Mon Jul 15 21:25:45 2013// /fdnadist.txt/1.26/Mon Jul 15 21:25:45 2013// /fdnainvar.txt/1.26/Mon Jul 15 21:25:45 2013// /fdnaml.txt/1.28/Mon Jul 15 21:25:45 2013// /fdnamlk.txt/1.28/Mon Jul 15 21:25:45 2013// /fdnamove.txt/1.24/Mon Jul 15 21:25:45 2013// /fdnapars.txt/1.28/Mon Jul 15 21:25:45 2013// /fdnapenny.txt/1.27/Mon Jul 15 21:25:45 2013// /fdollop.txt/1.21/Mon Jul 15 21:25:45 2013// /fdolmove.txt/1.23/Mon Jul 15 21:25:45 2013// /fdolpenny.txt/1.22/Mon Jul 15 21:25:45 2013// /fdrawgram.txt/1.22/Mon Jul 15 21:25:45 2013// /fdrawtree.txt/1.22/Mon Jul 15 21:25:45 2013// /ffactor.txt/1.19/Mon Jul 15 21:25:45 2013// /ffitch.txt/1.23/Mon Jul 15 21:25:45 2013// /ffreqboot.txt/1.21/Mon Jul 15 21:25:45 2013// /fgendist.txt/1.20/Mon Jul 15 21:25:45 2013// /fkitsch.txt/1.23/Mon Jul 15 21:25:45 2013// /fmix.txt/1.21/Mon Jul 15 21:25:45 2013// /fmove.txt/1.19/Mon Jul 15 21:25:45 2013// /fneighbor.txt/1.23/Mon Jul 15 21:25:45 2013// /fpars.txt/1.22/Mon Jul 15 21:25:45 2013// /fpenny.txt/1.21/Mon Jul 15 21:25:45 2013// /fproml.txt/1.27/Mon Jul 15 21:25:45 2013// /fpromlk.txt/1.27/Mon Jul 15 21:25:45 2013// /fprotdist.txt/1.26/Mon Jul 15 21:25:45 2013// /fprotpars.txt/1.27/Mon Jul 15 21:25:45 2013// /frestboot.txt/1.21/Mon Jul 15 21:25:45 2013// /frestdist.txt/1.24/Mon Jul 15 21:25:45 2013// /frestml.txt/1.24/Mon Jul 15 21:25:45 2013// /fretree.txt/1.21/Mon Jul 15 21:25:45 2013// /fseqboot.txt/1.23/Mon Jul 15 21:25:45 2013// /fseqbootall.txt/1.24/Mon Jul 15 21:25:45 2013// /ftreedist.txt/1.24/Mon Jul 15 21:25:45 2013// /ftreedistpair.txt/1.24/Mon Jul 15 21:25:45 2013// D PHYLIPNEW-3.69.650/emboss_doc/text/CVS/Root0000664000175000017500000000005612000651530014661 00000000000000rice@dev.open-bio.org:/home/repository/emboss PHYLIPNEW-3.69.650/emboss_doc/text/CVS/Repository0000664000175000017500000000006012000651530016110 00000000000000emboss/emboss/embassy/phylipnew/emboss_doc/text PHYLIPNEW-3.69.650/emboss_doc/text/fproml.txt0000664000175000017500000006724012171064331015436 00000000000000 fproml Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Protein phylogeny by maximum likelihood Description Estimates phylogenies from protein amino acid sequences by maximum likelihood. The PAM, JTT, or PMB models can be employed, and also use of a Hidden Markov model of rates, with the program inferring which sites have which rates. This also allows gamma-distribution and gamma-plus-invariant sites distributions of rates across sites. It also allows different rates of change at known sites. Algorithm This program implements the maximum likelihood method for protein amino acid sequences. It uses the either the Jones-Taylor-Thornton or the Dayhoff probability model of change between amino acids. The assumptions of these present models are: 1. Each position in the sequence evolves independently. 2. 
Different lineages evolve independently. 3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify. 4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative". 5. The probabilities of change between amino acids are given by the model of Jones, Taylor, and Thornton (1992), the PMB model of Veerassamy, Smith and Tillier (2004), or the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et al., 1979).

Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probabilities of occurrence of each are, and what the average length is of a patch of positions all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.

The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions. This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at relative rates of 0.2 compared to 1.0 at other positions.
If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0. If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation. Usage Here is a sample session with fproml % fproml Protein phylogeny by maximum likelihood Input (aligned) protein sequence set(s): proml.dat Phylip tree file (optional): Phylip proml program output file [proml.fproml]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Output written to file "proml.fproml" Tree also written onto file "proml.treefile" Done. Go to the input files for this example Go to the output files for this example Command line arguments Protein phylogeny by maximum likelihood Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fproml] Phylip proml program output file Additional (Optional) qualifiers (* if not always prompted): -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array Rate for each category * -categories properties File of substitution rate categories -weights properties Weights file * -lengths boolean [N] Use branch lengths from user trees -model menu [Jones-Taylor-Thornton] Probability model for amino acid change (Values: j (Jones-Taylor-Thornton); h (Henikoff/Tillier PMBs); d (Dayhoff PAM)) -gammatype menu [Constant rate] Rate variation among sites (Values: g (Gamma distributed rates); i (Gamma+invariant sites); h (User defined HMM of rates); n (Constant rate)) * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ngammacat integer [1] Number of categories (1-9) (Integer from 1 to 9) * -invarcoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ninvarcat integer [1] Number of categories (1-9) including one for invariant sites (Integer from 1 to 9) * -invarfrac float [0.0] Fraction of invariant sites (Number from 0.000 to 1.000) * -nhmmcategories integer [1] Number of HMM rate categories (Integer from 1 to 9) * -hmmrates array [1.0] HMM category rates * -hmmprobabilities array [1.0] Probability for each HMM category * -adjsite boolean [N] Rates at adjacent sites correlated * -lambda float [1.0] Mean block length of sites having the same rate (Number 1.000 or more) * -njumble integer [0] Number of times to randomise, choose 0 if you don't want to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) * -global boolean [N] Global rearrangements -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]rough boolean [Y] Speedier but rougher analysis -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fproml] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean 
[Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -hypstate boolean [N] Reconstruct hypothetical sequence Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fproml reads any normal sequence USAs. Input files for usage example File: proml.dat 5 13 Alpha AACGTGGCCAAAT Beta AAGGTCGCCAAAC Gamma CATTTCGTCACAA Delta GGTATTTCGGCCT Epsilon GGGATCTCGGCCC Output file format fproml output starts by giving the number of species and the number of amino acid positions. If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates. There then follow the data sequences, if the user has selected the menu option to print them, with the sequences printed in groups of ten amino acids. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. The unit of branch length is the expected fraction of amino acids changed (so that 1.0 is 100 PAMs). A table is printed showing the length of each tree segment (in units of expected amino acid substitutions per position), as well as (very) rough confidence limits on their lengths. 
If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow. In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This done by comparing the likelihoods changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above in the discussion of the L option. One should also realize that if you are looking not at a previously-chosen branch but at all branches, that you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that). The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive. If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. 
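To make the two-tree test concrete, here is a small, self-contained C sketch of the statistic just described (it is only an illustration, not the code used by fproml): the per-site log-likelihood differences between two trees are summed, their empirical variance gives a standard deviation for that sum, and the trees are called significantly different when the sum exceeds 1.96 standard deviations. The per-site values below are invented purely for the example.

#include <stdio.h>
#include <math.h>

int main(void)
{
    /* Hypothetical per-site log-likelihoods under two candidate trees. */
    double lnl1[] = { -9.81, -11.02, -8.47, -10.33, -9.95, -12.10 };
    double lnl2[] = { -10.12, -11.00, -9.05, -10.60, -9.90, -12.55 };
    int n = (int)(sizeof(lnl1) / sizeof(lnl1[0]));
    double sum = 0.0, sumsq = 0.0;
    int i;

    for (i = 0; i < n; i++) {
        double d = lnl1[i] - lnl2[i];   /* per-site difference */
        sum   += d;
        sumsq += d * d;
    }
    {
        double mean = sum / n;
        /* Sample variance of the per-site differences; the variance of
           their sum over n sites is n times this. */
        double var      = (sumsq - n * mean * mean) / (n - 1);
        double sd_total = sqrt((double)n * var);

        printf("Ln L difference = %.4f  s.d. = %.4f\n", sum, sd_total);
        if (fabs(sum) > 1.96 * sd_total)
            printf("Trees differ significantly (|difference| > 1.96 s.d.)\n");
        else
            printf("Difference not significant at the 5%% level\n");
    }
    return 0;
}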
This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However, the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A), and it is not done in those cases. The branch lengths printed out are scaled in terms of 100 times the expected numbers of amino acid substitutions, scaled so that the average rate of change, averaged over all the positions analyzed, is set to 100.0, if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected percentage of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch lengths estimated here are in terms of the expected underlying numbers of changes. That means that a branch of length 26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch, but we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability.
When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14. At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead. Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However, for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file. Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol is printed in capital letters (W rather than w). One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates. Output files for usage example File: proml.fproml Amino acid sequence Maximum Likelihood method, version 3.69.650 Jones-Taylor-Thornton model of amino acid change +Beta | | +Epsilon | +------------------------------3 1------2 +------------Delta | | | +--------------------Gamma | +---------Alpha remember: this is an unrooted tree! Ln Likelihood = -131.55052 Between And Length Approx.
Confidence Limits ------- --- ------ ------- ---------- ------ 1 Alpha 0.31006 ( zero, 0.66806) ** 1 Beta 0.00010 ( zero, infinity) 1 2 0.22206 ( zero, 0.62979) * 2 3 1.00907 ( 0.13965, 1.87849) ** 3 Epsilon 0.00010 ( zero, infinity) 3 Delta 0.41176 ( zero, 0.86685) ** 2 Gamma 0.68569 ( 0.01628, 1.35510) ** * = significantly positive, P < 0.05 ** = significantly positive, P < 0.01 File: proml.treefile (Beta:0.00010,((Epsilon:0.00010,Delta:0.41176):1.00907, Gamma:0.68569):0.22206,Alpha:0.31006); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/frestboot.txt0000664000175000017500000005507512171064331016151 00000000000000 frestboot Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Bootstrapped restriction sites algorithm Description Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. 
It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development. Algorithm FRESTBOOT is a restriction site specific version of SEQBOOT. SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format. To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis. This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way, some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does. If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input. The resampling methods available are: * The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets.
The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data. (A small illustrative sketch of this resampling step appears after the sample session below.) * The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values. * Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Kuensch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. * Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters. * Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters. * Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996), but with a fraction 1/e (1/2.71828) of the characters deleted. This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained. * Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species). * Permuting characters.
This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test). * Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species. * Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats: Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there. MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects. BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format Usage Here is a sample session with frestboot % frestboot -seed 3 Bootstrapped restriction sites algorithm Input file: restboot.dat Phylip seqboot_rest program output file [restboot.frestboot]: completed replicate number 10 completed replicate number 20 completed replicate number 30 completed replicate number 40 completed replicate number 50 completed replicate number 60 completed replicate number 70 completed replicate number 80 completed replicate number 90 completed replicate number 100 Output written to file "restboot.frestboot" Done.
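The column-resampling step at the heart of the bootstrap, block bootstrap and delete-half jackknife described above can be sketched in a few lines. This is only an illustration of the idea, not the code used by FRESTBOOT; the alignment is assumed to be a dictionary of name -> string of character states, and the function names are invented for the example (the data are the first two species of restboot.dat).

import random

def bootstrap_columns(n_chars, rng):
    # Ordinary bootstrap: sample n_chars column indices with replacement.
    return [rng.randrange(n_chars) for _ in range(n_chars)]

def block_bootstrap_columns(n_chars, block_size, rng):
    # Block bootstrap: sample whole blocks of adjacent columns,
    # wrapping around past the last character as described above.
    cols = []
    for _ in range(n_chars // block_size):
        start = rng.randrange(n_chars)
        cols.extend((start + k) % n_chars for k in range(block_size))
    return cols

def delete_half_jackknife_columns(n_chars, rng):
    # Delete-half jackknife: keep a random half of the columns, no duplicates.
    return sorted(rng.sample(range(n_chars), n_chars // 2))

def resample(alignment, columns):
    # Build one resampled data set from the chosen columns.
    return {name: "".join(seq[i] for i in columns) for name, seq in alignment.items()}

data = {"Alpha": "++-+-++--+++-", "Beta": "++++--+--+++-"}
print(resample(data, bootstrap_columns(13, random.Random(3))))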
Go to the input files for this example Go to the output files for this example Command line arguments Bootstrapped restriction sites algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more sets of restriction data [-outfile] outfile [*.frestboot] Phylip seqboot_rest program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -test menu [b] Choose test (Values: b (Bootstrap); j (Jackknife); c (Permute species for each character); o (Permute character order); s (Permute within species); r (Rewrite data)) * -regular toggle [N] Altered sampling fraction * -fracsample float [100.0] Samples as percentage of sites (Number from 0.100 to 100.000) * -rewriteformat menu [p] Output format (Values: p (PHYLIP); n (NEXUS); x (XML)) * -blocksize integer [1] Block size for bootstraping (Integer 1 or more) * -reps integer [100] How many replicates (Integer 1 or more) * -justweights menu [d] Write out datasets or just weights (Values: d (Datasets); w (Weights)) -enzymes boolean [N] Is the number of enzymes present in input file * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -printdata boolean [N] Print out the data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format frestboot data files read by SEQBOOT are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programsreads any normal sequence USAs. Input files for usage example File: restboot.dat 5 13 2 Alpha ++-+-++--+++- Beta ++++--+--+++- Gamma -+--+-++-+-++ Delta ++-+----++--- Epsilon ++++----++--- Output file format frestboot output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. 
It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters. The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factors characters, so that they all are understood to be recoding one multistate character, they will be resampled together as a group. The order of species in the data sets in the output file will vary randomly. This is a precaution to help the programs that analyze these data avoid any result which is sensitive to the input order of species from showing up repeatedly and thus appearing to have evidence in its favor. The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file. Output files for usage example File: restboot.frestboot 5 13 Alpha +--++-+++- -++ Beta +++++----- -++ Gamma -----+---+ +++ Delta +--++----- -+- Epsilon +++++----- -+- 5 13 Alpha ++----+++- +++ Beta +++-----+- +++ Gamma -+-+++--++ ++- Delta ++-------- ++- Epsilon +++------- ++- 5 13 Alpha ++++++-+++ --- Beta ++++++-+++ --- Gamma --++-++--- +++ Delta +++++----- --- Epsilon +++++----- --- 5 13 Alpha ++++-+++++ --- Beta ++-+-+++++ --- Gamma ---+-++--- +++ Delta ++--+++--- --- Epsilon ++--+++--- --- 5 13 Alpha +-+++-++++ +-- Beta +++++-++++ +-- Gamma ----+----- +++ Delta +-++-+---- --- Epsilon ++++-+---- --- 5 13 Alpha +++------- +++ Beta +++------- +++ Gamma +--++++++- +-+ Delta +++------+ +-- Epsilon +++------+ +-- 5 13 Alpha ++++-+--++ ++- Beta ++++----++ ++- Gamma --+++---+- -++ Delta ++++--+++- --- Epsilon ++++--+++- --- 5 13 Alpha +--+---+++ +-- Beta ++++---+++ +-- Gamma ----++-+-+ +++ Delta +--+--++-- --- Epsilon ++++--++-- --- 5 13 Alpha +++--++--+ ++- [Part of this file has been deleted for brevity] Gamma -+--++-+++ -++ Delta ++++------ +++ Epsilon ++++------ +++ 5 13 Alpha +++---+-++ +++ Beta +++---+-++ +++ Gamma ---++++-++ -++ Delta +++----+++ --- Epsilon +++----+++ --- 5 13 Alpha ++++--+--+ +-- Beta +++++----+ +-- Gamma ---+-+-+++ +++ Delta ++++-----+ +-- Epsilon +++++----+ +-- 5 13 Alpha +-----+--- +++ Beta +++++++--- +++ Gamma +------+++ +-- Delta +-----+--- +-- Epsilon +++++++--- +-- 5 13 Alpha +-++--+-++ +-- Beta ++++--+-++ +-- Gamma +---++++-- -++ Delta +-++------ --- Epsilon ++++------ --- 5 13 Alpha +++-+-++++ ++- Beta +++---++++ ++- Gamma --++--++-- +++ Delta +++--+++-- --- Epsilon +++--+++-- --- 5 13 Alpha ++-+++--++ +-- Beta +++-++--++ +-- Gamma ----+++--- +++ Delta ++-----+-- --- Epsilon +++----+-- --- 5 13 Alpha +---++---- ++- Beta ++---+---- ++- Gamma --++-+---- -++ Delta +-----++++ --- Epsilon ++----++++ --- 5 13 Alpha +++-++++-+ +++ Beta +++++----+ +++ Gamma -++------+ 
+++ Delta +++-+---++ --- Epsilon +++++---++ --- Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnapars.txt0000664000175000017500000004421312171064331015730 00000000000000 fdnapars Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function DNA parsimony algorithm Description Estimates phylogenies by the parsimony method using nucleic acid sequences. Allows use the full IUB ambiguity codes, and estimates ancestral nucleotide states. Gaps treated as a fifth nucleotide state. It can also do transversion parsimony. Can cope with multifurcations, reconstruct ancestral states, use 0/1 character weights, and infer branch lengths Algorithm This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are analogous to those of MIX: 1. Each site evolves independently. 2. Different lineages evolve independently. 3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny. 4. 
The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch. 5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986). Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events. Dnapars can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch. It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier versions of Dnapars. The input data is standard. The first line of the input file contains the number of species and the number of sites. Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion. Usage Here is a sample session with fdnapars % fdnapars DNA parsimony algorithm Input (aligned) nucleotide sequence set(s): dnapars.dat Phylip tree file (optional): Phylip dnapars program output file [dnapars.fdnapars]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements on the first of the trees tied for best !---------! ......... ......... Collapsing best trees . Output written to file "dnapars.fdnapars" Tree also written onto file "dnapars.treefile" Done. 
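The Fitch (1971) counting step mentioned in the Algorithm section above can be illustrated for a single site. This toy sketch handles only a strictly bifurcating tree and plain bases; it ignores the ambiguity codes, gaps, multifurcations and tree rearrangement that the program itself deals with, and the topology chosen for the example is arbitrary.

def fitch_site(tree, state_of):
    # tree: a tip name (string) or a pair (left_subtree, right_subtree).
    # state_of: dict mapping tip name -> base observed at this site.
    # Returns (possible states at this node, changes needed below it).
    if isinstance(tree, str):
        return {state_of[tree]}, 0
    left, right = tree
    lset, lcost = fitch_site(left, state_of)
    rset, rcost = fitch_site(right, state_of)
    if lset & rset:                        # intersection non-empty: no extra change
        return lset & rset, lcost + rcost
    return lset | rset, lcost + rcost + 1  # disjoint sets: one additional change

# The first site of dnapars.dat on one arbitrary bifurcating topology:
tree = ((("Alpha", "Beta"), "Gamma"), ("Delta", "Epsilon"))
site = {"Alpha": "A", "Beta": "A", "Gamma": "C", "Delta": "G", "Epsilon": "G"}
states, changes = fitch_site(tree, site)
print(changes)   # 2 changes suffice for this site on this tree

Summing this count over all sites gives the number of changes the method seeks to minimize.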
Go to the input files for this example Go to the output files for this example Command line arguments DNA parsimony algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fdnapars] Phylip dnapars program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -maxtrees integer [10000] Number of trees to save (Integer from 1 to 1000000) * -[no]thorough toggle [Y] More thorough search * -[no]rearrange boolean [Y] Rearrange on just one best tree -transversion boolean [N] Use transversion parsimony * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1.0] Threshold value (Number 1.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdnapars] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -stepbox boolean [N] Print out steps in each site -ancseq boolean [N] Print sequences at all nodes of tree -[no]treeprint boolean [Y] Print out tree * -[no]dotdiff boolean [Y] Use dot differencing to display results Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnapars reads any normal sequence USAs. Input files for usage example File: dnapars.dat 5 13 Alpha AACGUGGCCAAAU Beta AAGGUCGCCAAAC Gamma CAUUUCGUCACAA Delta GGUAUUUCGGCCU Epsilon GGGAUCUCGGCCC Output file format fdnapars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". 
Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other. If option 2 is toggled on, a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item "." appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across sites. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, this is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989). It uses the mean and variance of the differences in the number of steps between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different, then the trees are declared significantly different. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across sites are computed for all pairs of trees.
To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: dnapars.fdnapars DNA parsimony algorithm, version 3.69.650 One most parsimonious tree found: +-----Epsilon +----------------------------3 +------------2 +-------Delta | | | +----------------Gamma | 1----Beta | +---------Alpha requires a total of 19.000 between and length ------- --- ------ 1 2 0.217949 2 3 0.487179 3 Epsilon 0.096154 3 Delta 0.134615 2 Gamma 0.275641 1 Beta 0.076923 1 Alpha 0.173077 File: dnapars.treefile (((Epsilon:0.09615,Delta:0.13462):0.48718,Gamma:0.27564):0.21795, Beta:0.07692,Alpha:0.17308); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdrawtree.txt0000664000175000017500000002457112171064331016122 00000000000000 fdrawtree Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Plots an unrooted tree diagram Description Plots unrooted phylogenies, cladograms, circular trees and phenograms in a wide variety of user-controllable formats. The program is interactive and allows previewing of the tree on PC, Macintosh, or X Windows screens, or on Tektronix or Digital graphics terminals. Final output can be to a file formatted for one of the drawing programs, for a ray-tracing or VRML browser, or one at can be sent to a laser printer (such as Postscript or PCL-compatible printers), on graphics screens or terminals, on pen plotters or on dot matrix printers capable of graphics. Similar to DRAWGRAM but plots unrooted phylogenies. Algorithm DRAWTREE interactively plots an unrooted tree diagram, with many options including orientation of tree and branches, label sizes and angles, margin sizes. Particularly if you can use your computer screen to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want. To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation. As with DRAWGRAM, to run DRAWTREE you need a compiled copy of the program, a font file, and a tree file. 
The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed. Usage Here is a sample session with fdrawtree % fdrawtree -previewer n Plots an unrooted tree diagram Phylip tree file: drawgram.tree Phylip drawtree output file [drawgram.fdrawtree]: DRAWTREE from PHYLIP version 3.69.650 Reading tree ... Tree has been read. Loading the font ... Font loaded. Writing plot file ... Plot written to file "drawgram.fdrawtree" Done. Go to the input files for this example Go to the output files for this example Command line arguments Plots an unrooted tree diagram Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-intreefile] tree Phylip tree file [-plotfile] outfile [*.fdrawtree] Phylip drawtree output file Additional (Optional) qualifiers (* if not always prompted): -plotter menu [l] Plotter or printer the tree will be drawn on (Values: l (Postscript printer file format); m (PICT format (for drawing programs)); j (HP Laserjet 75 dpi PCL file format); s (HP Laserjet 150 dpi PCL file format); y (HP Laserjet 300 dpi PCL file format); w (MS-Windows Bitmap); f (FIG 2.0 drawing program format); a (Idraw drawing program format); z (VRML Virtual Reality Markup Language file); n (PCX 640x350 file format (for drawing programs)); p (PCX 800x600 file format (for drawing programs)); q (PCX 1024x768 file format (for drawing programs)); k (TeKtronix 4010 graphics terminal); x (X Bitmap format); v (POVRAY 3D rendering program file); r (Rayshade 3D rendering program file); h (Hewlett-Packard pen plotter (HPGL file format)); d (DEC ReGIS graphics (VT240 terminal)); e (Epson MX-80 dot-matrix printer); c (Prowriter/Imagewriter dot-matrix printer); t (Toshiba 24-pin dot-matrix printer); o (Okidata dot-matrix printer); b (Houston Instruments plotter); u (other (one you have inserted code for))) -previewer menu [x] Previewing device (Values: n (Will not be previewed); I i (MSDOS graphics screen m:Macintosh screens); x (X Windows display); w (MS Windows display); k (TeKtronix 4010 graphics terminal); d (DEC ReGIS graphics (VT240 terminal)); o (Other (one you have inserted code for))) -iterate menu [e] Iterate to improve tree (Values: n (No); e (Equal-Daylight algorithm); b (n-Body algorithm)) -lengths boolean [N] Use branch lengths from user trees -labeldirection menu [m] Label direction (Values: a (along); f (fixed); r (radial); m (middle)) -treeangle float [90.0] Angle the tree is to be plotted (Number from -360.000 to 360.000) -arc float [360] Degrees the arc should occupy (Number from 0.000 to 360.000) * -labelrotation float [90.0] Angle of labels (0 degrees is horizontal for a tree growing vertically) (Number from 0.000 to 360.000) -[no]rescaled toggle [Y] Automatically rescale branch lengths * -bscale float [1.0] Centimeters per unit branch length (Any numeric value) -treedepth float [0.53] Depth of tree as fraction of its breadth (Number from 0.100 to 100.000) * -xmargin float [1.65] Horizontal margin (cm) (Number 0.100 or more) * -ymargin float [2.16] Vertical margin (cm) (Number 0.100 or more) * -xrayshade float [1.65] Horizontal margin (pixels) 
(Number 0.100 or more) * -yrayshade float [2.16] Vertical margin (pixels) (Number 0.100 or more) -paperx float [20.63750] Paper width (Any numeric value) -papery float [26.98750] Paper height (Number 0.100 or more) -pagesheight float [1] Number of trees across height of page (Number 1.000 or more) -pageswidth float [1] Number of trees across width of page (Number 1.000 or more) -hpmargin float [0.41275] Horizontal overlap (cm) (Number 0.001 or more) -vpmargin float [0.53975] Vertical overlap (cm) (Number 0.001 or more) Advanced (Unprompted) qualifiers: -fontfile string [font1] Fontfile name (Any string) Associated qualifiers: "-plotfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdrawtree input ... Input files for usage example File: drawgram.tree (Delta,(Epsilon,(Gamma,(Beta,Alpha)))); Output file format fdrawtree outputs ... Output files for usage example Graphics File: drawgram.fdrawtree [fdrawtree results] Data files The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description fdrawgram Plots a cladogram- or phenogram-like rooted tree diagram fretree Interactive tree rearrangement Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdolmove.txt0000664000175000017500000003271012171064331015744 00000000000000 fdolmove Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Interactive Dollo or polymorphism parsimony Description Interactive construction of phylogenies from discrete character data with two states (0 and 1) using the Dollo or polymorphism parsimony criteria. Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand. 
Algorithm DOLMOVE is an interactive parsimony program which uses the Dollo and Polymorphism parsimony criteria. It is inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. DOLMOVE reads in a data set which is prepared in almost the same format as one for the Dollo and polymorphism parsimony program DOLLOP. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology. This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to PCDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed. Usage Here is a sample session with fdolmove % fdolmove Interactive Dollo or polymorphism parsimony Phylip character discrete states file: dolmove.dat Phylip tree file (optional): NEXT? (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q Do you want to write out the tree to a file? (Y or N): Y Interactive Dollo or polymorphism parsimony, version 3.69.650 5 species, 6 characters Computing steps needed for compatibility in sites ... (unrooted) 5.0 Steps 4 chars compatible Dollo ,-----------5:Epsilon --9 ! ,--------4:Delta `--8 ! ,-----3:Gamma `--7 !
,--2:Beta `--6 `--1:Alpha Tree written to file "dolmove.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Interactive Dollo or polymorphism parsimony Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing data set [-intreefile] tree Phylip tree file (optional) Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -ancfile properties Ancestral states file -factorfile properties Factors file -method menu [d] Parsimony method (Values: d (Dollo); p (Polymorphism)) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1] Threshold value (Number 0.000 or more) -initialtree menu [Arbitary] Initial tree (Values: a (Arbitary); u (User); s (Specify)) -screenwidth integer [80] Width of terminal screen in characters (Any integer value) -screenlines integer [24] Number of lines on screen (Any integer value) -outtreefile outfile [*.fdolmove] Phylip tree output file (optional) Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdolmove reads discrete character data with "?", "P", "B" states allowed. . (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both". There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. 
Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states. Input files for usage example File: dolmove.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Output file format fdolmove output: If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?") whether the ancestral state 0 or 1 will give the fewest changes (according to the criterion in use). If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and many of these will be shown as "?". If the A option is not used, then the program will assume 0 as the ancestral state. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1. When DOLMOVE calculates the number of characters compatible with the tree, it will take the F option into account and count the multistate characters as units, counting a character as compatible with the tree only when all of the binary characters corresponding to it are compatible with the tree. Output files for usage example File: dolmove.treefile (Epsilon,(Delta,(Gamma,(Beta,Alpha)))); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
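To make the Kluge-Farris recoding described under "Input file format" above more concrete, here is a minimal Python sketch. It is purely illustrative and is not part of PHYLIP or EMBOSS (within the package, FACTOR performs this recoding); the function name and the edge-list representation of the character-state tree are invented for this example.

def recode_character(arrows, ancestral):
    # arrows: (parent_state, child_state) edges of the character-state tree;
    # each old state is recoded by which arrows lie on its path back to the
    # ancestral state (the additive binary coding described above).
    parent = {child: par for par, child in arrows}
    def ancestry(state):
        path = set()
        while state != ancestral:
            path.add((parent[state], state))
            state = parent[state]
        return path
    states = [ancestral] + [child for _, child in arrows]
    return {s: ''.join('1' if a in ancestry(s) else '0' for a in arrows)
            for s in states}

# Character-state tree from the text: 1 ---> 0 ---> 2, with 0 ---> 3.
# Listing the arrows as (0,3), (0,2), (1,0) reproduces the table above:
# {1: '000', 3: '101', 2: '011', 0: '001'}
print(recode_character([(0, 3), (0, 2), (1, 0)], ancestral=1))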
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fkitsch.txt0000664000175000017500000004403712171064331015571 00000000000000 fkitsch Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Fitch-Margoliash method with contemporary tips Description Estimates phylogenies from distance matrix data under the "ultrametric" model which is the same as the additive tree model except that an evolutionary clock is assumed. The Fitch-Margoliash criterion and other least squares criteria, or the Minimum Evolution criterion are possible. This program will be useful with distances computed from molecular sequences, restriction sites or fragments distances, with distances from DNA hybridization measurements, and with genetic distances computed from gene frequencies. Algorithm This program carries out the Fitch-Margoliash and Least Squares methods, plus a variety of others of the same family, with the assumption that all tip species are contemporaneous, and that there is an evolutionary clock (in effect, a molecular clock). This means that branches of the tree cannot be of arbitrary length, but are constrained so that the total length from the root of the tree to any species is the same. The quantity minimized is the same weighted sum of squares described in the Distance Matrix Methods documentation file. 
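For orientation, the weighted sum of squares referred to here has the form of a sum over species pairs of (Dij - dij)^2 / Dij^P, where Dij is the observed distance, dij the distance summed along the tree, and P the power set by the -power option. The short Python sketch below only illustrates that formula under those assumptions; it is not the fkitsch code, and it ignores subreplicate weights and the handling of zero observed distances.

def weighted_sum_of_squares(obs, expected, power=2.0):
    # obs, expected: symmetric matrices (lists of lists) of observed and
    # tree-derived distances; power is P (2.0 = Fitch-Margoliash, 0.0 = unweighted least squares)
    ss = 0.0
    n = len(obs)
    for i in range(n):
        for j in range(i + 1, n):
            weight = 1.0 if power == 0.0 else 1.0 / obs[i][j] ** power
            ss += weight * (obs[i][j] - expected[i][j]) ** 2
    return ss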
The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that:
* Each distance is measured independently from the others: no item of data contributes to more than one distance.
* The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation.
These assumptions can be traced in the least squares methods of programs FITCH and KITSCH but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumption is less obvious. THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b). For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better. Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIP by the program RESTDIST. For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true). FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time. The method may be considered as providing an estimate of the phylogeny. Alternatively, it can be considered as a phenetic clustering of the tip species.
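The clock constraint can be checked directly on a tree with branch lengths: every root-to-tip path must sum to the same height. The sketch below is only an illustration (it is not PHYLIP code, and the nested-pair tree representation is invented for brevity); it uses the rounded branch lengths of the tree that the example fkitsch run further below writes to kitsch.treefile.

def tip_depths(node, depth=0.0, out=None):
    # node is either a tip name (str) or a list of (child, branch_length) pairs
    if out is None:
        out = {}
    if isinstance(node, str):
        out[node] = depth
    else:
        for child, blen in node:
            tip_depths(child, depth + blen, out)
    return out

human_chimp = [("Human", 0.13460), ("Chimp", 0.13460)]
great_apes  = [(human_chimp, 0.02836), ("Gorilla", 0.16296)]
with_orang  = [(great_apes, 0.07638), ("Orang", 0.23933)]
with_gibbon = [(with_orang, 0.06639), ("Gibbon", 0.30572)]
with_mouse  = [(with_gibbon, 0.42923), ("Mouse", 0.73495)]
root        = [(with_mouse, 0.07790), ("Bovine", 0.81285)]

# Every tip comes out at (essentially) the same height, 0.81285, matching
# the "Height" column of the kitsch.fkitsch output shown below.
print(tip_depths(root))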
This method minimizes an objective function, the sum of squares, not only setting the levels of the clusters so as to do so, but rearranging the hierarchy of clusters to try to find alternative clusterings that give a lower overall sum of squares. When the power option P is set to a value of P = 0.0, so that we are minimizing a simple sum of squares of the differences between the observed distance matrix and the expected one, the method is very close in spirit to Unweighted Pair Group Arithmetic Average Clustering (UPGMA), also called Average-Linkage Clustering. If the topology of the tree is fixed and there turn out to be no branches of negative length, its result should be the same as UPGMA in that case. But since it tries alternative topologies and (unless the N option is set) it combines nodes that otherwise could result in a reversal of levels, it is possible for it to give a different, and better, result than simple sequential clustering. Of course UPGMA itself is available as an option in program NEIGHBOR. An important use of this method will be to do a formal statistical test of the evolutionary clock hypothesis. This can be done by comparing the sums of squares achieved by FITCH and by KITSCH, BUT SOME CAVEATS ARE NECESSARY. First, the assumption is that the observed distances are truly independent, that no original data item contributes to more than one of them (not counting the two reciprocal distances from i to j and from j to i). THIS WILL NOT HOLD IF THE DISTANCES ARE OBTAINED FROM GENE FREQUENCIES, FROM MORPHOLOGICAL CHARACTERS, OR FROM MOLECULAR SEQUENCES. It may be invalid even for immunological distances and levels of DNA hybridization, provided that the use of common standard for all members of a row or column allows an error in the measurement of the standard to affect all these distances simultaneously. It will also be invalid if the numbers have been collected in experimental groups, each measured by taking differences from a common standard which itself is measured with error. Only if the numbers in different cells are measured from independent standards can we depend on the statistical model. The details of the test and the assumptions are discussed in my review paper on distance methods (Felsenstein, 1984a). For further and sometimes irrelevant controversy on these matters see the papers by Farris (1981, 1985, 1986) and myself (Felsenstein, 1986, 1988b). A second caveat is that the distances must be expected to rise linearly with time, not according to any other curve. Thus it may be necessary to transform the distances to achieve an expected linearity. If the distances have an upper limit beyond which they could not go, this is a signal that linearity may not hold. It is also VERY important to choose the power P at a value that results in the standard deviation of the variation of the observed from the expected distances being the P/2-th power of the expected distance. To carry out the test, fit the same data with both FITCH and KITSCH, and record the two sums of squares. If the topology has turned out the same, we have N = n(n-1)/2 distances which have been fit with 2n-3 parameters in FITCH, and with n-1 parameters in KITSCH. Then the difference between S(K) and S(F) has d1 = n-2 degrees of freedom. It is statistically independent of the value of S(F), which has d2 = N-(2n-3) degrees of freedom. 
The ratio of mean squares

   [S(K) - S(F)] / d1
   ------------------
        S(F) / d2

should, under the evolutionary clock, have an F distribution with n-2 and N-(2n-3) degrees of freedom respectively. The test desired is that the F ratio is in the upper tail (say the upper 5%) of its distribution. If the S (subreplication) option is in effect, the above degrees of freedom must be modified by noting that N is not n(n-1)/2 but is the sum of the numbers of replicates of all cells in the distance matrix read in, which may be either square or triangular. A further explanation of the statistical test of the clock is given in a paper of mine (Felsenstein, 1986). The program uses a similar tree construction method to the other programs in the package and, like them, is not guaranteed to give the best-fitting tree. The assignment of the branch lengths for a given topology is a least squares fit, subject to the constraints against negative branch lengths, and should not be able to be improved upon. KITSCH runs more quickly than FITCH. Usage Here is a sample session with fkitsch % fkitsch Fitch-Margoliash method with contemporary tips Phylip distance matrix file: kitsch.dat Phylip tree file (optional): Phylip kitsch program output file [kitsch.fkitsch]: Adding species: 1. Bovine 2. Mouse 3. Gibbon 4. Orang 5. Gorilla 6. Chimp 7. Human Doing global rearrangements !-------------! ............. Output written to file "kitsch.fkitsch" Tree also written onto file "kitsch.treefile" Done. Go to the input files for this example Go to the output files for this example Command line arguments Fitch-Margoliash method with contemporary tips Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-datafile] distances File containing one or more distance matrices [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fkitsch] Phylip kitsch program output file Additional (Optional) qualifiers (* if not always prompted): -matrixtype menu [s] Type of data matrix (Values: s (Square); u (Upper triangular); l (Lower triangular)) -minev boolean [N] Minimum evolution * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -power float [2.0] Power (Any numeric value) -negallowed boolean [N] Negative branch lengths allowed -replicates boolean [N] Subreplicates -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fkitsch] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit.
More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fkitsch requires a bifurcating tree, unlike FITCH, which requires an unrooted tree with a trifurcation at its base. Thus the tree shown below would be written: ((D,E),(C,(A,B))); If a tree with a trifurcation at the base is by mistake fed into the U option of KITSCH then some of its species (the entire rightmost furc, in fact) will be ignored and too small a tree read in. This should result in an error message and the program should stop. It is important to understand the difference between the User Tree formats for KITSCH and FITCH. You may want to use RETREE to convert a user tree that is suitable for FITCH into one suitable for KITSCH or vice versa. Input files for usage example File: kitsch.dat 7 Bovine 0.0000 1.6866 1.7198 1.6606 1.5243 1.6043 1.5905 Mouse 1.6866 0.0000 1.5232 1.4841 1.4465 1.4389 1.4629 Gibbon 1.7198 1.5232 0.0000 0.7115 0.5958 0.6179 0.5583 Orang 1.6606 1.4841 0.7115 0.0000 0.4631 0.5061 0.4710 Gorilla 1.5243 1.4465 0.5958 0.4631 0.0000 0.3484 0.3083 Chimp 1.6043 1.4389 0.6179 0.5061 0.3484 0.0000 0.2692 Human 1.5905 1.4629 0.5583 0.4710 0.3083 0.2692 0.0000 Output file format fkitsch output is a rooted tree, together with the sum of squares, the number of tree topologies searched, and, if the power P is at its default value of 2.0, the Average Percent Standard Deviation is also supplied. The lengths of the branches of the tree are given in a table, that also shows for each branch the time at the upper end of the branch. "Time" here really means cumulative branch length from the root, going upwards (on the printed diagram, rightwards). For each branch, the "time" given is for the node at the right (upper) end of the branch. It is important to realize that the branch lengths are not exactly proportional to the lengths drawn on the printed tree diagram! In particular, short branches are exaggerated in the length on that diagram so that they are more visible. Output files for usage example File: kitsch.fkitsch 7 Populations Fitch-Margoliash method with contemporary tips, version 3.69.650 __ __ 2 \ \ (Obs - Exp) Sum of squares = /_ /_ ------------ 2 i j Obs negative branch lengths not allowed +-------Human +-6 +----5 +-------Chimp ! ! +---4 +---------Gorilla ! ! +------------------------3 +--------------Orang ! ! +----2 +------------------Gibbon ! ! --1 +-------------------------------------------Mouse ! +------------------------------------------------Bovine Sum of squares = 0.107 Average percent standard deviation = 5.16213 From To Length Height ---- -- ------ ------ 6 Human 0.13460 0.81285 5 6 0.02836 0.67825 6 Chimp 0.13460 0.81285 4 5 0.07638 0.64990 5 Gorilla 0.16296 0.81285 3 4 0.06639 0.57352 4 Orang 0.23933 0.81285 2 3 0.42923 0.50713 3 Gibbon 0.30572 0.81285 1 2 0.07790 0.07790 2 Mouse 0.73495 0.81285 1 Bovine 0.81285 0.81285 File: kitsch.treefile ((((((Human:0.13460,Chimp:0.13460):0.02836,Gorilla:0.16296):0.07638, Orang:0.23933):0.06639,Gibbon:0.30572):0.42923,Mouse:0.73495):0.07790, Bovine:0.81285); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
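As a worked illustration of the clock test described in the Algorithm section above: suppose ffitch and fkitsch have been run on the same 7-species distance matrix and the same topology, giving sums of squares S(F) and S(K). The numbers below are hypothetical, invented for the example; scipy is used only for the F-distribution tail probability, and none of this is part of the EMBOSS programs.

from scipy.stats import f as f_dist

n = 7                        # number of species
N = n * (n - 1) // 2         # number of distances (no subreplication)
S_F = 0.105                  # hypothetical sum of squares from FITCH
S_K = 0.130                  # hypothetical sum of squares from KITSCH

d1 = n - 2                   # extra parameters fitted by FITCH relative to KITSCH
d2 = N - (2 * n - 3)         # residual degrees of freedom of the FITCH fit
F_ratio = ((S_K - S_F) / d1) / (S_F / d2)
p_value = f_dist.sf(F_ratio, d1, d2)    # upper-tail probability
print(F_ratio, p_value)      # reject the clock if the p-value is small (say < 0.05)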
See also Program name Description efitch Fitch-Margoliash and least-squares distance methods ekitsch Fitch-Margoliash method with contemporary tips eneighbor Phylogenies from distance matrix by N-J or UPGMA method ffitch Fitch-Margoliash and least-squares distance methods fneighbor Phylogenies from distance matrix by N-J or UPGMA method Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/ftreedist.txt0000664000175000017500000004042212171064331016121 00000000000000 ftreedist Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Calculate distances between trees Description Computes the Branch Score distance between trees, which allows for differences in tree topology and which also makes use of branch lengths. Also computes the Robinson-Foulds symmetric difference distance between trees, which allows for differences in tree topology but does not use branch lengths. Algorithm This program computes distances between trees. Two distances are computed, the Branch Score Distance of Kuhner and Felsenstein (1994), and the more widely known Symmetric Difference of Robinson and Foulds (1981). The Branch Score Distance uses branch lengths, and can only be calculated when the trees have lengths on all branches. The Symmetric Difference does not use branch length information, only the tree topologies. It must also be borne in mind that neither distance has any immediate statistical interpretation -- we cannot say whether a larger distance is significantly larger than a smaller one. These distances are computed by considering all possible branches that could exist on the two trees. Each branch divides the set of species into two groups -- the ones connected to one end of the branch and the ones connected to the other. This makes a partition of the full set of species. Thus the tree (in Newick notation) ((A,C),(D,(B,E))) has two internal branches. One induces the partition {A, C | B, D, E} and the other induces the partition {A, C, D | B, E}. A different tree with the same set of species, (((A,D),C),(B,E)) has internal branches that correspond to the two partitions {A, C, D | B, E} and {A, D | B, C, E}. Note that the other branches, all of which are external branches, induce partitions that separate one species from all the others. Thus there are 5 partitions like this: {C | A, B, D, E} on each of these trees. These are always present on all trees, provided that each tree has each species at the end of its own branch. In the case of the Branch Score distance, each partition that does exist on a tree also has a branch length associated with it.
Thus if the tree is (((A:0.1,D:0.25):0.05,C:0.01):0.2,(B:0.3,E:0.8):0.2) the list of partitions and their branch lengths is:

   {A | B, C, D, E}     0.1
   {D | A, B, C, E}     0.25
   {A, D | B, C, E}     0.05
   {C | A, B, D, E}     0.01
   {A, D, C | B, E}     0.4
   {B | A, C, D, E}     0.3
   {E | A, B, C, D}     0.8

Note that the tree is being treated as unrooted here, so that the branch lengths on either side of the rootmost node are summed up to get a branch length of 0.4. The Branch Score Distance imagines us as having made a list of all possible partitions, the ones shown above and also all 8 other possible partitions, which correspond to branches that are not found in this tree. These are assigned branch lengths of 0. For two trees, we imagine constructing these lists, and then summing the squared differences between the branch lengths. Thus if both trees have branches {A, D | B, C, E}, the sum contains the square of the difference between the branch lengths. If one tree has the branch and the other doesn't, it contains the square of the difference between the branch length and zero (in other words, the square of that branch length). If both trees do not have a particular branch, nothing is added to the sum because the difference is then between 0 and 0. The Branch Score Distance takes this sum of squared differences and computes its square root. Note that it has some desirable properties. When small branches differ in tree topology, it is not very big. When branches are both present but differ in length, it is affected. The Symmetric Difference is simply a count of how many partitions there are, among the two trees, that are on one tree and not on the other. In the example above there are two partitions, {A, C | B, D, E} and {A, D | B, C, E}, each of which is present on only one of the two trees. The Symmetric Difference between the two trees is therefore 2. When the two trees are fully resolved bifurcating trees, their symmetric distance must be an even number; it can range from 0 to twice the number of internal branches, which for n species is 2n-6. Note the relationship between the two distances. If all branches of both trees have length 1.0, the Branch Score Distance is the square root of the Symmetric Difference, as each branch that is present in one but not in the other results in 1.0 being added to the sum of squared differences. We have assumed that nothing is lost if the trees are treated as unrooted trees. It is easy to define a counterpart to the Branch Score Distance and one to the Symmetric Difference for these rooted trees. Each branch then defines a set of species, namely the clade defined by that branch. Thus if the first of the two trees above were considered as a rooted tree it would define the three clades {A, C}, {B, D, E}, and {B, E}. The Branch Score Distance is computed from the branch lengths for all possible sets of species, with 0 put for each set that does not occur on that tree. The table above will be nearly the same, but with two entries instead of one for the sets on either side of the root, {A, C, D} and {B, E}. The Symmetric Difference between two rooted trees is simply the count of the number of clades that are defined by one but not by the other. For the second tree the clades would be {A, D}, {A, C, D}, and {B, E}. The Symmetric Difference between these two rooted trees would then be 4. Although the examples we have discussed have involved fully bifurcating trees, the input trees can have multifurcations. This does not cause any complication for the Branch Score Distance.
For the Symmetric Difference, it can lead to distances that are odd numbers. However, note one strong restriction. The trees should all have the same list of species. If you use one set of species in the first two trees, and another in the second two, and choose distances for adjacent pairs, the distances will be incorrect and will depend on the order of these pairs in the input tree file, in odd ways. Usage Here is a sample session with ftreedist % ftreedist Calculate distances between trees Phylip tree file: treedist.dat Phylip treedist program output file [treedist.ftreedist]: Output written to file "treedist.ftreedist" Done. Go to the input files for this example Go to the output files for this example Example 2 % ftreedist -dtype s Calculate distances between trees Phylip tree file: treedist2.dat Phylip treedist program output file [treedist2.ftreedist]: Output written to file "treedist2.ftreedist" Done. Go to the input files for this example Go to the output files for this example Command line arguments Calculate distances between trees Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-intreefile] tree Phylip tree file [-outfile] outfile [*.ftreedist] Phylip treedist program output file Additional (Optional) qualifiers: -dtype menu [b] Distance type (Values: s (Symmetric difference); b (Branch score distance)) -pairing menu [a] Tree pairing method (Values: a (Distances between adjacent pairs in tree file); p (Distances between all possible pairs in tree file)) -style menu [v] Distances output option (Values: f (Full matrix); v (Verbose, one pair per line); s (Sparse, one pair per line)) -noroot boolean [N] Trees to be treated as rooted -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format ftreedist reads one input tree file. If the number of trees is given, it is actually ignored and all trees in the tree file are considered, even if there are more trees than indicated by the number. There is no maximum number of trees that can be processed but, if you feed in too many, there may be an error message about running out of memory. The problem is particularly acute if you choose the option to examine all possible pairs of trees in an input tree file. Thus if there are 1,000 trees in the input tree file, keep in mind that all possible pairs means 1,000,000 pairs to be examined! 
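To make the partition bookkeeping of the Algorithm section concrete, here is a small illustrative Python sketch (it is not the ftreedist implementation, and the unit branch lengths in the second half are invented just to show the relationship between the two distances noted above). It uses the two five-species example topologies discussed earlier, with each split entered by hand rather than parsed from Newick.

import math

TAXA = frozenset("ABCDE")

def splits(sides):
    # the canonical form of a partition is the unordered pair {side, complement}
    return {frozenset([frozenset(s), TAXA - frozenset(s)]) for s in sides}

# All partitions (external and internal) of ((A,C),(D,(B,E))) and (((A,D),C),(B,E)):
tree1 = splits(["A", "B", "C", "D", "E", "AC", "BE"])
tree2 = splits(["A", "B", "C", "D", "E", "AD", "BE"])

print(len(tree1 ^ tree2))    # Symmetric Difference: 2, as stated in the text

# With every branch given a (made-up) length of 1.0, the Branch Score Distance
# is the square root of that count:
len1 = {s: 1.0 for s in tree1}
len2 = {s: 1.0 for s in tree2}
bsd = math.sqrt(sum((len1.get(s, 0.0) - len2.get(s, 0.0)) ** 2
                    for s in set(len1) | set(len2)))
print(bsd)                   # sqrt(2), about 1.414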
Input files for usage example File: treedist.dat (A:0.1,(B:0.1,(H:0.1,(D:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,((J:0.1,H:0.1):0.1,(((G:0.1,E:0.1):0.1, (F:0.1,I:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1, D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,(((J:0.1,H:0.1):0.1, D:0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1, D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); Input files for usage example 2 File: treedist2.dat (A,(B,(H,(D,(J,(((G,E),(F,I)),C)))))); (A,(B,(D,((J,H),(((G,E),(F,I)),C))))); (A,(B,(D,(H,(J,(((G,E),(F,I)),C)))))); (A,(B,(E,(G,((F,I),((J,(H,D)),C)))))); (A,(B,(E,(G,((F,I),(((J,H),D),C)))))); (A,(B,(E,((F,I),(G,((J,(H,D)),C)))))); (A,(B,(E,((F,I),(G,(((J,H),D),C)))))); (A,(B,(E,((G,(F,I)),((J,(H,D)),C))))); (A,(B,(E,((G,(F,I)),(((J,H),D),C))))); (A,(B,(E,(G,((F,I),((J,(H,D)),C)))))); (A,(B,(D,(H,(J,(((G,E),(F,I)),C)))))); (A,(B,(E,((G,(F,I)),((J,(H,D)),C))))); Output file format If any of the four types of analysis are selected, the user must specify how they want the results presented. The Full matrix (choice F) is a table showing all distances. It is written onto the output file. The table is presented as groups of 10 columns. Here is the Full matrix for the 12 trees in the input tree file which is given as an example at the end of this page. Tree distance program, version 3.6 Symmetric differences between all pairs of trees in tree file: 1 2 3 4 5 6 7 8 9 10 \------------------------------------------------------------ 1 | 0 4 2 10 10 10 10 10 10 10 2 | 4 0 2 10 8 10 8 10 8 10 3 | 2 2 0 10 10 10 10 10 10 10 4 | 10 10 10 0 2 2 4 2 4 0 5 | 10 8 10 2 0 4 2 4 2 2 6 | 10 10 10 2 4 0 2 2 4 2 7 | 10 8 10 4 2 2 0 4 2 4 8 | 10 10 10 2 4 2 4 0 2 2 9 | 10 8 10 4 2 4 2 2 0 4 10 | 10 10 10 0 2 2 4 2 4 0 11 | 2 2 0 10 10 10 10 10 10 10 12 | 10 10 10 2 4 2 4 0 2 2 11 12 \------------ 1 | 2 10 2 | 2 10 3 | 0 10 4 | 10 2 5 | 10 4 6 | 10 2 7 | 10 4 8 | 10 0 9 | 10 2 10 | 10 2 11 | 0 10 12 | 10 0 The Full matrix is only available for analyses P and L (not for A or C). Option V (Verbose) writes one distance per line. The Verbose output is the default. Here it is for the example data set given below: Tree distance program, version 3.6 Symmetric differences between adjacent pairs of trees: Trees 1 and 2: 4 Trees 3 and 4: 10 Trees 5 and 6: 4 Trees 7 and 8: 4 Trees 9 and 10: 4 Trees 11 and 12: 10 Option S (Sparse or terse) is similar except that all that is given on each line are the numbers of the two trees and the distance, separated by blanks. 
This may be a convenient format if you want to write a program to read these numbers in, and you want to spare yourself the effort of having the program wade through the words on each line in the Verbose output. The first four lines of the Sparse output are titles that your program would want to skip past. Here is the Sparse output for the example trees. 1 2 4 3 4 10 5 6 4 7 8 4 9 10 4 11 12 10 Output files for usage example File: treedist.ftreedist Tree distance program, version 3.69.650 Branch score distances between adjacent pairs of trees: Trees 1 and 2: 2.000000e-01 Trees 3 and 4: 3.162278e-01 Trees 5 and 6: 2.000000e-01 Trees 7 and 8: 2.000000e-01 Trees 9 and 10: 2.000000e-01 Trees 11 and 12: 3.162278e-01 Output files for usage example 2 File: treedist2.ftreedist Tree distance program, version 3.69.650 Symmetric differences between adjacent pairs of trees: Trees 1 and 2: 4 Trees 3 and 4: 10 Trees 5 and 6: 4 Trees 7 and 8: 4 Trees 9 and 10: 4 Trees 11 and 12: 10 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description econsense Majority-rule and strict consensus tree fconsense Majority-rule and strict consensus tree ftreedistpair Calculate distance between two sets of trees Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnacomp.txt0000664000175000017500000004235112171064331015722 00000000000000 fdnacomp Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function DNA compatibility algorithm Description Estimates phylogenies from nucleic acid sequence data using the compatibility criterion, which searches for the largest number of sites which could have all states (nucleotides) uniquely evolved on the same tree. Compatibility is particularly appropriate when sites vary greatly in their rates of evolution, but we do not know in advance which are the less reliable ones. Algorithm This program implements the compatibility method for DNA sequence data. For a four-state character without a character-state tree, as in DNA sequences, the usual clique theorems cannot be applied. The approach taken in this program is to directly evaluate each tree topology by counting how many substitutions are needed in each site, comparing this to the minimum number that might be needed (one less than the number of bases observed at that site), and then evaluating the number of sites which achieve the minimum number. This is the evaluation of the tree (the number of compatible sites), and the topology is chosen so as to maximize that number. Compatibility methods originated with Le Quesne's (1969) suggestion that one ought to look for trees supported by the largest number of perfectly fitting (compatible) characters. 
Fitch (1975) showed by counterexample that one could not use the pairwise compatibility methods used in CLIQUE to discover the largest clique of jointly compatible characters. The assumptions of this method are similar to those of CLIQUE. In a paper in the Biological Journal of the Linnean Society (1981b) I discuss this matter extensively. In effect, the assumptions are that:
1. Each character evolves independently.
2. Different lineages evolve independently.
3. The ancestral base at each site is unknown.
4. The rates of change in most sites over the time spans involved in the divergence of the group are very small.
5. A few of the sites have very high rates of change.
6. We do not know in advance which are the high and which the low rate sites.
That these are the assumptions of compatibility methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that arguments such as mine are invalid and that parsimony (and perhaps compatibility) methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986). There is, however, some reason to believe that the present criterion is not the proper way to correct for the presence of some sites with high rates of change in nucleotide sequence data. It can be argued that sites showing more than two nucleotide states, even if those are compatible with the other sites, are also candidates for sites with high rates of change. It might then be more proper to use DNAPARS with the Threshold option with a threshold value of 2. Change from an occupied site to a gap is counted as one change. Reversion from a gap to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a gap N bases long is N separate events. This may be an overcorrection. When we have nonoverlapping gaps, we could instead code a gap as a single event by changing all but the first "-" in the gap into "?" characters. In this way only the first base of the gap causes the program to infer a change. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of weighted compatibility differences between trees, taken across sites. If the two trees' compatibilities are more than 1.96 standard deviations different then the trees are declared significantly different. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of weighted compatibilities of sites are computed for all pairs of trees.
To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected compatibility, compatibilities for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest compatibility exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the compatibility of each tree, the differences of each from the highest one, the variance of that quantity as determined by the compatibility differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. The algorithm is a straightforward modification of DNAPARS, but with some extra machinery added to calculate, as each species is added, how many base changes are the minimum which could be required at that site. The program runs fairly quickly. Usage Here is a sample session with fdnacomp % fdnacomp -ancseq -stepbox -printdata DNA compatibility algorithm Input (aligned) nucleotide sequence set(s): dnacomp.dat Phylip tree file (optional): Phylip weights file (optional): Phylip dnacomp program output file [dnacomp.fdnacomp]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements !---------! ......... Output written to file "dnacomp.fdnacomp" Trees also written onto file "dnacomp.treefile" Go to the input files for this example Go to the output files for this example Example 2 % fdnacomp DNA compatibility algorithm Input (aligned) nucleotide sequence set(s): dnacomp.dat Phylip tree file (optional): dnacomptree.dat Phylip weights file (optional): Phylip dnacomp program output file [dnacomp.fdnacomp]: Output written to file "dnacomp.fdnacomp" Trees also written onto file "dnacomp.treefile" Go to the input files for this example Go to the output files for this example Command line arguments DNA compatibility algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) -weights properties Phylip weights file (optional) [-outfile] outfile [*.fdnacomp] Phylip dnacomp program output file Additional (Optional) qualifiers (* if not always prompted): -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdnacomp] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -stepbox boolean [N] Print steps & compatibility at sites -ancseq boolean [N] Print sequences at all nodes of tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is 
nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnacomp reads any normal sequence USAs. Input files for usage example File: dnacomp.dat 5 13 Alpha AACGUGGCCAAAU Beta AAGGUCGCCAAAC Gamma CAUUUCGUCACAA Delta GGUAUUUCGGCCU Epsilon GGGAUCUCGGCCC Input files for usage example 2 File: dnacomptree.dat ((((Epsilon,Delta),Gamma),Beta),Alpha); Output file format fdnacomp output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each character. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a gap may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: dnacomp.fdnacomp DNA compatibility algorithm, version 3.69.650 5 species, 13 sites Name Sequences ---- --------- Alpha AACGTGGCCA AAT Beta AAGGTCGCCA AAC Gamma CATTTCGTCA CAA Delta GGTATTTCGG CCT Epsilon GGGATCTCGG CCC One most parsimonious tree found: +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! 
total number of compatible sites is 11.0 steps in each site: 0 1 2 3 4 5 6 7 8 9 *----------------------------------------- 0| 2 1 3 2 0 2 1 1 1 10| 1 1 1 3 compatibility (Y or N) of each site with this tree: 0123456789 *---------- 0 ! YYNYYYYYY 10 !YYYN From To Any Steps? State at upper node 1 AABGTSGCCA AAY 1 2 maybe AABGTCGCCA AAY 2 3 yes VAKDTCGCCA CAY 3 4 yes GGKATCTCGG CCY 4 Epsilon maybe GGGATCTCGG CCC 4 Delta yes GGTATTTCGG CCT 3 Gamma yes CATTTCGTCA CAA 2 Beta maybe AAGGTCGCCA AAC 1 Alpha maybe AACGTGGCCA AAT File: dnacomp.treefile ((((Epsilon,Delta),Gamma),Beta),Alpha); Output files for usage example 2 File: dnacomp.fdnacomp DNA compatibility algorithm, version 3.69.650 User-defined tree: +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! total number of compatible sites is 11.0 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/frestdist.txt0000664000175000017500000004253212171064331016143 00000000000000 frestdist Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Calculate distance matrix from restriction sites or fragments Description Distances calculated from restriction sites data or restriction fragments data. 
The restriction sites option is the one to use to also make distances for RAPDs or AFLPs. Algorithm Restdist reads the same restriction sites format as RESTML and computes a restriction sites distance. It can also compute a restriction fragments distance. The original restriction fragments and restriction sites distance methods were introduced by Nei and Li (1979). Their original method for restriction fragments is also available in this program, although its default methods are my modifications of the original Nei and Li methods. These two distances assume that the restriction sites are accidental byproducts of random change of nucleotide sequences. For my restriction sites distance the DNA sequences are assumed to be changing according to the Kimura 2-parameter model of DNA change (Kimura, 1980). The user can set the transition/transversion rate for the model. For my restriction fragments distance there is an implicit assumption of a Jukes-Cantor (1969) model of change. The user can also set the parameter of a correction for unequal rates of evolution between sites in the DNA sequences, using a Gamma distribution of rates among sites. The Jukes-Cantor model is also implicit in the restriction fragments distance of Nei and Li (1979). It does not allow us to correct for a Gamma distribution of rates among sites. Restriction Sites Distance The restriction sites distances use data coded for the presence or absence of individual restriction sites (usually as + and - or 0 and 1). My distance is based on the proportion, out of all sites observed in one species or the other, which are present in both species. This is done to correct for the ascertainment of sites, for the fact that we are not aware of many sites because they do not appear in any species. My distance starts by computing from the particular pair of species the fraction

              n++
   f = -----------------------
       n++ + 1/2 (n+- + n-+)

where n++ is the number of sites contained in both species, n+- is the number of sites contained in the first of the two species but not in the second, and n-+ is the number of sites contained in the second of the two species but not in the first. This is the fraction of sites that are present in one species which are present in both. Since the number of sites present in the two species will often differ, the denominator is the average of the number of sites found in the two species. If each restriction site is s nucleotides long, the probability that a restriction site is present in the other species, given that it is present in a species, is Q^s, where Q is the probability that a nucleotide has no net change as one goes from the one species to the other. It may have changed in between; we are interested in the probability that that nucleotide site is in the same base in both species, irrespective of what has happened in between. The distance is then computed by finding the branch length of a two-species tree (connecting these two species with a single branch) such that Q equals the s-th root of f. For this the program computes Q for various values of branch length, iterating them by a Newton-Raphson algorithm until the two quantities are equal. The resulting distance should be numerically close to the original restriction sites distance of Nei and Li (1979) when divergence is small. Theirs computes the probability of retention of a site in a way that assumes that the site is present in the common ancestor of the two species. Ours does not make this assumption.
It is inspired by theirs, but differs in this detail. Their distance also assumes a Jukes-Cantor (1969) model of base change, and does not allow for transitions being more frequent than transversions. In this sense mine generalizes theirs somewhat. Their distance does include, as mine does as well, a correction for a Gamma distribution of rate of change among nucleotide sites. I have made their original distance available here. Restriction Fragments Distance For restriction fragments data we use a different distance. If we average over all restriction fragment lengths, each at its own expected frequency, the probability that the fragment will still be in existence after a certain amount of branch length, we must take into account the probability that the two restriction sites at the ends of the fragment do not mutate, and the probability that no new restriction site occurs within the fragment in that amount of branch length. The result for a restriction site length of s is:

         Q^(2s)
   f = ---------
        2 - Q^s

(The details of the derivation will be given in my forthcoming book Inferring Phylogenies, to be published by Sinauer Associates in 2001.) Given the observed fraction of restriction sites retained, f, we can solve a quadratic equation from the above expression for Q^s. That makes it easy to obtain a value of Q, and the branch length can then be estimated by adjusting it so the probability of a base not changing is equal to that value. Alternatively, if we use the Nei and Li (1979) restriction fragments distance, this involves solving for g in the nonlinear equation

   g = [ f (3 - 2g) ]^(1/4)

and then the distance is given by

   d = - (2/r) log_e(g)

where r is the length of the restriction site. Comparing these two restriction fragments distances in a case where their underlying DNA model is the same (which is when the transition/transversion ratio of the modified model is set to 0.5), you will find that they are very close to each other, differing very little at small distances, with the modified distance becoming smaller than the Nei/Li distance at larger distances. It will therefore matter very little which one you use. A Comment About RAPDs and AFLPs Although these distances are designed for restriction sites and restriction fragments data, they can be applied to RAPD and AFLP data as well. RAPD (Randomly Amplified Polymorphic DNA) and AFLP (Amplified Fragment Length Polymorphism) data consist of presence or absence of individual bands on a gel. The bands are segments of DNA with PCR primers at each end. These primers are defined sequences of known length (often about 10 nucleotides each). For AFLPs the relevant length is the primer length, plus three nucleotides. Mutation in these sequences makes them no longer be primers, just as in the case of restriction sites. Thus a pair of 10-nucleotide primers will behave much the same as a 20-nucleotide restriction site, for RAPDs (26 for AFLPs). You can use the restriction sites distance as the distance between RAPD or AFLP patterns if you set the proper value for the total length of the site to the total length of the primers (plus 6 in the case of AFLPs). Of course there are many possible sources of noise in these data, including confusing fragments of similar length for each other and having primers near each other in the genome, and these are not taken into account in the statistical model used here.
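As a rough numerical illustration of the restriction sites distance described above, the sketch below computes f from two rows of presence/absence data and then converts it to a branch length. It uses the Jukes-Cantor special case (equivalent to a transition/transversion ratio of 0.5) so that the branch length has a closed form; the real program fits the Kimura 2-parameter model by Newton-Raphson iteration, so this is only an approximation, not the frestdist algorithm itself.

import math

def restriction_sites_distance(a, b, site_length=6):
    # a, b: strings of '+'/'-' marking presence/absence of each restriction site
    n_pp = sum(x == "+" and y == "+" for x, y in zip(a, b))
    n_pm = sum(x == "+" and y == "-" for x, y in zip(a, b))
    n_mp = sum(x == "-" and y == "+" for x, y in zip(a, b))
    f = n_pp / (n_pp + 0.5 * (n_pm + n_mp))
    q = f ** (1.0 / site_length)        # per-nucleotide probability of no net change
    # Jukes-Cantor branch length giving that probability of no net change
    return -0.75 * math.log((4.0 * q - 1.0) / 3.0)

# The Alpha and Beta rows of restdist.dat (see the usage example below) give
# about 0.0223, close to the 0.022368 reported in the example output, which
# uses the default transition/transversion ratio of 2.0.
print(restriction_sites_distance("++-+-++--+++-", "++++--+--+++-"))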
Usage Here is a sample session with frestdist % frestdist Calculate distance matrix from restriction sites or fragments Input file: restdist.dat Phylip restdist program output file [restdist.frestdist]: Restriction site or fragment distances, version 3.69.650 Distances calculated for species Alpha .... Beta ... Gamma .. Delta . Epsilon Distances written to file "restdist.frestdist" Done. Go to the input files for this example Go to the output files for this example Command line arguments Calculate distance matrix from restriction sites or fragments Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-data] discretestates File containing one or more sets of restriction data [-outfile] outfile [*.frestdist] Phylip restdist program output file Additional (Optional) qualifiers (* if not always prompted): -[no]restsites boolean [Y] Restriction sites (put N if you want restriction fragments) -neili boolean [N] Use original Nei/Li model (default uses modified Nei/Li model) * -gammatype boolean [N] Gamma distributed rates among sites * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) -ttratio float [2.0] Transition/transversion ratio (Number 0.001 or more) -sitelength integer [6] Site length (Integer 1 or more) -lower boolean [N] Lower triangular distance matrix -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format frestdist input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this: 10 35 4 The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. 
Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs. The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown. Input files for usage example File: restdist.dat 5 13 2 Alpha ++-+-++--+++- Beta ++++--+--+++- Gamma -+--+-++-+-++ Delta ++-+----++--- Epsilon ++++----++--- Output file format frestdist output contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix or distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs. If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters. The distances printed out are scaled in terms of expected numbers of substitutions per DNA site, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0. Thus when the G option is used, the rate of change at one site may be higher than at another, but their mean is expected to be 1. Output files for usage example File: restdist.frestdist 5 Alpha 0.000000 0.022368 0.107681 0.082639 0.095581 Beta 0.022368 0.000000 0.107681 0.082639 0.056895 Gamma 0.107681 0.107681 0.000000 0.192466 0.207319 Delta 0.082639 0.082639 0.192466 0.000000 0.015945 Epsilon 0.095581 0.056895 0.207319 0.015945 0.000000 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
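To make the two restriction fragments distances described in the Algorithm section above concrete, here is a minimal Python sketch (illustrative only; the function names are invented, the observed fraction f of shared fragments is assumed to be already in hand, the site length s = r is the default 6, and the modified distance converts Q to a branch length with a Jukes-Cantor model, i.e. a transition/transversion ratio of 0.5). It solves the quadratic in Q^s for the modified distance and iterates g = [ f (3 - 2g) ]^(1/4) for the Nei/Li distance; as the text says, the two values come out very close.

import math

def modified_fragments_distance(f, s=6):
    # Solve Q**(2s) / (2 - Q**s) = f as a quadratic in x = Q**s:
    #   x**2 + f*x - 2*f = 0  ->  take the positive root
    x = (-f + math.sqrt(f * f + 8.0 * f)) / 2.0
    q = x ** (1.0 / s)
    # Branch length at which a base has probability q of no net change
    # under Jukes-Cantor: q = 1/4 + 3/4 * exp(-4d/3)
    return -0.75 * math.log((4.0 * q - 1.0) / 3.0)

def nei_li_fragments_distance(f, r=6):
    # Solve g = (f * (3 - 2g))**(1/4) by fixed-point iteration,
    # then d = -(2/r) * ln(g)
    g = f
    for _ in range(200):
        g_new = (f * (3.0 - 2.0 * g)) ** 0.25
        if abs(g_new - g) < 1e-12:
            g = g_new
            break
        g = g_new
    return -(2.0 / r) * math.log(g)

for f in (0.9, 0.7, 0.5):
    print(f, round(modified_fragments_distance(f), 4),
          round(nei_li_fragments_distance(f), 4))
# the two columns differ very little, most at the larger distances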
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnamove.txt0000664000175000017500000002625712171064331015741 00000000000000 fdnamove Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Interactive DNA parsimony Description Interactive construction of phylogenies from nucleic acid sequences, with their evaluation by parsimony and compatibility and the display of reconstructed ancestral bases. This can be used to find parsimony or compatibility estimates by hand. Algorithm DNAMOVE is an interactive DNA parsimony program, inspired by Wayne Maddison and David and Wayne Maddison's marvellous program MacClade, which is written for Macintosh computers. DNAMOVE reads in a data set which is prepared in almost the same format as one for the DNA parsimony program DNAPARS. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different sites and the way the nucleotide states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearraranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different sites are affected by changes in the tree topology. This program uses graphic characters that show the tree to best advantage on some computer systems. 
Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and to any system whose screen or terminals emulate ANSI standard terminals such as old Digital VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types, (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier. This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are exactly analogous to those of MIX: 1. Each site evolves independently. 2. Different lineages evolve independently. 3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny. 4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch. 5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Usage Here is a sample session with fdnamove % fdnamove Interactive DNA parsimony Input (aligned) nucleotide sequence set(s): dnamove.dat Phylip tree file (optional): NEXT (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q Do you want to write out the tree to a file? (Y or N): Y 5 species, 13 sites Computing steps needed for compatibility in sites ... (unrooted) 19.0 Steps 11 sites compatible ,-----------5:Epsilon --9 ! ,--------4:Delta `--8 ! ,-----3:Gamma `--7 ! 
,--2:Beta `--6 `--1:Alpha Tree written to file "dnamove.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Interactive DNA parsimony Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall (Aligned) nucleotide sequence set(s) filename and optional format, or reference (input USA) [-intreefile] tree Phylip tree file (optional) Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file - ignore sites with weight zero -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1] Threshold value (Number 1.000 or more) -initialtree menu [Arbitary] Initial tree (Values: a (Arbitary); u (User); s (Specify)) -screenwidth integer [80] Width of terminal screen in characters (Any integer value) -screenlines integer [24] Number of lines on screen (Any integer value) -outtreefile outfile [*.fdnamove] Phylip tree output file (optional) Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnamove reads any normal sequence USAs. Input files for usage example File: dnamove.dat 5 13 Alpha AACGUGGCCA AAU Beta AAGGUCGCCA AAC Gamma CAUUUCGUCA CAA Delta GGUAUUUCGG CCU Epsilon GGGAUCUCGG CCC Output file format fdnamove outputs a graph to the specified graphics device. outputs a report format file. The default format is ... Output files for usage example File: dnamove.treefile (Epsilon,(Delta,(Gamma,(Beta,Alpha)))); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
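The Fitch (1971) counting referred to in the Algorithm section above can be sketched in a few lines of Python. This is an illustration, not the program's code: the nested-tuple tree representation and the function name are made up for the example, but the sequences and tree are those of the sample session, and no gap characters are involved here.

def fitch_steps(tree, column):
    """Return (possible state set, changes) for one site on a rooted tree."""
    if isinstance(tree, str):                       # a tip
        return {column[tree]}, 0
    left, right = tree
    lset, lsteps = fitch_steps(left, column)
    rset, rsteps = fitch_steps(right, column)
    if lset & rset:
        return lset & rset, lsteps + rsteps
    return lset | rset, lsteps + rsteps + 1         # union costs one change

seqs = {"Alpha": "AACGUGGCCAAAU", "Beta": "AAGGUCGCCAAAC",
        "Gamma": "CAUUUCGUCACAA", "Delta": "GGUAUUUCGGCCU",
        "Epsilon": "GGGAUCUCGGCCC"}
tree = ("Epsilon", ("Delta", ("Gamma", ("Beta", "Alpha"))))
total = sum(fitch_steps(tree, {name: seq[i] for name, seq in seqs.items()})[1]
            for i in range(13))
print(total)   # 19, matching the 19.0 steps shown in the session above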
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdolpenny.txt0000664000175000017500000006643012171064331016135 00000000000000 fdolpenny Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Penny algorithm Dollo or polymorphism Description Finds all most parsimonious phylogenies for discrete-character data with two states, for the Dollo or polymorphism parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species. Algorithm DOLPENNY is a program that will find all of the most parsimonious trees implied by your data when the Dollo or polymorphism parsimony criteria are employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion). There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. 
The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW. The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times). The Algorithm The search strategy used by DOLPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of losses. It adds the next species to each of these, again in all possible spaces. If this process would continue it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated. Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups): Make tree of first two species (A,B) Add C in first place ((A,B),C) Add D in first place (((A,D),B),C) Add D in second place ((A,(B,D)),C) Add D in third place (((A,B),D),C) Add D in fourth place ((A,B),(C,D)) Add D in fifth place (((A,B),C),D) Add C in second place: ((A,C),B) Add D in first place (((A,D),C),B) Add D in second place ((A,(C,D)),B) Add D in third place (((A,C),D),B) Add D in fourth place ((A,C),(B,D)) Add D in fifth place (((A,C),B),D) Add C in third place (A,(B,C)) Add D in first place ((A,D),(B,C)) Add D in second place (A,((B,D),C)) Add D in third place (A,(B,(C,D))) Add D in fourth place (A,((B,C),D)) Add D in fifth place ((A,(B,C)),D) Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. 
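Here is a minimal Python sketch of the sequential-addition, depth-first generation just described (purely illustrative; the nested-tuple representation and function names are invented for the example). Each new species is tried on every branch of the growing tree, including the branch above the root; in the full branch-and-bound search, each partial tree's count of losses would in addition be compared against the best complete tree found so far, as explained below.

def add_in_all_places(tree, species):
    """Yield every tree formed by attaching `species` to one branch of
    `tree`, including the branch above the present root."""
    yield (tree, species)                     # above the root
    if not isinstance(tree, str):
        left, right = tree
        for subtree in add_in_all_places(left, species):
            yield (subtree, right)
        for subtree in add_in_all_places(right, species):
            yield (left, subtree)

def depth_first_trees(species):
    """Generate all rooted bifurcating trees by adding species one at a time."""
    def grow(tree, remaining):
        if not remaining:
            yield tree
            return
        for bigger in add_in_all_places(tree, remaining[0]):
            yield from grow(bigger, remaining[1:])
    first, second, *rest = species
    yield from grow((first, second), rest)

trees = list(depth_first_trees(["A", "B", "C", "D"]))
print(len(trees))   # 15 rooted trees: the same fifteen as in the listing
                    # above, though generated in a different order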
The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of losses (or retentions of polymorphism) is evaluated. The point of this is that if a previously-found tree such as ((A,B),(C,D)) required fewer losses, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther. The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time. The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search over all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible. The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer losses than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems. Controlling Run Times Among the quantities available to be set at the beginning of a run of DOLPENNY, two (howoften and howmany) are of particular importance. As DOLPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DOLPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far. When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften X howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation.
You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees. The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate. A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, "maxtrees" is set to 100 in the distribution copy. Methods and Options The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DOLLOP, and thus the remainder of this document will be nearly identical to the DOLLOP document. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary. The assumptions of this method are in effect: 1. We know which state is the ancestral one (state 0). 2. The characters are evolving independently. 3. Different lineages evolve independently. 4. The probability of a forward change (0-->1) is small over the evolutionary times involved. 5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change. 6. Retention of polymorphism for both states (0 and 1) is highly improbable. 7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment. That these are the assumptions is established in several of my papers (1973a, 1978b, 1979, 1981b, 1983). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. 
Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred. The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained. The assumptions of the polymorphism parsimony method are in effect: 1. The ancestral state (state 0) is known in each character. 2. The characters are evolving independently of each other. 3. Different lineages are evolving independently. 4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group. 5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change. 6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism. 7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).
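A small Python sketch of the Dollo counting described above, purely for illustration (the nested-tuple tree encoding and function names are assumptions, not the program's data structures): the single 0-->1 origin is placed at the smallest clade containing every species with state 1, and one 1-->0 reversion is counted for each maximal all-0 subtree inside that clade. Run on the example data and the first of the trees found in the sample output below, it reproduces the reported length of 3.

def has_one(tree, states):
    if isinstance(tree, str):
        return states[tree] == '1'
    return any(has_one(child, states) for child in tree)

def gain_clade(tree, states):
    """Smallest subtree containing every tip with state 1."""
    if isinstance(tree, str):
        return tree
    with_one = [c for c in tree if has_one(c, states)]
    return gain_clade(with_one[0], states) if len(with_one) == 1 else tree

def reversions(tree, states):
    """Minimum number of 1->0 losses for one character on a rooted tree."""
    if not has_one(tree, states):
        return 0
    def losses(sub):
        if isinstance(sub, str):
            return 0
        return sum(losses(c) if has_one(c, states) else 1 for c in sub)
    return losses(gain_clade(tree, states))

data = {"Alpha1": "110110", "Alpha2": "110110", "Beta1": "110000",
        "Beta2": "110000", "Gamma1": "100110", "Delta": "001001",
        "Epsilon": "001110"}
tree = ("Delta", ("Epsilon", ("Gamma1", ("Alpha2",
        (("Beta2", "Beta1"), "Alpha1")))))
total = sum(reversions(tree, {sp: seq[i] for sp, seq in data.items()})
            for i in range(6))
print(total)   # 3, the tree length reported in the example output below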
Usage Here is a sample session with fdolpenny % fdolpenny Penny algorithm Dollo or polymorphism Phylip character discrete states file: dolpenny.dat Phylip dolpenny program output file [dolpenny.fdolpenny]: How many trees looked Approximate at so far Length of How many percentage (multiples shortest tree trees this long searched of 100): found so far found so far so far ---------- ------------ ------------ ------------ 1 3.00000 1 0.95 Output written to file "dolpenny.fdolpenny" Trees also written onto file "dolpenny.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Penny algorithm Dollo or polymorphism Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more data sets [-outfile] outfile [*.fdolpenny] Phylip dolpenny program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -ancfile properties Ancestral states file -dothreshold toggle [N] Use threshold parsimony * -threshold float [1] Threshold value (Number 0.000 or more) -howmany integer [1000] How many groups of trees (Any integer value) -howoften integer [100] How often to report, in trees (Any integer value) -[no]simple boolean [Y] Branch and bound is simple -method menu [d] Parsimony method (Values: d (Dollo); p (Polymorphism)) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdolpenny] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -ancseq boolean [N] Print states at all nodes of tree -stepbox boolean [N] Print out steps in each character Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdolpenny reads discrete character data with "?", "P", "B" states allowed. . (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both". 
There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states. Input files for usage example File: dolpenny.dat 7 6 Alpha1 110110 Alpha2 110110 Beta1 110000 Beta2 110000 Gamma1 100110 Delta 001001 Epsilon 001110 Output file format fdolpenny output format is standard. It includes a rooted tree and, if the user selects option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1. A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" 
there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?") whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not amibiguity. The algorithm in DOLPENNY gives up more easily on displaying these states. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: dolpenny.fdolpenny Penny algorithm for Dollo or polymorphism parsimony, version 3.69.650 branch-and-bound to find all most parsimonious trees requires a total of 3.000 3 trees in all found +-----------------Delta ! --2 +--------------Epsilon ! ! +--3 +-----------Gamma1 ! ! +--6 +--------Alpha2 ! ! +--1 +--Beta2 ! +--5 +--4 +--Beta1 ! +-----Alpha1 +-----------------Delta ! --2 +--------------Epsilon ! ! +--3 +-----------Gamma1 ! ! +--6 +--Beta2 ! +-----5 ! ! +--Beta1 +--4 ! +--Alpha2 +-----1 +--Alpha1 +-----------------Delta ! --2 +--------------Epsilon ! ! +--3 +-----------Gamma1 ! ! ! ! +--Beta2 +--6 +--5 ! +--4 +--Beta1 ! ! ! +--1 +-----Alpha2 ! +--------Alpha1 File: dolpenny.treefile (Delta,(Epsilon,(Gamma1,(Alpha2,((Beta2,Beta1),Alpha1)))))[0.3333]; (Delta,(Epsilon,(Gamma1,((Beta2,Beta1),(Alpha2,Alpha1)))))[0.3333]; (Delta,(Epsilon,(Gamma1,(((Beta2,Beta1),Alpha2),Alpha1))))[0.3333]; Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. 
Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fgendist.txt fgendist Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Compute genetic distances from gene frequencies Description Computes one of three different genetic distance formulas from gene frequency data. The formulas are Nei's genetic distance, the Cavalli-Sforza chord measure, and the genetic distance of Reynolds et al. The former is appropriate for data in which new mutations occur in an infinite isoalleles neutral mutation model, the latter two for a model without mutation and with pure genetic drift. The distances are written to a file in a format appropriate for input to the distance matrix programs. Algorithm This program computes any one of three measures of genetic distance from a set of gene frequencies in different populations (or species). The three are Nei's genetic distance (Nei, 1972), Cavalli-Sforza's chord measure (Cavalli-Sforza and Edwards, 1967) and Reynolds, Weir, and Cockerham's (1983) genetic distance. These are written to an output file in a format that can be read by the distance matrix phylogeny programs FITCH and KITSCH. The three measures have somewhat different assumptions. All assume that all differences between populations arise from genetic drift. Nei's distance is formulated for an infinite isoalleles model of mutation, in which there is a rate of neutral mutation and each mutant is to a completely new allele. It is assumed that all loci have the same rate of neutral mutation, and that the genetic variability initially in the population is at equilibrium between mutation and genetic drift, with the effective population size of each population remaining constant. Nei's distance is:

  D = - ln ( [ Sum_m Sum_i p1mi p2mi ] / ( [ Sum_m Sum_i p1mi^2 ]^(1/2) [ Sum_m Sum_i p2mi^2 ]^(1/2) ) )

where m is summed over loci, i over alleles at the m-th locus, and where p1mi is the frequency of the i-th allele at the m-th locus in population 1. Subject to the above assumptions, Nei's genetic distance is expected, for a sample of sufficiently many equivalent loci, to rise linearly with time. The other two genetic distances assume that there is no mutation, and that all gene frequency changes are by genetic drift alone. However they do not assume that population sizes have remained constant and equal in all populations. They cope with changing population size by having expectations that rise linearly not with time, but with the sum over time of 1/N, where N is the effective population size. Thus if population size doubles, genetic drift will be taking place more slowly, and the genetic distance will be expected to be rising only half as fast with respect to time. Both genetic distances are different estimators of the same quantity under the same model. Cavalli-Sforza's chord distance is given by

  D^2 = 4 Sum_m [ 1 - Sum_i (p1mi)^(1/2) (p2mi)^(1/2) ] / Sum_m (a_m - 1)

where m indexes the loci, where i is summed over the alleles at the m-th locus, and where a_m is the number of alleles at the m-th locus. It can be shown that this distance always satisfies the triangle inequality. Note that as given here it is divided by the number of degrees of freedom, the sum of the numbers of alleles minus one.
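Here is a minimal Python sketch of Nei's distance and the chord measure as given above (illustrative only; the list-of-loci data layout and function names are assumptions). The frequencies are the European and African rows of the example input file shown further below, expanded from the all-but-one-allele format to the full frequencies p and 1-p for each two-allele locus; the Nei value reproduces the corresponding entry of the example output.

import math

def nei_distance(pop1, pop2):
    """pop1, pop2: lists of loci, each locus a list of allele frequencies."""
    jxy = sum(p1 * p2 for l1, l2 in zip(pop1, pop2) for p1, p2 in zip(l1, l2))
    jx = sum(p * p for locus in pop1 for p in locus)
    jy = sum(p * p for locus in pop2 for p in locus)
    return -math.log(jxy / math.sqrt(jx * jy))

def chord_distance_squared(pop1, pop2):
    num = 4.0 * sum(1.0 - sum(math.sqrt(p1 * p2) for p1, p2 in zip(l1, l2))
                    for l1, l2 in zip(pop1, pop2))
    dof = sum(len(locus) - 1 for locus in pop1)   # degrees of freedom
    return num / dof

european = [0.2868, 0.5684, 0.4422, 0.4286, 0.3828,
            0.7285, 0.6386, 0.0205, 0.8055, 0.5043]
african  = [0.1356, 0.4840, 0.0602, 0.0397, 0.5977,
            0.9675, 0.9511, 0.0600, 0.7582, 0.6207]
pop1 = [[p, 1.0 - p] for p in european]
pop2 = [[p, 1.0 - p] for p in african]
print(round(nei_distance(pop1, pop2), 6))
# about 0.078002, the European-African entry in the example output below
print(round(chord_distance_squared(pop1, pop2), 6))
# the chord measure (option c) for the same pair, not shown in that output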
The quantity which is expected to rise linearly with the amount of genetic drift (the sum of 1/N over time) is D squared, the quantity computed above, and that is what is written out into the distance matrix. Reynolds, Weir, and Cockerham's (1983) genetic distance is

  D^2 = Sum_m Sum_i [ p1mi - p2mi ]^2 / ( 2 Sum_m [ 1 - Sum_i p1mi p2mi ] )

where the notation is as before and D^2 is the quantity that is expected to rise linearly with cumulated genetic drift. Having computed one of these genetic distances, one which you feel is appropriate to the biology of the situation, you can use it as the input to the programs FITCH, KITSCH or NEIGHBOR. Keep in mind that the statistical model in those programs implicitly assumes that the distances in the input table have independent errors. For any measure of genetic distance this will not be true, as bursts of random genetic drift, or sampling events in drawing the sample of individuals from each population, cause fluctuations of gene frequency that affect many distances simultaneously. While this is not expected to bias the estimate of the phylogeny, it does mean that the weighing of evidence from all the different distances in the table will not be done with maximal efficiency. One issue is which value of the P (Power) parameter should be used. This depends on how the variance of a distance rises with its expectation. For Cavalli-Sforza's chord distance, and for the Reynolds et al. distance, it can be shown that the variance of the distance will be proportional to the square of its expectation; this suggests a value of 2 for P, which is the default value for FITCH and KITSCH (there is no P option in NEIGHBOR). If you think that the pure genetic drift model is appropriate, and are thus tempted to use the Cavalli-Sforza or Reynolds et al. distances, you might consider using the maximum likelihood program CONTML instead. It will correctly weigh the evidence in that case. Like those genetic distances, it uses approximations that break down as loci start to drift all the way to fixation. Although Nei's distance will not break down in that case, it makes other assumptions about equality of substitution rates at all loci and constancy of population sizes. The most important thing to remember is that genetic distance is not an abstract, idealized measure of "differentness". It is an estimate of a parameter (time or cumulated inverse effective population size) of the model which is thought to have generated the differences we see. As an estimate, it has statistical properties that can be assessed, and we should never have to choose between genetic distances based on their aesthetic properties, or on the personal prestige of their originators. Considering them as estimates focuses us on the questions which genetic distances are intended to answer, for if there are none there is no reason to compute them. For further perspective on genetic distances, I recommend my own paper evaluating Reynolds, Weir, and Cockerham (1983), and the material in Nei's book (Nei, 1987). Usage Here is a sample session with fgendist % fgendist Compute genetic distances from gene frequencies Phylip gendist program input file: gendist.dat Phylip gendist program output file [gendist.fgendist]: Distances calculated for species European . African .. Chinese ... American .... Australian ..... Distances written to file "gendist.fgendist" Done.
Go to the input files for this example Go to the output files for this example Command line arguments Compute genetic distances from gene frequencies Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] frequencies File containing one or more sets of data [-outfile] outfile [*.fgendist] Phylip gendist program output file Additional (Optional) qualifiers: -method menu [n] Which method to use (Values: n (Nei genetic distance); c (Cavalli-Sforza chord measure); r (Reynolds genetic distance)) -[no]progress boolean [Y] Print indications of progress of run -lower boolean [N] Lower triangular distance matrix Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fgendist reads continuous character data Continuous character data The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny. When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions: 1. Different lineages evolve independently. 2. After two lineages split, their characters change independently. 3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method). 4. Different loci or characters drift independently. How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c). The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. 
The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option: 5 3 2 3 2 Alpha 0.90 0.80 0.10 0.56 Beta 0.72 0.54 0.30 0.20 Gamma 0.38 0.10 0.05 0.98 Delta 0.42 0.40 0.43 0.97 Epsilon 0.10 0.30 0.70 0.62 whereas here is what it would have to look like if the A option were invoked: 5 3 2 3 2 Alpha 0.90 0.10 0.80 0.10 0.10 0.56 0.44 Beta 0.72 0.28 0.54 0.30 0.16 0.20 0.80 Gamma 0.38 0.62 0.10 0.05 0.85 0.98 0.02 Delta 0.42 0.58 0.40 0.43 0.17 0.97 0.03 Epsilon 0.10 0.90 0.30 0.70 0.00 0.62 0.38 The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name). If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message. While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points. CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables. CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. 
Thus we have a program in this set that allow us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set. In the quantitative characters mode, a typical small data set would be: 5 6 Alpha 0.345 0.467 1.213 2.2 -1.2 1.0 Beta 0.457 0.444 1.1 1.987 -0.2 2.678 Gamma 0.6 0.12 0.97 2.3 -0.11 1.54 Delta 0.68 0.203 0.888 2.0 1.67 Epsilon 0.297 0.22 0.90 1.9 1.74 Note that in the latter case, there is no line giving the numbers of alleles at each locus. In this latter case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale. For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs. Input files for usage example File: gendist.dat 5 10 2 2 2 2 2 2 2 2 2 2 European 0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205 0.8055 0.5043 African 0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600 0.7582 0.6207 Chinese 0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726 0.7482 0.7334 American 0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000 0.8086 0.8636 Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396 0.9097 0.2976 Output file format fgendist output simply contains on its first line the number of species (or populations). Each species (or population) starts a new line, with its name printed out first, and then and there are up to nine genetic distances printed on each line, in the standard format used as input by the distance matrix programs. The output, in its default form, is ready to be used in the distance matrix programs. Output files for usage example File: gendist.fgendist 5 European 0.000000 0.078002 0.080749 0.066805 0.103014 African 0.078002 0.000000 0.234698 0.104975 0.227281 Chinese 0.080749 0.234698 0.000000 0.053879 0.063275 American 0.066805 0.104975 0.053879 0.000000 0.134756 Australian 0.103014 0.227281 0.063275 0.134756 0.000000 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description egendist Genetic distance matrix program fcontml Gene frequency and continuous character maximum likelihood Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnamlk.txt0000664000175000017500000007552512171064331015560 00000000000000 fdnamlk Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Estimates nucleotide phylogeny by maximum likelihood Description Same as DNAML but assumes a molecular clock. 
The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made. Estimates phylogenies from nucleotide sequences by maximum likelihood. The model employed allows for unequal expected frequencies of the four nucleotides, for unequal rates of transitions and transversions, and for different (prespecified) rates of change in different categories of sites, and also use of a Hidden Markov model of rates, with the program inferring which sites have which rates. This also allows gamma-distributed and gamma-plus-invariant-sites distributions of rates across sites.

Algorithm

This program implements the maximum likelihood method for DNA sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to DNAML. Details of the algorithm are not yet published, but many aspects of it are similar to DNAML, and these are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites. The assumptions of the model are:

1. Each site in the sequence evolves independently.
2. Different lineages evolve independently.
3. There is a molecular clock.
4. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
5. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
6. A substitution consists of one of two sorts of events:
   1. The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition.
   2. The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to no change, to a transition, or to a transversion.

The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines. The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21.

o First kind of event: Determine whether the existing base is a purine or a pyrimidine. Draw from the proper pool:

       Purine pool:            Pyrimidine pool:
      _______________          _______________
     |               |        |               |
     |   0.4706 A    |        |   0.5714 C    |
     |   0.5294 G    |        |   0.4286 T    |
     |   (ratio is   |        |   (ratio is   |
     |  0.24 : 0.27) |        |  0.28 : 0.21) |
     |_______________|        |_______________|

o Second kind of event: Draw from the overall pool:

      __________________
     |                  |
     |      0.24 A      |
     |      0.28 C      |
     |      0.27 G      |
     |      0.21 T      |
     |__________________|

Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier.
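To make the two kinds of events concrete, here is a small Python sketch (not taken from PHYLIP; the function names and the use of the random module are illustrative only) that draws the outcome of a single event under the base frequencies used in the diagram above.

import random

FREQS = {"A": 0.24, "C": 0.28, "G": 0.27, "T": 0.21}   # overall pool, as above
PURINES, PYRIMIDINES = ("A", "G"), ("C", "T")

def draw(pool):
    """Draw one base from a dict of base -> frequency, renormalising the pool."""
    bases = list(pool)
    weights = [pool[b] for b in bases]
    return random.choices(bases, weights=weights, k=1)[0]

def one_event(current, kind):
    """Apply one substitution event to base `current`.
    kind 1: replace from the purine or pyrimidine pool (no change or transition).
    kind 2: replace from the overall pool (no change, transition or transversion)."""
    if kind == 1:
        group = PURINES if current in PURINES else PYRIMIDINES
        return draw({b: FREQS[b] for b in group})
    return draw(FREQS)

# If the existing base is A, a first-kind event returns A with probability
# 0.24/(0.24+0.27) = 0.4706, and a second-kind event returns A with probability 0.24.
print(one_event("A", 1), one_event("A", 2))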
A closely similar, but not precisely identical model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996). Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed. This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length of a patch of sites all having the same rate is. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence. For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate. The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites. This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution. Another layer of rate variation also is available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7. If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper).
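As a small illustration of the product rule just described, the Python sketch below combines a user-assigned categories string (as supplied with the -categories qualifier in the usage example below) with one hypothetical assignment of HMM states to sites. The variable names and the particular state assignment are invented for the example; the program itself sums over all such assignments rather than picking one.

# Category rates as in the usage example below: category 1 -> 1.0, category 2 -> 2.0
category_rates = {"1": 1.0, "2": 2.0}
categories = "1111112222222"            # one category digit per site (13 sites)

# HMM regional rates (five states, as in the -hmmrates example below)
hmm_rates = [0.264, 1.413, 3.596, 7.086, 12.641]
# One *possible* assignment of HMM states to the 13 sites (made up for illustration)
state_of_site = [1, 1, 3, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1]   # 1-based state numbers

effective = [category_rates[c] * hmm_rates[s - 1]
             for c, s in zip(categories, state_of_site)]
print([round(r, 3) for r in effective])

Whatever assignment is considered, the caveat in the parenthesis above still applies: the product can become implausibly large when both factors are high.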
Nevertheless you may want to use both types of rate variation. Usage Here is a sample session with fdnamlk % fdnamlk -printdata -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" -gammatype h -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" -hmmpr obabilities "0.522 0.399 0.076 0.0036 0.000023" -lambda 1.5 -weight "01111111111 10" Estimates nucleotide phylogeny by maximum likelihood Input (aligned) nucleotide sequence set(s): dnaml.dat Phylip tree file (optional): Phylip dnamlk program output file [dnaml.fdnamlk]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Output written to file "dnaml.fdnamlk" Tree also written onto file "dnaml.treefile" Done. Go to the input files for this example Go to the output files for this example Command line arguments Estimates nucleotide phylogeny by maximum likelihood Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fdnamlk] Phylip dnamlk program output file Additional (Optional) qualifiers (* if not always prompted): -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array Rate for each category * -categories properties File of substitution rate categories -weights properties Weights file -ttratio float [2.0] Transition/transversion ratio (Number 0.001 or more) -[no]freqsfrom toggle [Y] Use empirical base frequencies from seqeunce input * -basefreq array [0.25 0.25 0.25 0.25] Base frequencies for A C G T/U (use blanks to separate) -gammatype menu [Constant rate] Rate variation among sites (Values: g (Gamma distributed rates); i (Gamma+invariant sites); h (User defined HMM of rates); n (Constant rate)) * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ngammacat integer [1] Number of categories (1-9) (Integer from 1 to 9) * -invarcoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ninvarcat integer [1] Number of categories (1-9) including one for invariant sites (Integer from 1 to 9) * -invarfrac float [0.0] Fraction of invariant sites (Number from 0.000 to 1.000) * -nhmmcategories integer [1] Number of HMM rate categories (Integer from 1 to 9) * -hmmrates array [1.0] HMM category rates * -hmmprobabilities array [1.0] Probability for each HMM category * -adjsite boolean [N] Rates at adjacent sites correlated * -lambda float [1.0] Mean block length of sites having the same rate (Number 1.000 or more) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) * -global boolean [N] Global rearrangements * -lengths boolean [N] Use branch lengths from user trees -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdnamlk] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -hypstate boolean [N] Reconstruct hypothetical sequence Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 
boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnamlk reads any normal sequence USAs. Input files for usage example File: dnaml.dat 5 13 Alpha AACGTGGCCAAAT Beta AAGGTCGCCAAAC Gamma CATTTCGTCACAA Delta GGTATTTCGGCCT Epsilon GGGATCTCGGCCC Output file format --> fdnamlk output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter. If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates. There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. A table is printed showing the length of each tree segment, and the time (in units of expected nucleotide substitutions per site) of each fork in the tree, measured from the root of the tree. I have not attempted in include code for approximate confidence limits on branch points, as I have done for branch lengths in DNAML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated. The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). 
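The arithmetic of such likelihood ratio tests is simple enough to sketch. The following Python fragment is not part of fdnamlk; the log-likelihood values are placeholders you would copy from your own output files, and the chi-square quantiles come from scipy, which is an assumption of this sketch rather than a requirement of the program.

from scipy.stats import chi2   # assumed available; any chi-square table would do

# Profile over fixed transition/transversion ratios: ln L from separate runs
# (placeholder numbers, to be replaced by values read from your own output files)
lnL_by_ttratio = {1.0: -1305.2, 2.0: -1298.7, 4.0: -1301.9}
best = max(lnL_by_ttratio.values())
cutoff = chi2.ppf(0.95, df=1) / 2.0          # about 1.92 log-likelihood units
supported = [t for t, l in lnL_by_ttratio.items() if best - l <= cutoff]
print("ttratio values not rejected at 5%:", supported)

# Molecular clock test described just below: compare DNAML (2n-3 branch lengths)
# with DNAMLK (n-1 branching times) on the same data and same unrooted topology.
n = 5                                        # number of tip species
lnL_dnaml, lnL_dnamlk = -57.0, -58.0         # placeholders
stat = 2.0 * (lnL_dnaml - lnL_dnamlk)
p = chi2.sf(stat, df=n - 2)                  # (2n-3) - (n-1) = n-2 degrees of freedom
print("clock test: chi-square = %.2f, p = %.3f" % (stat, p))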
One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive. This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run DNAML and DNAMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In DNAML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In DNAMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2. To perform the test take the difference in log likelihoods between the two runs (DNAML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected. If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.
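For readers who want to see the flavour of the two-tree KHT calculation, here is a rough Python sketch. It is not the program's implementation; the per-site log-likelihoods are placeholders you would obtain from your own runs on each user tree, and the simple normal approximation with the 1.96 cutoff follows the description above.

from math import sqrt

# Per-site log-likelihoods of two candidate trees (placeholder values; one entry
# per site, e.g. extracted from separate runs on each user tree).
lnL_tree1 = [-4.1, -3.9, -5.2, -4.4, -4.0, -4.8, -3.7, -4.6]
lnL_tree2 = [-4.3, -4.0, -5.1, -4.9, -4.2, -5.0, -3.8, -4.9]

d = [a - b for a, b in zip(lnL_tree1, lnL_tree2)]   # per-site differences
n = len(d)
total = sum(d)                                      # difference in total log-likelihood
mean = total / n
var = sum((x - mean) ** 2 for x in d) / (n - 1)     # sample variance across sites
sd_total = sqrt(n * var)                            # SD of the summed difference

z = total / sd_total
print("difference = %.3f, z = %.2f -> %s" %
      (total, z, "significant" if abs(z) > 1.96 else "not significant"))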
However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases. The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14. At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in this calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead. Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file. Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. 
The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates. Output files for usage example File: dnaml.fdnamlk Nucleic acid sequence Maximum Likelihood method, version 3.69.650 5 species, 13 sites Site categories are: 1111112222 222 Sites are weighted as follows: 01111 11111 110 Name Sequences ---- --------- Alpha AACGTGGCCA AAT Beta AAGGTCGCCA AAC Gamma CATTTCGTCA CAA Delta GGTATTTCGG CCT Epsilon GGGATCTCGG CCC Empirical Base Frequencies: A 0.23636 C 0.29091 G 0.25455 T(U) 0.21818 Transition/transversion ratio = 2.000000 State in HMM Rate of change Probability 1 0.264 0.522 2 1.413 0.399 3 3.596 0.076 4 7.086 0.0036 5 12.641 0.000023 Site category Rate of change 1 1.000 2 2.000 +-Epsilon +---------------------------------------------------------4 ! +-Delta --3 ! +-------Gamma +---------------------------------------------------2 ! +-Beta +-----1 +-Alpha Ln Likelihood = -57.98242 Ancestor Node Node Height Length -------- ---- ---- ------ ------ root 3 3 4 4.01604 4.01604 4 Epsilon 4.15060 0.13456 4 Delta 4.15060 0.13456 3 2 3.59089 3.59089 2 Gamma 4.15060 0.55971 2 1 3.99329 0.40240 1 Beta 4.15060 0.15731 1 Alpha 4.15060 0.15731 Combination of categories that contributes the most to the likelihood: 1132121111 211 Most probable category at each site if > 0.95 probability ("." otherwise) .......... ... File: dnaml.treefile ((Epsilon:0.13456,Delta:0.13456):4.01604,(Gamma:0.55971, (Beta:0.15731,Alpha:0.15731):0.40240):3.59089); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
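As a quick sanity check of the clock constraint, the tree written to dnaml.treefile above should be ultrametric: every tip should lie at the same total branch length from the root (4.1506 in the node table above). The sketch below assumes Biopython is installed; it is only a convenience for this check, not something fdnamlk itself requires.

import io
from Bio import Phylo   # assumed available (Biopython)

newick = ("((Epsilon:0.13456,Delta:0.13456):4.01604,(Gamma:0.55971,"
          "(Beta:0.15731,Alpha:0.15731):0.40240):3.59089);")
tree = Phylo.read(io.StringIO(newick), "newick")

# Sum of branch lengths from the root to every tip; under a molecular clock
# these root-to-tip depths should all be (numerically) equal.
depths = {clade.name: depth for clade, depth in tree.depths().items()
          if clade.is_terminal()}
print(depths)
print("ultrametric:", max(depths.values()) - min(depths.values()) < 1e-6)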
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnapenny.txt0000664000175000017500000006207612171064331016123 00000000000000 fdnapenny Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Penny algorithm for DNA Description Finds all most parsimonious phylogenies for nucleic acid sequences by branch-and-bound search. This may not be practical (depending on the data) for more than 10-11 species or so. Algorithm DNAPENNY is a program that will find all of the most parsimonious trees implied by your data when the nucleic acid sequence parsimony criterion is employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion). There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). 
The result is that this program, despite its algorithmic sophistication, is VERY SLOW. The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times). The Algorithm The search strategy used by DNAPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of base substitutions. It adds the next species to each of these, again in all possible spaces. If this process would continue it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated. This is because the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. For each tree the number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups): Make tree of first two species: (A,B) Add C in first place: ((A,B),C) Add D in first place: (((A,D),B),C) Add D in second place: ((A,(B,D)),C) Add D in third place: (((A,B),D),C) Add D in fourth place: ((A,B),(C,D)) Add D in fifth place: (((A,B),C),D) Add C in second place: ((A,C),B) Add D in first place: (((A,D),C),B) Add D in second place: ((A,(C,D)),B) Add D in third place: (((A,C),D),B) Add D in fourth place: ((A,C),(B,D)) Add D in fifth place: (((A,C),B),D) Add C in third place: (A,(B,C)) Add D in first place: ((A,D),(B,C)) Add D in second place: (A,((B,D),C)) Add D in third place: (A,(B,(C,D))) Add D in fourth place: (A,((B,C),D)) Add D in fifth place: ((A,(B,C)),D) Among these fifteen trees you will find all of the four-species rooted trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added. 
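The sequential-addition enumeration just described is easy to reproduce. The Python sketch below is not DNAPENNY's code; it simply generates every rooted topology by inserting each new species at every possible place, which yields the fifteen four-species trees listed above, and it computes (without generating them) the 34,459,425 trees expected for ten species.

import math

def insert_leaf(tree, leaf):
    """Yield every tree obtained by attaching `leaf` on some branch of `tree`
    (trees are nested 2-tuples; attaching above the current root is included)."""
    yield (tree, leaf)
    if isinstance(tree, tuple):
        left, right = tree
        for new_left in insert_leaf(left, leaf):
            yield (new_left, right)
        for new_right in insert_leaf(right, leaf):
            yield (left, new_right)

def all_rooted_trees(taxa):
    trees = [(taxa[0], taxa[1])]
    for leaf in taxa[2:]:                       # depth-first style sequential addition
        trees = [t for old in trees for t in insert_leaf(old, leaf)]
    return trees

print(len(all_rooted_trees(["A", "B", "C", "D"])))   # 15, as in the listing above
print(math.prod(range(3, 2 * 10 - 2, 2)))            # 3*5*...*17 = 34459425 for 10 species

A branch-and-bound version would simply refuse to descend into any insertion whose partial step count, plus the predicted minimum number of additional steps, already equals or exceeds the best tree found so far; that pruning step is described next.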
This prediction is made by counting how many sites which are invariant in the data up to the most recent species added will ultimately show variation when further species are added. Thus if 20 sites vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more sites which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no less than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther. The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time. The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible. This process of evaluating which species to add in which order goes on the first time the search makes a tree; thereafter it uses that order. The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer steps than these, it clears out the list and restarts it with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.

Controlling Run Times

Among the quantities available to be set from the menu of DNAPENNY, two (howoften and howmany) are of particular importance. As DNAPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DNAPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.
When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what is has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften, and nothing will be printed out except some headings and then the final trees. The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate. A constant at the beginning of the program that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if maxtrees is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If maxtrees is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that maxtrees be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, maxtrees is set to 100 in the distribution copy. Method and Options The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DNAPARS, and thus the remainder of this document will be nearly identical to the DNAPARS document. This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are exactly analogous to those of DNAPARS: 1. Each site evolves independently. 2. Different lineages evolve independently. 3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny. 4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch. 5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another. Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). 
For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events. Usage Here is a sample session with fdnapenny % fdnapenny Penny algorithm for DNA Input (aligned) nucleotide sequence set(s): dnapenny.dat Phylip dnapenny program output file [dnapenny.fdnapenny]: justweights: false numwts: 0 How many trees looked Approximate at so far Length of How many percentage (multiples shortest tree trees this short searched of 100): found so far found so far so far ---------- ------------ ------------ ------------ 1 9.0 2 0.11 2 8.0 3 6.67 3 8.0 9 20.00 4 8.0 9 86.67 Output written to file "dnapenny.fdnapenny" Trees also written onto file "dnapenny.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Penny algorithm for DNA Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-outfile] outfile [*.fdnapenny] Phylip dnapenny program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties (no help text) properties value -howoften integer [100] How often to report, in trees (Any integer value) -howmany integer [1000] How many groups of trees (Any integer value) -[no]simple boolean [Y] Branch and bound is simple -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1.0] Threshold value (Number 1.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdnapenny] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -stepbox boolean [N] Print out steps in each site -ancseq boolean [N] Print sequences at all nodes of tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help 
boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnapenny reads any normal sequence USAs Input files for usage example File: dnapenny.dat 8 6 Alpha1 AAGAAG Alpha2 AAGAAG Beta1 AAGGGG Beta2 AAGGGG Gamma1 AGGAAG Gamma2 AGGAAG Delta GGAGGA Epsilon GGAAAG Output file format fdnapenny output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each character. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: dnapenny.fdnapenny Penny algorithm for DNA, version 3.69.650 branch-and-bound to find all most parsimonious trees requires a total of 8.000 9 trees in all found +--------------------Alpha1 ! ! +-----------Alpha2 ! ! 1 +-----4 +--Epsilon ! ! ! +-----6 ! ! ! ! +--Delta ! ! +--5 +--2 ! +--Gamma2 ! +-----7 ! +--Gamma1 ! ! +--Beta2 +--------------3 +--Beta1 remember: this is an unrooted tree! +--------------------Alpha1 ! ! +-----------Alpha2 ! ! 1 +-----4 +--------Gamma2 ! ! ! ! ! ! +--7 +--Epsilon ! ! ! +--6 +--2 +--5 +--Delta ! ! ! +-----Gamma1 ! ! +--Beta2 +--------------3 +--Beta1 [Part of this file has been deleted for brevity] +--5 +--Delta ! +-----Gamma1 remember: this is an unrooted tree! +--------------------Alpha1 ! ! +-----Alpha2 1 +-----------2 ! ! ! +--Beta2 ! ! +--3 ! ! +--Beta1 +--4 ! +-----Gamma2 ! +--7 ! ! ! +--Epsilon +--------5 +--6 ! +--Delta ! +--------Gamma1 remember: this is an unrooted tree! +--------------------Alpha1 ! ! +-----Alpha2 1 +-----------2 ! ! ! +--Beta2 ! ! +--3 ! ! +--Beta1 +--4 ! +--Epsilon ! +-----6 ! ! +--Delta +--------5 ! +--Gamma2 +-----7 +--Gamma1 remember: this is an unrooted tree! 
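The total of 8.000 steps reported above can be checked with Fitch's (1971) counting method: at each site, work up the tree intersecting the children's state sets, taking the union and adding one step whenever the intersection is empty. The following Python sketch is an illustration of that counting, not DNAPENNY's own code; the tree is the first of the nine trees in dnapenny.treefile, taken with the rooting implied by its parentheses.

SEQS = {
    "Alpha1": "AAGAAG", "Alpha2": "AAGAAG",
    "Beta1": "AAGGGG", "Beta2": "AAGGGG",
    "Gamma1": "AGGAAG", "Gamma2": "AGGAAG",
    "Delta": "GGAGGA", "Epsilon": "GGAAAG",
}
# First tree from dnapenny.treefile, written as nested tuples:
TREE = ("Alpha1", (("Alpha2", (("Epsilon", "Delta"), ("Gamma2", "Gamma1"))),
                   ("Beta2", "Beta1")))

def fitch(tree, site):
    """Return (state set, steps) for one site of one subtree."""
    if isinstance(tree, str):                      # a tip: its observed base
        return {SEQS[tree][site]}, 0
    (lset, lsteps), (rset, rsteps) = fitch(tree[0], site), fitch(tree[1], site)
    if lset & rset:
        return lset & rset, lsteps + rsteps
    return lset | rset, lsteps + rsteps + 1        # empty intersection costs one step

total = sum(fitch(TREE, i)[1] for i in range(6))
print(total)                                       # 8, matching the output above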
File: dnapenny.treefile (Alpha1,((Alpha2,((Epsilon,Delta),(Gamma2,Gamma1))),(Beta2,Beta1)))[0.1111]; (Alpha1,((Alpha2,(Gamma2,((Epsilon,Delta),Gamma1))),(Beta2,Beta1)))[0.1111]; (Alpha1,((Alpha2,((Gamma2,(Epsilon,Delta)),Gamma1)),(Beta2,Beta1)))[0.1111]; (Alpha1,(Alpha2,((Gamma2,((Epsilon,Delta),Gamma1)),(Beta2,Beta1))))[0.1111]; (Alpha1,(Alpha2,(((Epsilon,Delta),(Gamma2,Gamma1)),(Beta2,Beta1))))[0.1111]; (Alpha1,(Alpha2,(((Gamma2,(Epsilon,Delta)),Gamma1),(Beta2,Beta1))))[0.1111]; (Alpha1,((Alpha2,(Beta2,Beta1)),(Gamma2,((Epsilon,Delta),Gamma1))))[0.1111]; (Alpha1,((Alpha2,(Beta2,Beta1)),((Gamma2,(Epsilon,Delta)),Gamma1)))[0.1111]; (Alpha1,((Alpha2,(Beta2,Beta1)),((Epsilon,Delta),(Gamma2,Gamma1))))[0.1111]; Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fprotdist.txt0000664000175000017500000005631612171064331016157 00000000000000 fprotdist Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Protein distance algorithm Description Computes a distance measure for protein sequences, using maximum likelihood estimates based on the Dayhoff PAM matrix, the JTT matrix model, the PBM model, Kimura's 1983 approximation to these, or a model based on the genetic code plus a constraint on changing to a different category of amino acid. 
The distances can also be corrected for gamma-distributed and gamma-plus-invariant-sites-distributed rates of change in different sites. Rates of evolution can vary among sites in a prespecified way, and also according to a Hidden Markov model. The program can also make a table of percentage similarity among sequences. The distances can be used in the distance matrix programs.

Algorithm

This program uses protein sequences to compute a distance matrix, under five different models of amino acid replacement. It can also compute a table of similarity between the amino acid sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the parsimony program PROTPARS. The program reads in protein sequences and writes an output file containing the distance matrix or similarity table. The five models of amino acid substitution are one which is based on the Jones, Taylor and Thornton (1992) model of amino acid change, the PMB model (Veerassamy, Smith and Tillier, 2004) which is derived from the Blocks database of conserved protein motifs, one based on the PAM matrixes of Margaret Dayhoff, one due to Kimura (1983) which approximates it based simply on the fraction of similar amino acids, and one based on a model in which the amino acids are divided up into groups, with change occurring based on the genetic code but with greater difficulty of changing between groups. The program correctly takes into account a variety of sequence ambiguities. The five methods are:

(1) The Dayhoff PAM matrix. This uses Dayhoff's PAM 001 matrix from Dayhoff (1979), page 348. The PAM model is an empirical one that scales probabilities of change from one amino acid to another in terms of a unit which is an expected 1% change between two amino acid sequences. The PAM 001 matrix is used to make a transition probability matrix which allows prediction of the probability of changing from any one amino acid to any other, and also predicts equilibrium amino acid composition. The program assumes that these probabilities are correct and bases its computations of distance on them. The distance that is computed is scaled in units of expected fraction of amino acids changed. This is a unit such that 1.0 is 100 PAM's.

(2) The Jones-Taylor-Thornton model. This is similar to the Dayhoff PAM model, except that it is based on a recounting of the number of observed changes in amino acids by Jones, Taylor, and Thornton (1992). They used a much larger sample of protein sequences than did Dayhoff. The distance is scaled in units of the expected fraction of amino acids changed (100 PAM's). Because its sample is so much larger this model is to be preferred over the original Dayhoff PAM model. It is the default model in this program.

(3) The PMB (Probability Matrix from Blocks) model. This is derived using the Blocks database of conserved protein motifs. It will be described in a paper by Veerassamy, Smith and Tillier (2004). Elisabeth Tillier kindly made the matrices available for this model.

(4) Kimura's distance. This is a rough-and-ready distance formula for approximating PAM distance by simply measuring the fraction of amino acids, p, that differs between two sequences and computing the distance as (Kimura, 1983)

   D = -log_e(1 - p - 0.2 p^2).

This is very quick to do but has some obvious limitations.
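To show what the Kimura formula amounts to in practice, here is a small Python function. It is an illustrative sketch, not the fprotdist source; in particular, the simple treatment of gaps is a choice made for this sketch. Note that the argument of the logarithm reaches zero when p is about 0.8541, which is why the distance becomes infinite beyond that point, as noted below.

import math

def kimura_protein_distance(seq1, seq2):
    """Kimura (1983) approximation: D = -ln(1 - p - 0.2 p^2), where p is the
    fraction of aligned positions at which the two sequences differ.
    Positions with a gap in either sequence are simply skipped here."""
    pairs = [(a, b) for a, b in zip(seq1, seq2) if a != "-" and b != "-"]
    p = sum(a != b for a, b in pairs) / len(pairs)
    x = 1.0 - p - 0.2 * p * p
    if x <= 0.0:                       # p greater than about 0.8541: distance is infinite
        return float("inf")
    return -math.log(x)

# Alpha vs Beta from the protdist.dat example later in this document:
# 3 of 13 positions differ, giving about 0.2763 under this formula.
print(round(kimura_protein_distance("AACGTGGCCACAT", "AAGGTCGCCACAC"), 4))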
The Kimura formula does not take into account which amino acids differ or to what amino acids they change, so some information is lost. The units of the distance measure are fraction of amino acids differing, as also in the case of the PAM distance. If the fraction of amino acids differing gets larger than 0.8541 the distance becomes infinite.

(5) The Categories distance. This is my own concoction. I imagined a nucleotide sequence changing according to Kimura's 2-parameter model, with the exception that some changes of amino acids are less likely than others. The amino acids are grouped into a series of categories. Any base change that does not change which category the amino acid is in is allowed, but if an amino acid changes category this is allowed only a certain fraction of the time. The fraction is called the "ease" and there is a parameter for it, which is 1.0 when all changes are allowed and near 0.0 when changes between categories are nearly impossible. In this option I have allowed the user to select the Transition/Transversion ratio, which of several genetic codes to use, and which categorization of amino acids to use. There are three of them, a somewhat random sample:

1. The George-Hunt-Barker (1988) classification of amino acids,
2. A classification provided by my colleague Ben Hall when I asked him for one,
3. One I found in an old "baby biochemistry" book (Conn and Stumpf, 1963), which contains most of the biochemistry I was ever taught, and all that I ever learned.

Interestingly enough, all of them are consistent with the same linear ordering of amino acids, which they divide up in different ways. For the Categories model I have set as default the George/Hunt/Barker classification with the "ease" parameter set to 0.457 which is approximately the value implied by the empirical rates in the Dayhoff PAM matrix. The method uses, as I have noted, Kimura's (1980) 2-parameter model of DNA change. The Kimura "2-parameter" model allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:

              To:    A        G        C        T
           ---------------------------------------
   From:  A      1-a-2b       a        b        b
          G         a      1-a-2b      b        b
          C         b         b     1-a-2b      a
          T         b         b        a     1-a-2b

where a is u dt, the product of the rate of transitions per unit time and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length dt of the time interval. Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. The Kimura distance is straightforward to compute. The other distance methods are considerably slower, and they look at all positions, and find that distance which makes the likelihood highest. This likelihood is in effect the length of the internal branch in a two-species tree that connects these two species. Its likelihood is just the product, under the model, of the probabilities of each position having the (one or) two amino acids that are actually found. This is fairly slow to compute. The computation proceeds from an eigenanalysis (spectral decomposition) of the transition probability matrix. In the case of the PAM 001 matrix the eigenvalues and eigenvectors are precomputed and are hard-coded into the program in over 400 statements. In the case of the Categories model the program computes the eigenvalues and eigenvectors itself, which will add a delay.
But the delay is independent of the number of species as the calculation is done only once, at the outset. The actual algorithm for estimating the distance is in both cases a bisection algorithm which tries to find the point at which the derivative of the likelihood is zero. Some of the kinds of ambiguous amino acids like "glx" are correctly taken into account. However, gaps are treated as if they are unknown amino acids, which means those positions get dropped from that particular comparison. However, they are not dropped from the whole analysis. You need not eliminate regions containing gaps, as long as you are reasonably sure of the alignment there. Note that there is an assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause the method to misinterpret the meaning of those positions that had changed. The program can now correct distances for unequal rates of change at different amino acid positions. This correction, which was introduced for DNA sequences by Jin and Nei (1990), assumes that the distribution of rates of change among amino acid positions follows a Gamma distribution. The user is asked for the value of a parameter that determines the amount of variation of rates among amino acid positions. Instead of the more widely-known coefficient alpha, PROTDIST uses the coefficient of variation (ratio of the standard deviation to the mean) of rates among amino acid positions. So if there is 20% variation in rates, the CV is 0.20. The square of the C.V. is also the reciprocal of the better-known "shape parameter", alpha, of the Gamma distribution, so in this case the shape parameter alpha = 1/(0.20*0.20) = 25. If you want to achieve a particular value of alpha, such as 10, you will want to use a CV of 1/sqrt(10) = 0.316. In addition to the five distance calculations, the program can also compute a table of similarities between amino acid sequences. These values are the fractions of amino acid positions identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical amino acids, so that no credit is given for having (for example) different hydrophobic amino acids at the corresponding positions in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree.
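Since the CV-versus-alpha conversion above is a frequent source of confusion, here is a two-function helper (an illustrative sketch, not part of fprotdist) for going back and forth between the coefficient of variation asked for by the program and the Gamma shape parameter alpha used in much of the literature.

import math

def cv_from_alpha(alpha):
    return 1.0 / math.sqrt(alpha)     # alpha = 1 / CV^2, so CV = 1 / sqrt(alpha)

def alpha_from_cv(cv):
    return 1.0 / (cv * cv)

print(alpha_from_cv(0.20))            # 25.0, as in the text above
print(round(cv_from_alpha(10), 3))    # 0.316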
Go to the input files for this example Go to the output files for this example Command line arguments Protein distance algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-outfile] outfile [*.fprotdist] Phylip distance matrix output file Additional (Optional) qualifiers (* if not always prompted): -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array Rate for each category * -categories properties File of substitution rate categories -weights properties Weights file -method menu [j] Choose the method to use (Values: j (Jones-Taylor-Thornton matrix); h (Henikoff/Tiller PMB matrix); d (Dayhoff PAM matrix); k (Kimura formula); s (Similarity table); c (Categories model)) * -gammatype menu [c] Rate variation among sites (Values: g (Gamma distributed rates); i (Gamma+invariant sites); c (Constant rate)) * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -invarcoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -aacateg menu [G] Choose the category to use (Values: G (George/Hunt/Barker (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr Pro)); C (Chemical (Cys Met), (Val Leu Ileu Gly Ala Ser Thr), (Pro)); H (Hall (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr),(Pro))) * -whichcode menu [u] Which genetic code (Values: u (Universal); c (Ciliate); m (Universal mitochondrial); v (Vertebrate mitochondrial); f (Fly mitochondrial); y (Yeast mitochondrial)) * -ease float [0.457] Prob change category (1.0=easy) (Number from 0.000 to 1.000) * -ttratio float [2.0] Transition/transversion ratio (Number 0.000 or more) * -basefreq array [0.25 0.25 0.25 0.25] Base frequencies for A C G T/U (use blanks to separate) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. 
More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fprotdist reads any normal sequence USAs. Input files for usage example File: protdist.dat 5 13 Alpha AACGTGGCCACAT Beta AAGGTCGCCACAC Gamma CAGTTCGCCACAA Delta GAGATTTCCGCCT Epsilon GAGATCTCCGCCC Output file format fprotdist output contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. The distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs. If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the other two characters being omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences. If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used. In the Categories model of substitution, the distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change is set to 1.0. For the Dayhoff PAM and Kimura models the distances are scaled in terms of the expected numbers of amino acid substitutions per site. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the protein (or nucleotide) sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases.
For example, with the Kimura model, if the two sequences differ in 85.41% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues a warning message indicating which pair of species are causing the problem, and computes a distance of -1.0. Output files for usage example File: protdist.fprotdist 5 Alpha 0.000000 0.331834 0.628142 1.036660 1.365098 Beta 0.331834 0.000000 0.377406 1.102689 0.682218 Gamma 0.628142 0.377406 0.000000 0.979550 0.866781 Delta 1.036660 1.102689 0.979550 0.000000 0.227515 Epsilon 1.365098 0.682218 0.866781 0.227515 0.000000 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fseqboot.txt0000664000175000017500000005576012171064331015765 00000000000000 fseqboot Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Bootstrapped sequences algorithm Description Reads in a data set, and produces multiple data sets from it by bootstrap resampling. 
Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development. Algorithm SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format. To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis. This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way, some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does. If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input. The resampling methods available are: * The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985).
It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data. * The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values. * Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Kuensch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. * Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters. * Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters. * Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996), but with a fraction 1/e (1/2.71828) of the characters deleted. This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However, it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained. * Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure.
It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species). * Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test). * Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species. * Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats: Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there. MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of the University of Calgary. See his big list of molecular biology XML projects. BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language; includes a multiple sequence alignment XML format. A small sketch illustrating how the main column-resampling schemes described above act on an alignment is given after the sample session below. Usage Here is a sample session with fseqboot % fseqboot -seed 3 Bootstrapped sequences algorithm Input (aligned) sequence set: seqboot.dat Phylip seqboot_seq program output file [seqboot.fseqboot]: completed replicate number 10 completed replicate number 20 completed replicate number 30 completed replicate number 40 completed replicate number 50 completed replicate number 60 completed replicate number 70 completed replicate number 80 completed replicate number 90 completed replicate number 100 Output written to file "seqboot.fseqboot" Done.
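The bootstrap, block-bootstrap, and delete-half-jackknife replicates described above differ only in how the columns of the alignment are chosen. The following is a minimal illustrative sketch of that column sampling (it is not the SEQBOOT/fseqboot source; the function names and the alignment representation are assumptions made for the example). The data are the five short sequences from seqboot.dat used in the sample session.

import random

def bootstrap_columns(n_sites, rng):
    # Ordinary bootstrap: sample n_sites columns with replacement.
    return [rng.randrange(n_sites) for _ in range(n_sites)]

def block_bootstrap_columns(n_sites, block, rng):
    # Block bootstrap: sample blocks of adjacent columns, wrapping around
    # at the end of the sequence as described above.
    cols = []
    for _ in range(n_sites // block):
        start = rng.randrange(n_sites)
        cols.extend((start + k) % n_sites for k in range(block))
    return cols

def delete_half_jackknife_columns(n_sites, rng):
    # Keep a random half of the columns (no duplicates); with an odd number of
    # sites, (n+1)/2 or (n-1)/2 columns are kept with equal probability.
    keep = rng.sample(range(n_sites), (n_sites + rng.choice((0, 1))) // 2)
    return sorted(keep)

def replicate(alignment, columns):
    # Build one resampled data set from the chosen columns.
    return {name: ''.join(seq[c] for c in columns) for name, seq in alignment.items()}

rng = random.Random(3)   # an odd seed, as required by -seed in the session above
aln = {"Alpha": "AACAAC", "Beta": "AACCCC", "Gamma": "ACCAAC",
       "Delta": "CCACCA", "Epsilon": "CCAAAC"}
print(replicate(aln, bootstrap_columns(6, rng)))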
Go to the input files for this example Go to the output files for this example Command line arguments Bootstrapped sequences algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqset (Aligned) sequence set filename and optional format, or reference (input USA) [-outfile] outfile [*.fseqboot] Phylip seqboot_seq program output file Additional (Optional) qualifiers (* if not always prompted): -categories properties File of input categories -weights properties Weights file -test menu [b] Choose test (Values: b (Bootstrap); j (Jackknife); c (Permute species for each character); o (Permute character order); s (Permute within species); r (Rewrite data)) * -regular toggle [N] Altered sampling fraction * -fracsample float [100.0] Samples as percentage of sites (Number from 0.100 to 100.000) * -rewriteformat menu [p] Output format (Values: p (PHYLIP); n (NEXUS); x (XML)) * -seqtype menu [d] Output format (Values: d (dna); p (protein); r (rna)) * -blocksize integer [1] Block size for bootstraping (Integer 1 or more) * -reps integer [100] How many replicates (Integer 1 or more) * -justweights menu [d] Write out datasets or just weights (Values: d (Datasets); w (Weights)) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -printdata boolean [N] Print out the data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fseqboot data files read by SEQBOOT are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. 
Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs. fseqboot reads any normal sequence USAs. Input files for usage example File: seqboot.dat 5 6 Alpha AACAAC Beta AACCCC Gamma ACCAAC Delta CCACCA Epsilon CCAAAC Output file format fseqboot output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there is an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters. The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factor symbol, so that they all are understood to be recoding one multistate character, they will be resampled together as a group. The order of species in the data sets in the output file will vary randomly. This is a precaution to prevent any result which is sensitive to the input order of species from showing up repeatedly in the programs that analyze these data, and thus appearing to have evidence in its favor. The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out, up to 20 times during the resampling process, a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, a line is printed after every 5 data sets saying which data set has just been completed. This option should be turned off if the program is running in the background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.
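If you want to post-process the replicates with your own scripts, the sequential layout shown in the example below is easy to read back. The following is a minimal illustrative sketch (the function name is made up for this example); it assumes whitespace-separated names, whereas strict PHYLIP files pad names to 10 characters and may be interleaved, so it is not a general PHYLIP parser.

def read_replicates(path):
    # Read a file of concatenated sequential data sets: each block starts with
    # a "species sites" header line, followed by one "name sequence" line per
    # species.  Returns a list of dicts, one per replicate data set.
    with open(path) as fh:
        lines = [ln for ln in fh if ln.strip()]
    replicates = []
    i = 0
    while i < len(lines):
        n_species, n_sites = map(int, lines[i].split())
        block = {}
        for line in lines[i + 1:i + 1 + n_species]:
            name, seq = line.split(None, 1)
            block[name] = ''.join(seq.split())
        assert all(len(s) == n_sites for s in block.values())
        replicates.append(block)
        i += 1 + n_species
    return replicates

# e.g. reps = read_replicates("seqboot.fseqboot"); print(len(reps))   # 100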
Output files for usage example File: seqboot.fseqboot 5 6 Alpha AAACCA Beta AAACCC Gamma ACCCCA Delta CCCAAC Epsilon CCCAAA 5 6 Alpha AAACAA Beta AAACCC Gamma ACCCAA Delta CCCACC Epsilon CCCAAA 5 6 Alpha AAAAAC Beta AAACCC Gamma AACAAC Delta CCCCCA Epsilon CCCAAC 5 6 Alpha CCCCCA Beta CCCCCC Gamma CCCCCA Delta AAAAAC Epsilon AAAAAA 5 6 Alpha AAAACC Beta AAACCC Gamma AACACC Delta CCCCAA Epsilon CCCACC 5 6 Alpha AAAACC Beta ACCCCC Gamma AAAACC Delta CCCCAA Epsilon CAAACC 5 6 Alpha AACCAA Beta AACCCC Gamma ACCCAA Delta CCAACC Epsilon CCAAAA 5 6 Alpha AAAACC Beta ACCCCC Gamma AAAACC Delta CCCCAA Epsilon CAAACC 5 6 Alpha AACACC [Part of this file has been deleted for brevity] Gamma ACAAAA Delta CCCCCC Epsilon CCAAAA 5 6 Alpha AACAAC Beta AACCCC Gamma AACAAC Delta CCACCA Epsilon CCAAAC 5 6 Alpha AACAAA Beta AACCCC Gamma CCCAAA Delta CCACCC Epsilon CCAAAA 5 6 Alpha ACAAAA Beta ACCCCC Gamma CCAAAA Delta CACCCC Epsilon CAAAAA 5 6 Alpha CAAAAA Beta CCCCCC Gamma CAAAAA Delta ACCCCC Epsilon AAAAAA 5 6 Alpha CAACCC Beta CCCCCC Gamma CAACCC Delta ACCAAA Epsilon AAACCC 5 6 Alpha ACAACC Beta ACCCCC Gamma ACAACC Delta CACCAA Epsilon CAAACC 5 6 Alpha AAAAAA Beta AAAAAC Gamma ACCCCA Delta CCCCCC Epsilon CCCCCA 5 6 Alpha AACAAC Beta AACCCC Gamma CCCAAC Delta CCACCA Epsilon CCAAAC Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. 
Comments None PHYLIPNEW-3.69.650/emboss_doc/text/ftreedistpair.txt0000664000175000017500000004172712171064331017006 00000000000000 ftreedistpair Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Calculate distance between two sets of trees Description Computes the Branch Score distance between trees, which allows for differences in tree topology and which also makes use of branch lengths. Also computes the Robinson-Foulds symmetric difference distance between trees, which allows for differences in tree topology but does not use branch lengths. Algorithm This program computes distances between trees. Two distances are computed, the Branch Score Distance of Kuhner and Felsenstein (1994), and the more widely known Symmetric Difference of Robinson and Foulds (1981). The Branch Score Distance uses branch lengths, and can only be calculated when the trees have lengths on all branches. The Symmetric Difference does not use branch length information, only the tree topologies. It must also be borne in mind that neither distance has any immediate statistical interpretation -- we cannot say whether a larger distance is significantly larger than a smaller one. These distances are computed by considering all possible branches that could exist on the two trees. Each branch divides the set of species into two groups -- the ones connected to one end of the branch and the ones connected to the other. This makes a partition of the full set of species. For example, the tree (in Newick notation) ((A,C),(D,(B,E))) has two internal branches. One induces the partition {A, C | B, D, E} and the other induces the partition {A, C, D | B, E}. A different tree with the same set of species, (((A,D),C),(B,E)), has internal branches that correspond to the two partitions {A, C, D | B, E} and {A, D | B, C, E}. Note that the other branches, all of which are external branches, induce partitions that separate one species from all the others. Thus there are 5 partitions like this: {C | A, B, D, E}, on each of these trees. These are always present on all trees, provided that each tree has each species at the end of its own branch. In the case of the Branch Score distance, each partition that does exist on a tree also has a branch length associated with it. Thus if the tree is (((A:0.1,D:0.25):0.05,C:0.01):0.2,(B:0.3,E:0.8):0.2) the list of partitions and their branch lengths is:

   {A | B, C, D, E}      0.1
   {D | A, B, C, E}      0.25
   {A, D | B, C, E}      0.05
   {C | A, B, D, E}      0.01
   {A, D, C | B, E}      0.4
   {B | A, C, D, E}      0.3
   {E | A, B, C, D}      0.8

Note that the tree is being treated as unrooted here, so that the branch lengths on either side of the rootmost node are summed up to get a branch length of 0.4. The Branch Score Distance imagines us as having made a list of all possible partitions, the ones shown above and also all 8 other possible partitions, which correspond to branches that are not found in this tree. These are assigned branch lengths of 0. For two trees, we imagine constructing these lists, and then summing the squared differences between the branch lengths. Thus if both trees have branches {A, D | B, C, E}, the sum contains the square of the difference between the branch lengths. If one tree has the branch and the other doesn't, it contains the square of the difference between the branch length and zero (in other words, the square of that branch length).
If both trees do not have a particular branch, nothing is added to the sum because the difference is then between 0 and 0. The Branch Score Distance takes this sum of squared differences and computes its square root. Note that it has some desirable properties. When the branches involved in a topological difference are small, it is not very big. When branches are both present but differ in length, it is affected. The Symmetric Difference is simply a count of how many partitions there are, among the two trees, that are on one tree and not on the other. In the example above there are two partitions, {A, C | B, D, E} and {A, D | B, C, E}, each of which is present on only one of the two trees. The Symmetric Difference between the two trees is therefore 2. When the two trees are fully resolved bifurcating trees, their symmetric distance must be an even number; it can range from 0 to twice the number of internal branches, which for n species is 2n-6. Note the relationship between the two distances. If all trees have all their branches of length 1.0, the square of the Branch Score Distance equals the Symmetric Difference, as each branch that is present in one tree but not in the other results in 1.0 being added to the sum of squared differences. We have assumed that nothing is lost if the trees are treated as unrooted trees. It is easy to define a counterpart to the Branch Score Distance and one to the Symmetric Difference for rooted trees. Each branch then defines a set of species, namely the clade defined by that branch. Thus if the first of the two trees above were considered as a rooted tree it would define the three clades {A, C}, {B, D, E}, and {B, E}. The Branch Score Distance is computed from the branch lengths for all possible sets of species, with 0 put for each set that does not occur on that tree. The table above will be nearly the same, but with two entries instead of one for the sets on either side of the root, {A, C, D} and {B, E}. The Symmetric Difference between two rooted trees is simply the count of the number of clades that are defined by one but not by the other. For the second tree the clades would be {A, D}, {A, C, D}, and {B, E}. The Symmetric Difference between these two rooted trees would then be 4. Although the examples we have discussed have involved fully bifurcating trees, the input trees can have multifurcations. This does not cause any complication for the Branch Score Distance. For the Symmetric Difference, it can lead to distances that are odd numbers. However, note one strong restriction. The trees should all have the same list of species. If you use one set of species in the first two trees, and another in the second two, and choose distances for adjacent pairs, the distances will be incorrect and will depend on the order of these pairs in the input tree file, in odd ways. Usage Here is a sample session with ftreedistpair % ftreedistpair -style s Calculate distance between two sets of trees Phylip tree file: treedist.dat Second phylip tree file: treedist.dat Phylip treedist program output file [treedist.ftreedistpair]: Done.
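Both distances are simple to compute once each tree has been reduced to its set of partitions and branch lengths. The sketch below is illustrative only (it is not the TREEDIST/ftreedistpair source); each partition is represented by the frozenset of species on one side, which is adequate for this small example, although a real implementation would store each split in a canonical form (for instance, the side not containing some fixed reference species). The branch lengths of 0.1 are assumed purely for illustration.

import math

def symmetric_difference(parts1, parts2):
    # Count of partitions present in one tree but not in the other.
    return len(set(parts1) ^ set(parts2))

def branch_score(parts1, parts2):
    # Square root of the summed squared branch-length differences, using a
    # length of 0 for any partition that is absent from one of the trees.
    total = 0.0
    for p in set(parts1) | set(parts2):
        total += (parts1.get(p, 0.0) - parts2.get(p, 0.0)) ** 2
    return math.sqrt(total)

# Internal partitions of the two example topologies, every branch length 0.1;
# the one-species partitions are shared with equal lengths and contribute 0.
t1 = {frozenset("AC"): 0.1, frozenset("BE"): 0.1}    # ((A,C),(D,(B,E)))
t2 = {frozenset("AD"): 0.1, frozenset("BE"): 0.1}    # (((A,D),C),(B,E))
print(symmetric_difference(t1, t2))       # 2, as in the text
print(round(branch_score(t1, t2), 6))     # 0.141421, i.e. 0.1 * sqrt(2)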
Go to the input files for this example Go to the output files for this example Command line arguments Calculate distance between two sets of trees Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-intreefile] tree Phylip tree file [-bintreefile] tree Second phylip tree file [-outfile] outfile [*.ftreedistpair] Phylip treedist program output file Additional (Optional) qualifiers: -dtype menu [b] Distance type (Values: s (Symmetric difference); b (Branch score distance)) -pairing menu [l] Tree pairing method (Values: c (Distances between corresponding pairs each tree file); l (Distances between all possible pairs in each tree file)) -style menu [v] Distances output option (Values: f (Full_matrix); v (Verbose, one pair per line); s (Sparse, one pair per line)) -noroot boolean [N] Trees to be treated as rooted -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -progress boolean [N] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format ftreedistpair reads two input tree files. The tree files may either have the number of trees on their first line, or not. If the number of trees is given, it is actually ignored and all trees in the tree file are considered, even if there are more trees than indicated by the number. There is no maximum number of trees that can be processed but, if you feed in too many, there may be an error message about running out of memory. The problem is particularly acute if you choose the option to examine all possible pairs of trees one from each of two input tree files. Thus if there are 1,000 trees in the input tree file, keep in mind that all possible pairs means 1,000,000 pairs to be examined! 
Input files for usage example File: treedist.dat (A:0.1,(B:0.1,(H:0.1,(D:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,((J:0.1,H:0.1):0.1,(((G:0.1,E:0.1):0.1, (F:0.1,I:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1, D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,(((J:0.1,H:0.1):0.1, D:0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1, C:0.1):0.1):0.1):0.1):0.1):0.1); (A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1, D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1); Output file format If any of the four types of analysis are selected, the user must specify how they want the results presented. The Full matrix (choice F) is a table showing all distances. It is written onto the output file. The table is presented as groups of 10 columns. Here is the Full matrix for the 12 trees in the input tree file which is given as an example at the end of this page. Tree distance program, version 3.6 Symmetric differences between all pairs of trees in tree file: 1 2 3 4 5 6 7 8 9 10 \------------------------------------------------------------ 1 | 0 4 2 10 10 10 10 10 10 10 2 | 4 0 2 10 8 10 8 10 8 10 3 | 2 2 0 10 10 10 10 10 10 10 4 | 10 10 10 0 2 2 4 2 4 0 5 | 10 8 10 2 0 4 2 4 2 2 6 | 10 10 10 2 4 0 2 2 4 2 7 | 10 8 10 4 2 2 0 4 2 4 8 | 10 10 10 2 4 2 4 0 2 2 9 | 10 8 10 4 2 4 2 2 0 4 10 | 10 10 10 0 2 2 4 2 4 0 11 | 2 2 0 10 10 10 10 10 10 10 12 | 10 10 10 2 4 2 4 0 2 2 11 12 \------------ 1 | 2 10 2 | 2 10 3 | 0 10 4 | 10 2 5 | 10 4 6 | 10 2 7 | 10 4 8 | 10 0 9 | 10 2 10 | 10 2 11 | 0 10 12 | 10 0 The Full matrix is only available for analyses P and L (not for A or C). Option V (Verbose) writes one distance per line. The Verbose output is the default. Here it is for the example data set given below: Tree distance program, version 3.6 Symmetric differences between adjacent pairs of trees: Trees 1 and 2: 4 Trees 3 and 4: 10 Trees 5 and 6: 4 Trees 7 and 8: 4 Trees 9 and 10: 4 Trees 11 and 12: 10 Option S (Sparse or terse) is similar except that all that is given on each line are the numbers of the two trees and the distance, separated by blanks. This may be a convenient format if you want to write a program to read these numbers in, and you want to spare yourself the effort of having the program wade through the words on each line in the Verbose output. The first four lines of the Sparse output are titles that your program would want to skip past. Here is the Sparse output for the example trees. 
1 2 4 3 4 10 5 6 4 7 8 4 9 10 4 11 12 10 Output files for usage example File: treedist.ftreedistpair 1 13 0.000000e+00 1 14 2.000000e-01 1 15 1.414214e-01 1 16 3.162278e-01 1 17 3.162278e-01 1 18 3.162278e-01 1 19 3.162278e-01 1 20 3.162278e-01 1 21 3.162278e-01 1 22 3.162278e-01 1 23 1.414214e-01 1 24 3.162278e-01 2 13 2.000000e-01 2 14 0.000000e+00 2 15 1.414214e-01 2 16 3.162278e-01 2 17 2.828427e-01 2 18 3.162278e-01 2 19 2.828427e-01 2 20 3.162278e-01 2 21 2.828427e-01 2 22 3.162278e-01 2 23 1.414214e-01 2 24 3.162278e-01 3 13 1.414214e-01 3 14 1.414214e-01 3 15 0.000000e+00 3 16 3.162278e-01 3 17 3.162278e-01 3 18 3.162278e-01 3 19 3.162278e-01 3 20 3.162278e-01 3 21 3.162278e-01 3 22 3.162278e-01 3 23 0.000000e+00 3 24 3.162278e-01 4 13 3.162278e-01 4 14 3.162278e-01 4 15 3.162278e-01 4 16 0.000000e+00 4 17 1.414214e-01 4 18 1.414214e-01 4 19 2.000000e-01 4 20 1.414214e-01 4 21 2.000000e-01 4 22 0.000000e+00 4 23 3.162278e-01 4 24 1.414214e-01 5 13 3.162278e-01 5 14 2.828427e-01 [Part of this file has been deleted for brevity] 20 10 1.414214e-01 20 11 3.162278e-01 20 12 0.000000e+00 21 1 3.162278e-01 21 2 2.828427e-01 21 3 3.162278e-01 21 4 2.000000e-01 21 5 1.414214e-01 21 6 2.000000e-01 21 7 1.414214e-01 21 8 1.414214e-01 21 9 0.000000e+00 21 10 2.000000e-01 21 11 3.162278e-01 21 12 1.414214e-01 22 1 3.162278e-01 22 2 3.162278e-01 22 3 3.162278e-01 22 4 0.000000e+00 22 5 1.414214e-01 22 6 1.414214e-01 22 7 2.000000e-01 22 8 1.414214e-01 22 9 2.000000e-01 22 10 0.000000e+00 22 11 3.162278e-01 22 12 1.414214e-01 23 1 1.414214e-01 23 2 1.414214e-01 23 3 0.000000e+00 23 4 3.162278e-01 23 5 3.162278e-01 23 6 3.162278e-01 23 7 3.162278e-01 23 8 3.162278e-01 23 9 3.162278e-01 23 10 3.162278e-01 23 11 0.000000e+00 23 12 3.162278e-01 24 1 3.162278e-01 24 2 3.162278e-01 24 3 3.162278e-01 24 4 1.414214e-01 24 5 2.000000e-01 24 6 1.414214e-01 24 7 2.000000e-01 24 8 0.000000e+00 24 9 1.414214e-01 24 10 1.414214e-01 24 11 3.162278e-01 24 12 0.000000e+00 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description econsense Majority-rule and strict consensus tree fconsense Majority-rule and strict consensus tree ftreedist Calculate distances between trees Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fretree.txt0000664000175000017500000002753412171064331015575 00000000000000 fretree Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Interactive tree rearrangement Description Reads in a tree (with branch lengths if necessary) and allows you to reroot the tree, to flip branches, to change species names and branch lengths, and then write the result out. 
Can be used to convert between rooted and unrooted trees, and to write the tree into a preliminary version of a new XML tree file format which is under development and which is described in the RETREE documentation web page. Algorithm RETREE is a tree editor. It reads in a tree, or allows the user to construct one, and displays this tree on the screen. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. The input trees are in one file (with default file name intree), the output trees are written into another (outtree). The user can reroot, flip branches, change names of species, change or remove branch lengths, and move around to look at various parts of the tree if it is too large to fit on the screen. The trees can be multifurcating at any level, although the user is warned that many PHYLIP programs still cannot handle multifurcations above the root, or even at the root. A major use for this program will be to change rootedness of trees so that a rooted tree derived from one program can be fed in as an unrooted tree to another (you are asked about this when you give the command to write out the tree onto the tree output file). It will also be useful for specifying the length of a branch in a tree where you want a program like DNAML, DNAMLK, FITCH, or CONTML to hold that branch length constant (see the L suboption of the User Tree option in those programs. It will also be useful for changing the order of species for purely cosmetic reasons for DRAWGRAM and DRAWTREE, including using the Midpoint method of rooting the tree. It can also be used to write out a tree file in the Nexus format used by Paup and MacClade or in our XML tree file format. This program uses graphic characters that show the tree to best advantage on some computer systems. Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and to any system whose screen or terminals emulate ANSI standard terminals such as old Digitial VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types, (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier. Usage Here is a sample session with fretree % fretree Interactive tree rearrangement Number of species [0]: 10 Phylip tree file: retree.dat Phylip tree output file [retree.treefile]: NEXT? (R . U W O T F D B N H J K L C + ? X Q) (? for Help): Q Do you want to write out the tree to a file? (Y or N): Y Enter R if the tree is to be rooted, OR enter U if the tree is to be unrooted: U Tree written to file "retree.treefile" Reading tree file ... ,>>1:Human ,>22 ,>21 `>>2:Chimp ! ! ,>20 `>>>>>3:Gorilla ! ! ,>>>>>>>>>>19 `>>>>>>>>4:Orang ! ! ,>18 `>>>>>>>>>>>5:Gibbon ! ! ! ! ,>>>>>>>>6:Barbary Ma ! `>>>>>>>>>>>>>23 ! ! ,>>>>>7:Crab-e. Ma ,>>>>>>>17 `>24 ! ! ! ,>>8:Rhesus Mac ! ! `>25 ! ! `>>9:Jpn Macaq ,>16 ! ! ! `>>>>>>>>>>>>>>>>>>>>>>>>>10:Squir. Mon ! ! ! ! 
,>11:Tarsier ** 7 lines below screen ** Go to the input files for this example Go to the output files for this example Command line arguments Interactive tree rearrangement Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-spp] integer [0] Number of species (Any integer value) [-intreefile] tree Phylip tree file [-outtreefile] outfile [*.fretree] Phylip tree output file Additional (Optional) qualifiers: -initialtree menu [Arbitary] Initial tree (Values: a (Arbitary); u (User); s (Specify)) -format menu [p] Format to write trees (Values: p (PHYLIP); n (NEXUS); x (XML)) -screenwidth integer [80] Width of terminal screen in characters (Any integer value) -vscreenwidth integer [80] Width of plotting area in characters (Any integer value) -screenlines integer [24] Number of lines on screen (Any integer value) Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outtreefile" associated qualifiers -odirectory3 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fretree reads any normal sequence USAs. Input files for usage example File: retree.dat ((((((((Human,Chimp),Gorilla),Orang),Gibbon),(Barbary_Ma,(Crab-e._Ma, (Rhesus_Mac,Jpn_Macaq)))),Squir._Mon),((Tarsier,Lemur),Bovine)),Mouse); Output file format The N (output file format) option allows the user to specify that the tree files that are written by the program will be in one of three formats: 1. The PHYLIP default file format (the Newick standard) used by the programs in this package. 2. The Nexus format defined by David Swofford and by Wayne Maddison and David Maddison for their programs PAUP and MacClade. A tree file written in Nexus format should be directly readable by those programs (They also have options to read a regular PHYLIP tree file as well). 3. An XML tree file format which we have defined. The XML tree file format is fairly simple. The tree file, which may have multiple trees, is enclosed in a pair of ... tags. Each tree is included in tags ... . Each branch of the tree is enclosed in a pair of tags ... , which enclose the branch and all its descendants. If the branch has a length, this is given by the LENGTH attribute of the CLADE tag, so that the pair of tags looks like this: ... A tip of the tree is at the end of a branch (and hence that branch is enclosed in a pair of ... tags). Its name is enclosed by ... tags. Here is an XML tree: Mouse Bovine Gibbon Orang Gorilla Chimp Human The indentation is for readability but is not part of the XML tree standard, which ignores that kind of white space. What programs can read an XML tree? None right now, not even PHYLIP programs! But soon our lab's LAMARC package will have programs that can read an XML tree. XML is rapidly becoming the standard for representing and interchanging complex data -- it is time to have an XML tree standard. 
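As a rough illustration of the nested structure described above, here is a short sketch that writes such a file from a simple nested representation. Only the CLADE tag and its LENGTH attribute are named explicitly in this documentation; the NAME tag and the outer PHYLOGENY wrapper used below, like the branch lengths, are assumptions made for the example rather than the exact output of RETREE.

def write_clade(node, out, depth=1):
    # Each branch and all its descendants are wrapped in a CLADE element; a
    # branch length, if present, becomes the LENGTH attribute of that element.
    indent = "  " * depth
    length = ' LENGTH="%s"' % node["length"] if "length" in node else ""
    out.append("%s<CLADE%s>" % (indent, length))
    if "name" in node:                         # a tip: emit its name
        out.append("%s  <NAME>%s</NAME>" % (indent, node["name"]))
    for child in node.get("children", []):     # an interior branch: recurse
        write_clade(child, out, depth + 1)
    out.append("%s</CLADE>" % indent)

# ((Chimp,Human),Gorilla) with made-up branch lengths, purely for illustration.
tree = {"children": [
    {"length": 0.05, "children": [{"length": 0.1, "name": "Chimp"},
                                  {"length": 0.1, "name": "Human"}]},
    {"length": 0.2, "name": "Gorilla"}]}
lines = ["<PHYLOGENY>"]
write_clade(tree, lines)
lines.append("</PHYLOGENY>")
print("\n".join(lines))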
Certain extensions are obvious (to represent the bootstrap proportion for a branch, use BOOTP=0.83 in the CLADE tag, for example). There are other proposals for an XML tree standard. They have many similarities to this one, but are not identical to it. At the moment there is no mechanism in place for deciding between them other than seeing which get widely used. Here are links to other proposals: Taxonomic Markup Language http://www.albany.edu/~gilmr/pubxml/. and preprint at xml.coverpages.org/gilmour-TML.pdf published in the paper by Ron Gilmour (2000). Andrew Rambaut's BEAST XML phylogeny format See page 9 of PDF of BEAST manual at http://evolve.zoo.ox.ac.uk/beast/ An XML format for phylogenies is sketchly described there. treeml http://www.nomencurator.org/InfoVis2003/download/treeml.dtd (see also example: ) http://www.cs.umd.edu/hcil/iv03contest/datasets/treeml-sample.xml Jean-Daniel Fekete's DTD for a tree XML file The W (screen and window Width) option specifies the width in characters of the area which the trees will be plotted to fit into. This is by default 80 characters so that they will fit on a normal width terminal. The actual width of the display on the terminal (normally 80 characters) will be regarded as a window displaying part of the tree. Thus you could set the "plotting area" to 132 characters, and inform the program that the screen width is 80 characters. Then the program will display only part of the tree at any one time. Output files for usage example File: retree.treefile (((((((Human,Chimp),Gorilla),Orang),Gibbon),(Barbary_Ma,(Crab-e._Ma, (Rhesus_Mac,Jpn_Macaq)))),Squir._Mon),((Tarsier,Lemur),Bovine),Mouse); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description fdrawgram Plots a cladogram- or phenogram-like rooted tree diagram fdrawtree Plots an unrooted tree diagram Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fseqbootall.txt0000664000175000017500000005722412171064331016453 00000000000000 fseqbootall Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Bootstrapped sequences algorithm Description Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it from between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development Algorithm SEQBOOT is a general bootstrapping and data set translation tool. 
It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format. To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis. This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does. If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input. The resampling methods available are: * The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data. * The partial bootstrap.. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. 
It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values. * Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Kuensch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. * Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters. * Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters. * Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996), but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However, it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained. * Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species). * Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites).
It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test). * Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species. * Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats: Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there. MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of the University of Calgary. See his big list of molecular biology XML projects. BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format. Usage Here is a sample session with fseqbootall % fseqbootall -seed 3 Bootstrapped sequences algorithm Input (aligned) sequence set: seqboot.dat Phylip seqboot program output file [seqboot.fseqbootall]: bootstrap: true jackknife: false permute: false lockhart: false ild: false justwts: false completed replicate number 10 completed replicate number 20 completed replicate number 30 completed replicate number 40 completed replicate number 50 completed replicate number 60 completed replicate number 70 completed replicate number 80 completed replicate number 90 completed replicate number 100 Output written to file "seqboot.fseqbootall" Done.
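For readers who want to see what the bootstrap resampling described above amounts to, here is a small illustrative Python sketch (not the program's own code). It resamples alignment columns with replacement; the function name and the use of the seqboot.dat alignment below are choices of this example only.

   import random

   def bootstrap_columns(alignment, seed):
       # Sample N column indices with replacement, so some columns are
       # omitted and others duplicated, keeping the data set the same size.
       rng = random.Random(seed)
       nsites = len(next(iter(alignment.values())))
       picks = [rng.randrange(nsites) for _ in range(nsites)]
       return {name: "".join(seq[i] for i in picks)
               for name, seq in alignment.items()}

   alignment = {"Alpha": "AACAAC", "Beta": "AACCCC", "Gamma": "ACCAAC",
                "Delta": "CCACCA", "Epsilon": "CCAAAC"}
   replicates = [bootstrap_columns(alignment, seed=2 * r + 3) for r in range(100)]

Each replicate can then be analysed exactly as the original data set would be.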
Go to the input files for this example Go to the output files for this example Command line arguments Bootstrapped sequences algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infilesequences] seqset (Aligned) sequence set filename and optional format, or reference (input USA) [-outfile] outfile [*.fseqbootall] Phylip seqboot program output file Additional (Optional) qualifiers (* if not always prompted): -categories properties File of input categories -mixfile properties File of mixtures -ancfile properties File of ancestors -weights properties Weights file -factorfile properties Factors file -datatype menu [s] Choose the datatype (Values: s (Molecular sequences); m (Discrete Morphology); r (Restriction Sites); g (Gene Frequencies)) -test menu [b] Choose test (Values: b (Bootstrap); j (Jackknife); c (Permute species for each character); o (Permute character order); s (Permute within species); r (Rewrite data)) * -regular toggle [N] Altered sampling fraction * -fracsample float [100.0] Samples as percentage of sites (Number from 0.100 to 100.000) * -rewriteformat menu [p] Output format (Values: p (PHYLIP); n (NEXUS); x (XML)) * -seqtype menu [d] Output format (Values: d (dna); p (protein); r (rna)) * -morphseqtype menu [p] Output format (Values: p (PHYLIP); n (NEXUS)) * -blocksize integer [1] Block size for bootstraping (Integer 1 or more) * -reps integer [100] How many replicates (Integer 1 or more) * -justweights menu [d] Write out datasets or just weights (Values: d (Datasets); w (Weights)) * -enzymes boolean [N] Is the number of enzymes present in input file * -all boolean [N] All alleles present at each locus * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -printdata boolean [N] Print out the data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-infilesequences" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. 
More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fseqbootall data files read by SEQBOOT are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs. fseqbootall reads any normal sequence USAs. Input files for usage example File: seqboot.dat 5 6 Alpha AACAAC Beta AACCCC Gamma ACCAAC Delta CCACCA Epsilon CCAAAC Output file format fseqbootall output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters. The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factor symbol, so that they all are understood to be recoding one multistate character, they will be resampled together as a group. The order of species in the data sets in the output file will vary randomly. This is a precaution to prevent any result that is sensitive to the input order of species from showing up repeatedly when these data are analyzed, and thus appearing to have evidence in its favor. The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in the background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.
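To make the odd-character behaviour of Delete-Half-Jackknife resampling described above concrete, here is a brief Python sketch (illustrative only, not the program's code): with an odd number n of characters it keeps (n+1)/2 or (n-1)/2 columns with equal probability.

   import random

   def delete_half_indices(nchars, rng):
       # Number of retained characters: n/2, or with odd n a 50:50
       # choice between (n+1)/2 and (n-1)/2; no character is duplicated.
       keep = nchars // 2
       if nchars % 2 == 1 and rng.random() < 0.5:
           keep += 1
       return sorted(rng.sample(range(nchars), keep))

   rng = random.Random(3)
   print(delete_half_indices(13, rng))   # 6 or 7 retained column indices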
Output files for usage example File: seqboot.fseqbootall 5 6 Alpha AAACCA Beta AAACCC Gamma ACCCCA Delta CCCAAC Epsilon CCCAAA 5 6 Alpha AAACAA Beta AAACCC Gamma ACCCAA Delta CCCACC Epsilon CCCAAA 5 6 Alpha AAAAAC Beta AAACCC Gamma AACAAC Delta CCCCCA Epsilon CCCAAC 5 6 Alpha CCCCCA Beta CCCCCC Gamma CCCCCA Delta AAAAAC Epsilon AAAAAA 5 6 Alpha AAAACC Beta AAACCC Gamma AACACC Delta CCCCAA Epsilon CCCACC 5 6 Alpha AAAACC Beta ACCCCC Gamma AAAACC Delta CCCCAA Epsilon CAAACC 5 6 Alpha AACCAA Beta AACCCC Gamma ACCCAA Delta CCAACC Epsilon CCAAAA 5 6 Alpha AAAACC Beta ACCCCC Gamma AAAACC Delta CCCCAA Epsilon CAAACC 5 6 Alpha AACACC [Part of this file has been deleted for brevity] Gamma ACAAAA Delta CCCCCC Epsilon CCAAAA 5 6 Alpha AACAAC Beta AACCCC Gamma AACAAC Delta CCACCA Epsilon CCAAAC 5 6 Alpha AACAAA Beta AACCCC Gamma CCCAAA Delta CCACCC Epsilon CCAAAA 5 6 Alpha ACAAAA Beta ACCCCC Gamma CCAAAA Delta CACCCC Epsilon CAAAAA 5 6 Alpha CAAAAA Beta CCCCCC Gamma CAAAAA Delta ACCCCC Epsilon AAAAAA 5 6 Alpha CAACCC Beta CCCCCC Gamma CAACCC Delta ACCAAA Epsilon AAACCC 5 6 Alpha ACAACC Beta ACCCCC Gamma ACAACC Delta CACCAA Epsilon CAAACC 5 6 Alpha AAAAAA Beta AAAAAC Gamma ACCCCA Delta CCCCCC Epsilon CCCCCA 5 6 Alpha AACAAC Beta AACCCC Gamma CCCAAC Delta CCACCA Epsilon CCAAAC Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fpromlk.txt0000664000175000017500000006537012171064331015613 00000000000000 fpromlk Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. 
Please help by correcting and extending the Wiki pages. Function Protein phylogeny by maximum likelihood Description Same as PROML but assumes a molecular clock. The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made. Estimates phylogenies from protein amino acid sequences by maximum likelihood. The PAM, JTT, or PMB models can be employed, and a Hidden Markov model of rates can also be used, with the program inferring which sites have which rates. This also allows gamma-distributed and gamma-plus-invariant-sites distributions of rates across sites. It also allows different rates of change at known sites. Algorithm This program implements the maximum likelihood method for protein amino acid sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to PROML. It uses the Dayhoff probability model of change between amino acids. Its algorithmic details are not yet published, but many of them are similar to DNAMLK. The assumptions of the model are: 1. Each position in the sequence evolves independently. 2. Different lineages evolve independently. 3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify. 4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative". 5. The probabilities of change between amino acids are given by the model of Jones, Taylor, and Thornton (1992), the PMB model of Veerassamy, Smith and Tillier (2004), or the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et al., 1979). Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed. This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probabilities of occurrence of each are, and what the average length is of a patch of positions all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence. For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.
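Such a weighted sum over rate assignments can be computed with a forward recursion rather than by enumerating every assignment. The Python sketch below is only an illustration of that idea, using the example rates, priors and patch length from the paragraph above; the per-position likelihoods are placeholders, and the particular way of turning the average patch length into a switching probability (redraw the rate from the prior with probability 1/lambda) is an assumption of this sketch, not a statement of fpromlk's internals.

   import numpy as np

   rates = np.array([1.0, 2.4, 0.0])       # example rates (used by the real per-rate likelihoods)
   priors = np.array([0.4, 0.3, 0.3])      # their prior probabilities
   lam = 2.0                               # average patch length (assumed parameterization)
   T = (1 - 1 / lam) * np.eye(3) + (1 / lam) * priors   # P(rate j at position i+1 | rate i)

   def site_lik(pos):
       # Placeholder: conditional likelihood of position 'pos' under each rate,
       # which the real program computes from the tree and the amino acid model.
       return np.array([0.02, 0.01, 0.005])

   f = priors * site_lik(0)                # forward variables for the first position
   for pos in range(1, 13):
       f = site_lik(pos) * (f @ T)
   total = f.sum()                         # likelihood summed over all rate assignments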
The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions. This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution. Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at relative rates of 0.2 compared to 1.0 at other positions. If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0. If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless, you may want to use both types of rate variation. Usage Here is a sample session with fpromlk % fpromlk Protein phylogeny by maximum likelihood Input (aligned) protein sequence set(s): promlk.dat Phylip tree file (optional): Phylip promlk program output file [promlk.fpromlk]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Output written to file "promlk.fpromlk" Tree also written onto file "promlk.treefile" Done.
Go to the input files for this example Go to the output files for this example Command line arguments Protein phylogeny by maximum likelihood Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fpromlk] Phylip promlk program output file Additional (Optional) qualifiers (* if not always prompted): -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array Rate for each category * -categories properties File of substitution rate categories -weights properties Weights file * -lengths boolean [N] Use branch lengths from user trees -model menu [Jones-Taylor-Thornton] Probability model for amino acid change (Values: j (Jones-Taylor-Thornton); h (Henikoff/Tillier PMBs); d (Dayhoff PAM)) -gammatype menu [n] Rate variation among sites (Values: g (Gamma distributed rates); i (Gamma+invariant sites); h (User defined HMM of rates); n (Constant rate)) * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ngammacat integer [1] Number of categories (1-9) (Integer from 1 to 9) * -invarcoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ninvarcat integer [1] Number of categories (1-9) including one for invariant sites (Integer from 1 to 9) * -invarfrac float [0.0] Fraction of invariant sites (Number from 0.000 to 1.000) * -nhmmcategories integer [1] Number of HMM rate categories (Integer from 1 to 9) * -hmmrates array [1.0] HMM category rates * -hmmprobabilities array [1.0] Probability for each HMM category * -adjsite boolean [N] Rates at adjacent sites correlated * -lambda float [1.0] Mean block length of sites having the same rate (Number 1.000 or more) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) * -global boolean [N] Global rearrangements -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fpromlk] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -hypstate boolean [N] Reconstruct hypothetical sequence Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file 
from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fpromlk reads any normal sequence USAs. Input files for usage example File: promlk.dat 5 13 Alpha AACGTGGCCAAAT Beta AAGGTCGCCAAAC Gamma CATTTCGTCACAA Delta GGTATTTCGGCCT Epsilon GGGATCTCGGCCC Output file format fpromlk output starts by giving the number of species and the number of amino acid positions. If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates. There then follow the data sequences, if the user has selected the menu option to print them out, with the sequences printed in groups of ten amino acids. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. The unit of branch length is the expected fraction of amino acids changed (so that 1.0 is 100 PAMs). A table is printed showing the length of each tree segment, and the time (in units of expected amino acid substitutions per position) of each fork in the tree, measured from the root of the tree. I have not attempted to include code for approximate confidence limits on branch points, as I have done for branch lengths in PROML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated. The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive. This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run PROML and PROMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In PROML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In PROMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2.
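In other words, twice the log-likelihood difference is compared to a chi-square distribution with n-2 degrees of freedom, as the next paragraph describes. A small hedged sketch of that comparison in Python (the unconstrained log likelihood and the use of scipy are choices of this example; the clock value echoes the example output further below):

   from scipy.stats import chi2

   lnl_proml = -130.51      # illustrative: log likelihood without the clock
   lnl_promlk = -134.70     # illustrative: log likelihood with the clock
   ntips = 5
   statistic = 2 * (lnl_proml - lnl_promlk)     # twice the difference in log likelihoods
   p_value = chi2.sf(statistic, df=ntips - 2)   # chi-square with n-2 degrees of freedom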
To perform the test, take the difference in log likelihoods between the two runs (PROML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected. If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different, then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However, the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A), and it is not done in those cases. The branch lengths printed out are scaled in terms of 100 times the expected numbers of amino acid substitutions, with the scaling chosen so that the average rate of change, averaged over all the positions analyzed, is set to 100.0 if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected percentage of change for very small branches is equal to the branch length.
Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch, but we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14. At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead. Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However, for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file. Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol is printed in capital letters (W rather than w).
One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates. Output files for usage example File: promlk.fpromlk Amino acid sequence Maximum Likelihood method with molecular clock, version 3.69.650 Jones-Taylor-Thornton model of amino acid change +-----------Epsilon +---------------------------------------4 ! +-----------Delta --3 ! +----------------------------Gamma +----------------------2 ! +--------Beta +-------------------1 +--------Alpha Ln Likelihood = -134.70332 Ancestor Node Node Height Length -------- ---- ---- ------ ------ root 3 3 4 0.66464 0.66464 4 Epsilon 0.85971 0.19507 4 Delta 0.85971 0.19507 3 2 0.37420 0.37420 2 Gamma 0.85971 0.48551 2 1 0.70208 0.32788 1 Beta 0.85971 0.15763 1 Alpha 0.85971 0.15763 File: promlk.treefile ((Epsilon:0.19507,Delta:0.19507):0.66464,(Gamma:0.48551, (Beta:0.15763,Alpha:0.15763):0.32788):0.37420); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. 
Comments None PHYLIPNEW-3.69.650/emboss_doc/text/ffreqboot.txt0000664000175000017500000006473412171064331016133 00000000000000 ffreqboot Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Bootstrapped genetic frequencies algorithm Description Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it from between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development Algorithm FFREQBOOT is a gene frequency specific version of SEQBOOT. SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format. To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis. This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does. If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. 
So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input. The resampling methods available are: * The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data. * The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values. * Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Kuensch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. * Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters. * Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters. * Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996), but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However, it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
* Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately (a small sketch of this column shuffle is given after the sample session below). This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species). * Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test). * Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species. * Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats: Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there. MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of the University of Calgary. See his big list of molecular biology XML projects. BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format. Usage Here is a sample session with ffreqboot % ffreqboot -seed 3 Bootstrapped genetic frequencies algorithm Input file: freqboot.dat Phylip seqboot_freq program output file [freqboot.ffreqboot]: completed replicate number 10 completed replicate number 20 completed replicate number 30 completed replicate number 40 completed replicate number 50 completed replicate number 60 completed replicate number 70 completed replicate number 80 completed replicate number 90 completed replicate number 100 Output written to file "freqboot.ffreqboot" Done.
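As noted in the description of the Archie/Faith method above, here is a minimal Python sketch of permuting species within characters (illustrative only, not the program's code; the toy character matrix is the small alignment used elsewhere in these pages). Each column of the species-by-character matrix is shuffled independently, destroying taxonomic structure while keeping the same characters.

   import random

   def permute_within_characters(rows, seed):
       # Shuffle every column independently across species.
       rng = random.Random(seed)
       matrix = [list(r) for r in rows]
       for j in range(len(matrix[0])):
           column = [row[j] for row in matrix]
           rng.shuffle(column)
           for i, state in enumerate(column):
               matrix[i][j] = state
       return ["".join(row) for row in matrix]

   print(permute_within_characters(["AACAAC", "AACCCC", "ACCAAC", "CCACCA", "CCAAAC"], 3))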
Go to the input files for this example Go to the output files for this example Command line arguments Bootstrapped genetic frequencies algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] frequencies (no help text) frequencies value [-outfile] outfile [*.ffreqboot] Phylip seqboot_freq program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -test menu [b] Choose test (Values: b (Bootstrap); j (Jackknife); c (Permute species for each character); o (Permute character order); s (Permute within species); r (Rewrite data)) * -regular toggle [N] Altered sampling fraction * -fracsample float [100.0] Samples as percentage of sites (Number from 0.100 to 100.000) * -blocksize integer [1] Block size for bootstraping (Integer 1 or more) * -reps integer [100] How many replicates (Integer 1 or more) * -justweights menu [d] Write out datasets or just weights (Values: d (Datasets); w (Weights)) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -printdata boolean [N] Print out the data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format ffreqboot data files read by SEQBOOT are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs. Input files for usage example File: freqboot.dat 5 10 2 2 2 2 2 2 2 2 2 2 European 0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205 0.8055 0.5043 African 0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600 0.7582 0.6207 Chinese 0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726 0.7482 0.7334 American 0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000 0.8086 0.8636 Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396 0.9097 0.2976 Output file format ffreqboot output will contain the data sets generated by the resampling process.
Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters. The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factors characters, so that they all are understood to be recoding one multistate character, they will be resampled together as a group. The order of species in the data sets in the output file will vary randomly. This is a precaution to help the programs that analyze these data avoid any result which is sensitive to the input order of species from showing up repeatedly and thus appearing to have evidence in its favor. The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file. 
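One small point about the example files: the input file freqboot.dat above gives a single frequency for each two-allele locus, while the resampled data sets below write out the frequencies of both alleles at each locus. As a one-line illustration of that expansion (plain arithmetic on the example values, not the program's code):

   freqs = [0.2868, 0.5684]                          # one allele per locus, as in freqboot.dat
   both = [x for p in freqs for x in (p, 1.0 - p)]   # [0.2868, 0.7132, 0.5684, 0.4316], as written out below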
Output files for usage example File: freqboot.ffreqboot 5 10 2 2 2 2 2 2 2 2 2 2 European 0.28680 0.71320 0.56840 0.43160 0.56840 0.43160 0.44220 0.55780 0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.72850 0.27150 0.72850 0.27150 0.02050 0.97950 African 0.13560 0.86440 0.48400 0.51600 0.48400 0.51600 0.06020 0.93980 0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.96750 0.03250 0.96750 0.03250 0.06000 0.94000 Chinese 0.16280 0.83720 0.59580 0.40420 0.59580 0.40420 0.72980 0.27020 1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.79860 0.20140 0.79860 0.20140 0.07260 0.92740 American 0.01440 0.98560 0.69900 0.30100 0.69900 0.30100 0.32800 0.67200 0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.86030 0.13970 0.86030 0.13970 0.00000 1.00000 Australian 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260 0.58210 0.41790 1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90000 0.10000 0.90000 0.10000 0.03960 0.96040 5 10 2 2 2 2 2 2 2 2 2 2 European 0.28680 0.71320 0.56840 0.43160 0.44220 0.55780 0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.38280 0.61720 0.02050 0.97950 0.02050 0.97950 0.80550 0.19450 African 0.13560 0.86440 0.48400 0.51600 0.06020 0.93980 0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.59770 0.40230 0.06000 0.94000 0.06000 0.94000 0.75820 0.24180 Chinese 0.16280 0.83720 0.59580 0.40420 0.72980 0.27020 1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.38110 0.61890 0.07260 0.92740 0.07260 0.92740 0.74820 0.25180 American 0.01440 0.98560 0.69900 0.30100 0.32800 0.67200 0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.66060 0.33940 0.00000 1.00000 0.00000 1.00000 0.80860 0.19140 Australian 0.12110 0.87890 0.22740 0.77260 0.58210 0.41790 1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.20180 0.79820 0.03960 0.96040 0.03960 0.96040 0.90970 0.09030 5 10 2 2 2 2 2 2 2 2 2 2 European 0.28680 0.71320 0.28680 0.71320 0.44220 0.55780 0.42860 0.57140 0.42860 0.57140 0.38280 0.61720 0.72850 0.27150 0.80550 0.19450 0.80550 0.19450 0.50430 0.49570 African 0.13560 0.86440 0.13560 0.86440 0.06020 0.93980 0.03970 0.96030 0.03970 0.96030 0.59770 0.40230 0.96750 0.03250 0.75820 0.24180 0.75820 0.24180 0.62070 0.37930 Chinese 0.16280 0.83720 0.16280 0.83720 0.72980 0.27020 1.00000 0.00000 1.00000 0.00000 0.38110 0.61890 0.79860 0.20140 0.74820 0.25180 0.74820 0.25180 0.73340 0.26660 American 0.01440 0.98560 0.01440 0.98560 0.32800 0.67200 0.74210 0.25790 0.74210 0.25790 0.66060 0.33940 0.86030 0.13970 0.80860 0.19140 0.80860 0.19140 0.86360 0.13640 Australian 0.12110 0.87890 0.12110 0.87890 0.58210 0.41790 1.00000 0.00000 1.00000 0.00000 0.20180 0.79820 0.90000 0.10000 0.90970 0.09030 [Part of this file has been deleted for brevity] 5 10 2 2 2 2 2 2 2 2 2 2 European 0.28680 0.71320 0.56840 0.43160 0.56840 0.43160 0.56840 0.43160 0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.80550 0.19450 0.50430 0.49570 0.50430 0.49570 African 0.13560 0.86440 0.48400 0.51600 0.48400 0.51600 0.48400 0.51600 0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.75820 0.24180 0.62070 0.37930 0.62070 0.37930 Chinese 0.16280 0.83720 0.59580 0.40420 0.59580 0.40420 0.59580 0.40420 1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.74820 0.25180 0.73340 0.26660 0.73340 0.26660 American 0.01440 0.98560 0.69900 0.30100 0.69900 0.30100 0.69900 0.30100 0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.80860 0.19140 0.86360 0.13640 0.86360 0.13640 Australian 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260 0.22740 0.77260 1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90970 0.09030 0.29760 0.70240 0.29760 0.70240 5 
10 2 2 2 2 2 2 2 2 2 2 European 0.28680 0.71320 0.28680 0.71320 0.56840 0.43160 0.56840 0.43160 0.44220 0.55780 0.42860 0.57140 0.42860 0.57140 0.72850 0.27150 0.63860 0.36140 0.02050 0.97950 African 0.13560 0.86440 0.13560 0.86440 0.48400 0.51600 0.48400 0.51600 0.06020 0.93980 0.03970 0.96030 0.03970 0.96030 0.96750 0.03250 0.95110 0.04890 0.06000 0.94000 Chinese 0.16280 0.83720 0.16280 0.83720 0.59580 0.40420 0.59580 0.40420 0.72980 0.27020 1.00000 0.00000 1.00000 0.00000 0.79860 0.20140 0.77820 0.22180 0.07260 0.92740 American 0.01440 0.98560 0.01440 0.98560 0.69900 0.30100 0.69900 0.30100 0.32800 0.67200 0.74210 0.25790 0.74210 0.25790 0.86030 0.13970 0.79240 0.20760 0.00000 1.00000 Australian 0.12110 0.87890 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260 0.58210 0.41790 1.00000 0.00000 1.00000 0.00000 0.90000 0.10000 0.98370 0.01630 0.03960 0.96040 5 10 2 2 2 2 2 2 2 2 2 2 European 0.56840 0.43160 0.56840 0.43160 0.44220 0.55780 0.44220 0.55780 0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.72850 0.27150 0.63860 0.36140 0.80550 0.19450 African 0.48400 0.51600 0.48400 0.51600 0.06020 0.93980 0.06020 0.93980 0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.96750 0.03250 0.95110 0.04890 0.75820 0.24180 Chinese 0.59580 0.40420 0.59580 0.40420 0.72980 0.27020 0.72980 0.27020 1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.79860 0.20140 0.77820 0.22180 0.74820 0.25180 American 0.69900 0.30100 0.69900 0.30100 0.32800 0.67200 0.32800 0.67200 0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.86030 0.13970 0.79240 0.20760 0.80860 0.19140 Australian 0.22740 0.77260 0.22740 0.77260 0.58210 0.41790 0.58210 0.41790 1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90000 0.10000 0.98370 0.01630 0.90970 0.09030 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. 
Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnadist.txt0000664000175000017500000005733012171064331015732 00000000000000 fdnadist Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Nucleic acid sequence distance matrix program Description Computes four different distances between species from nucleic acid sequences. The distances can then be used in the distance matrix programs. The distances are the Jukes-Cantor formula, one based on Kimura's 2-parameter method, the F84 model used in DNAML, and the LogDet distance. The distances can also be corrected for gamma-distributed and gamma-plus-invariant-sites-distributed rates of change in different sites. Rates of evolution can vary among sites in a prespecified way, and also according to a Hidden Markov model. The program can also make a table of percentage similarity among sequences. Algorithm This program uses nucleotide sequences to compute a distance matrix, under four different models of nucleotide substitution. It can also compute a table of similarity between the nucleotide sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the maximum likelihood program DNAML or the parsimony program DNAPARS. The program reads in nucleotide sequences and writes an output file containing the distance matrix, or else a table of similarity between sequences. The four models of nucleotide substitution are those of Jukes and Cantor (1969), Kimura (1980), the F84 model (Kishino and Hasegawa, 1989; Felsenstein and Churchill, 1996), and the model underlying the LogDet distance (Barry and Hartigan, 1987; Lake, 1994; Steel, 1994; Lockhart et al., 1994). All except the LogDet distance can be made to allow for unequal rates of substitution at different sites, as Jin and Nei (1990) did for the Jukes-Cantor model. The program correctly takes into account a variety of sequence ambiguities, although in cases where they exist it can be slow. Jukes and Cantor's (1969) model assumes that there is independent change at all sites, with equal probability. Whether a base changes is independent of its identity, and when it changes there is an equal probability of ending up with each of the other three bases. Thus the transition probability matrix (this is a technical term from probability theory and has nothing to do with transitions as opposed to transversions) for a short period of time dt is:

            To:    A      G      C      T
                ---------------------------
         A  |    1-3a     a      a      a
  From:  G  |     a     1-3a     a      a
         C  |     a       a    1-3a     a
         T  |     a       a      a    1-3a

where a is u dt, the product of the rate of substitution per unit time (u) and the length dt of the time interval.
For longer periods of time this implies that the probability that two sequences will differ at a given site is:

   p = 3/4 ( 1 - e^(-4/3 u t) )

and hence that if we observe p, we can compute an estimate of the branch length ut by inverting this to get

   ut = - 3/4 loge ( 1 - 4/3 p )

The Kimura "2-parameter" model is almost as symmetric as this, but allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:

            To:     A        G        C        T
                -----------------------------------
         A  |    1-a-2b      a        b        b
  From:  G  |      a      1-a-2b      b        b
         C  |      b         b      1-a-2b     a
         T  |      b         b        a      1-a-2b

where a is u dt, the product of the rate of transitions per unit time and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length dt of the time interval. The F84 model incorporates different rates of transition and transversion, but also allows for different frequencies of the four nucleotides. It is the model which is used in DNAML, the maximum likelihood nucleotide sequence phylogenies program in this package. You will find the model described in the document for that program. The transition probabilities for this model are given by Kishino and Hasegawa (1989), and further explained in a paper by me and Gary Churchill (1996). The LogDet distance allows a fairly general model of substitution. It computes the distance from the determinant of the empirically observed matrix of joint probabilities of nucleotides in the two species. An explanation of it is available in the chapter by Swofford et al. (1996). The first three models are closely related. The DNAML model reduces to Kimura's two-parameter model if we assume that the equilibrium frequencies of the four bases are equal. The Jukes-Cantor model in turn is a special case of the Kimura 2-parameter model where a = b. Thus each model is a special case of the ones that follow it, Jukes-Cantor being a special case of both of the others. The Jin and Nei (1990) correction for variation in rate of evolution from site to site can be adapted to all of the first three models. It assumes that the rate of substitution varies from site to site according to a gamma distribution, with a coefficient of variation that is specified by the user. The user is asked for it when choosing this option in the menu. Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. For the Jukes-Cantor model, the estimate is computed using the formula for ut given above, as long as the nucleotide symbols in the two sequences are all either A, C, G, T, U, N, X, ?, or - (the latter four indicate a deletion or an unknown nucleotide). This estimate is a maximum likelihood estimate for that model. For the Kimura 2-parameter model, with only these nucleotide symbols, formulas special to that estimate are also computed. These are also, in effect, computing the maximum likelihood estimate for that model. In the Kimura case it depends on the observed sequences only through the sequence length and the observed number of transition and transversion differences between those two sequences. The calculation in that case is a maximum likelihood estimate and will differ somewhat from the estimate obtained from the formulas in Kimura's original paper. That formula was also a maximum likelihood estimate, but with the transition/transversion ratio estimated empirically, separately for each pair of sequences.
In the present case, one overall preset transition/transversion ratio is used which makes the computations harder but achieves greater consistency between different comparisons. For the F84 model, or for any of the models where one or both sequences contain at least one of the other ambiguity codes such as Y, R, etc., a maximum likelihood calculation is also done using code which was originally written for DNAML. Its disadvantage is that it is slow. The resulting distance is in effect a maximum likelihood estimate of the divergence time (total branch length) between the two sequences. However the present program will be much faster than versions earlier than 3.5, because I have speeded up the iterations. The LogDet model computes the distance from the determinant of the matrix of co-occurrence of nucleotides in the two species, according to the formula

   D = - 1/4 ( loge(|F|) - 1/2 loge( fA1 fC1 fG1 fT1 fA2 fC2 fG2 fT2 ) )

where F is a matrix whose (i,j) element is the fraction of sites at which base i occurs in one species and base j occurs in the other, and fji is the fraction of sites at which species i has base j. The LogDet distance cannot cope with ambiguity codes. It must have completely defined sequences. One limitation of the LogDet distance is that it may be infinite sometimes, if there are too many changes between certain pairs of nucleotides. This can be particularly noticeable with distances computed from bootstrapped sequences. Note that there is an assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause the distances to misinterpret the meaning of those sites that had changed. For all of these distance methods, the program allows us to specify that "third position" bases have a different rate of substitution than first and second positions, that introns have a different rate than exons, and so on. The Categories option which does this allows us to make up to 9 categories of sites and specify different rates of change for them. In addition to the four distance calculations, the program can also compute a table of similarities between nucleotide sequences. These values are the fractions of sites identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical nucleotides, so that no credit is given for having (for example) different purines at corresponding sites in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree. Usage Here is a sample session with fdnadist % fdnadist Nucleic acid sequence distance matrix program Input (aligned) nucleotide sequence set(s): dnadist.dat Distance methods f : F84 distance model k : Kimura 2-parameter distance j : Jukes-Cantor distance l : LogDet distance s : Similarity table Choose the method to use [F84 distance model]: Phylip distance matrix output file [dnadist.fdnadist]: Distances calculated for species Alpha .... Beta ... Gamma .. Delta . Epsilon Distances written to file "dnadist.fdnadist" Done.
Go to the input files for this example Go to the output files for this example Command line arguments Nucleic acid sequence distance matrix program Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments -method menu [F84 distance model] Choose the method to use (Values: f (F84 distance model); k (Kimura 2-parameter distance); j (Jukes-Cantor distance); l (LogDet distance); s (Similarity table)) [-outfile] outfile [*.fdnadist] Phylip distance matrix output file Additional (Optional) qualifiers (* if not always prompted): * -gammatype menu [No distribution parameters used] Gamma distribution (Values: g (Gamma distributed rates); i (Gamma+invariant sites); n (No distribution parameters used)) * -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array [1.0] Category rates * -categories properties File of substitution rate categories -weights properties Weights file * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -invarfrac float [0.0] Fraction of invariant sites (Number from 0.000 to 1.000) * -ttratio float [2.0] Transition/transversion ratio (Number 0.001 or more) * -[no]freqsfrom toggle [Y] Use empirical base frequencies from seqeunce input * -basefreq array [0.25 0.25 0.25 0.25] Base frequencies for A C G T/U (use blanks to separate) -lower boolean [N] Output as a lower triangular distance matrix -humanreadable boolean [@($(method)==s?Y:N)] Output as a human-readable distance matrix -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnadist reads any normal sequence USAs. Input files for usage example File: dnadist.dat 5 13 Alpha AACGTGGCCACAT Beta AAGGTCGCCACAC Gamma CAGTTCGCCACAA Delta GAGATTTCCGCCT Epsilon GAGATCTCCGCCC Output file format fdnadist output contains on its first line the number of species. 
The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix of distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs. If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used. If the C (Categories) option is used a table of the relative rates of expected substitution at each category of sites is printed, and a listing of the categories each site is in. There will then follow the equilibrium frequencies of the four bases. If the Jukes-Cantor or Kimura distances are used, these will necessarily be 0.25 : 0.25 : 0.25 : 0.25. The output then shows the transition/transversion ratio that was specified or used by default. In the case of the Jukes-Cantor distance this will always be 0.5. The transition-transversion parameter (as opposed to the ratio) is also printed out: this is used within the program and can be ignored. There then follow the data sequences, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases. For example, with the Jukes-Cantor model, if the two sequences differ in 75% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues an error message indicating which pair of species are causing the problem, and stops. It might be that, had it continued running, it would have also run into the same problem with other pairs of species.
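For readers who want to see the Jukes-Cantor and Kimura formulas in action, here is a minimal Python sketch. It is not part of fdnadist and ignores the program's ambiguity handling, gamma corrections and maximum likelihood machinery; the function names and the filtering of sites are choices made only for this illustration. The Kimura function uses Kimura's original (1980) formula, which, as noted above, differs somewhat from the estimate fdnadist computes with a preset transition/transversion ratio. The example sequences are two of those from the dnadist.dat input file shown above.

   import math

   def jukes_cantor(seq1, seq2):
       # Jukes-Cantor estimate  ut = -3/4 loge(1 - 4/3 p),  where p is the
       # fraction of differing sites.  Returns None when p >= 0.75, the case
       # described above as an infinite (unrepresentable) distance.
       pairs = [(a, b) for a, b in zip(seq1, seq2) if a in "ACGT" and b in "ACGT"]
       p = sum(a != b for a, b in pairs) / len(pairs)
       if p >= 0.75:
           return None
       return -0.75 * math.log(1.0 - 4.0 * p / 3.0)

   def kimura_2p(seq1, seq2):
       # Kimura's original two-parameter estimate from the observed fractions
       # of transition (P) and transversion (Q) differences:
       #     d = -1/2 loge((1 - 2P - Q) * sqrt(1 - 2Q))
       purine = set("AG")
       pairs = [(a, b) for a, b in zip(seq1, seq2) if a in "ACGT" and b in "ACGT"]
       n = len(pairs)
       ts = sum(a != b and (a in purine) == (b in purine) for a, b in pairs)
       tv = sum(a != b and (a in purine) != (b in purine) for a, b in pairs)
       P, Q = ts / n, tv / n
       return -0.5 * math.log((1.0 - 2.0 * P - Q) * math.sqrt(1.0 - 2.0 * Q))

   # Alpha and Beta from the dnadist.dat example above:
   print(jukes_cantor("AACGTGGCCACAT", "AAGGTCGCCACAC"))
   print(kimura_2p("AACGTGGCCACAT", "AAGGTCGCCACAC"))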
If the Kimura distance is being used there may be no error message; the program may simply give a large distance value (it is iterating towards infinity and the value is just where the iteration stopped). Likewise some maximum likelihood estimates may also become large for the same reason (the sequences showing more divergence than is expected even with infinite branch length). I hope in the future to add more warning messages that would alert the user to this. If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the other two characters omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences. Output files for usage example File: dnadist.fdnadist 5 Alpha 0.000000 0.303900 0.857544 1.158927 1.542899 Beta 0.303900 0.000000 0.339727 0.913522 0.619671 Gamma 0.857544 0.339727 0.000000 1.631729 1.293713 Delta 1.158927 0.913522 1.631729 0.000000 0.165882 Epsilon 1.542899 0.619671 1.293713 0.165882 0.000000 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.
Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fmix.txt0000664000175000017500000004625412171064331015104 00000000000000 fmix Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Mixed parsimony algorithm Description Estimates phylogenies by some parsimony methods for discrete character data with two states (0 and 1). Allows use of the Wagner parsimony method, the Camin-Sokal parsimony method, or arbitrary mixtures of these. Also reconstructs ancestral states and allows weighting of characters (does not infer branch lengths). Algorithm MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony. The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969). Here are the assumptions of these two methods: 1. Ancestral states are known (Camin-Sokal) or unknown (Wagner). 2. Different characters evolve independently. 3. Different lineages evolve independently. 4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner). 5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question. 6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes. 7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Usage Here is a sample session with fmix % fmix Mixed parsimony algorithm Phylip character discrete states file: mix.dat Phylip tree file (optional): Phylip mix program output file [mix.fmix]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements !---------! ......... 
Output written to file "mix.fmix" Trees also written onto file "mix.treefile" Go to the input files for this example Go to the output files for this example Example 2 % fmix -printdata -ancfile mixancfile.dat Mixed parsimony algorithm Phylip character discrete states file: mix.dat Phylip tree file (optional): Phylip mix program output file [mix.fmix]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements !---------! ......... Output written to file "mix.fmix" Trees also written onto file "mix.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Mixed parsimony algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more data sets [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fmix] Phylip mix program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -ancfile properties Ancestral states file -mixfile properties Mixture file -method menu [Wagner] Choose the method to use (Values: w (Wagner); c (Camin-Sokal); m (Mixed)) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -threshold float [$(infile.discretesize)] Threshold value (Number 1.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fmix] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -ancseq boolean [N] Print states at all nodes of tree -stepbox boolean [N] Print out steps in each character Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fmix reads discrete character data. States "?", "P", and "B" are allowed. (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". 
The state "P" can also be denoted by "B", for "both". There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states. Input files for usage example File: mix.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Input files for usage example 2 File: mixancfile.dat 001??1 Output file format fmix output is standard: a list of equally parsimonious trees, which will be printed as rooted or unrooted depending on which is appropriate, and, if the user chooses, a table of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. 
If the Camin-Sokal parsimony method is invoked and the Ancestors option is also used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the fewest state changes. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in MIX gives up more easily on displaying these states. If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will also be printed out showing the best guesses of which are the ancestral states in each character. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
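As a rough numerical illustration of the two-tree (KHT-style) comparison just described, the following Python sketch sums per-character step differences between a candidate tree and the best tree and compares the total with 1.96 standard deviations. The per-character step counts are made-up numbers, and the exact variance estimator fmix uses may differ in detail; this is only meant to show the shape of the calculation.

   import math

   steps_best      = [1, 2, 1, 1, 2, 1]   # hypothetical steps per character, best tree
   steps_candidate = [2, 2, 1, 2, 2, 1]   # hypothetical steps per character, other tree

   diffs = [c - b for b, c in zip(steps_best, steps_candidate)]
   n = len(diffs)
   total = sum(diffs)                      # extra steps needed by the candidate tree
   mean = total / n
   var_per_char = sum((d - mean) ** 2 for d in diffs) / (n - 1)
   sd_total = math.sqrt(n * var_per_char)  # s.d. of the summed difference

   print("extra steps:", total, " s.d.:", round(sd_total, 3))
   print("significantly worse" if total > 1.96 * sd_total
         else "not significantly worse")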
In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: mix.fmix Mixed parsimony algorithm, version 3.69.650 Wagner parsimony method 4 trees in all found +--Epsilon +-----4 ! +--Gamma +--2 ! ! +--Delta --1 +-----3 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 9.000 +--------Gamma ! +--2 +--Epsilon ! ! +--4 ! +--3 +--Delta --1 ! ! +-----Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 9.000 +--------Epsilon +--4 ! ! +-----Gamma ! +--2 --1 ! +--Delta ! +--3 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 9.000 +--------Gamma +--2 ! ! +-----Epsilon ! +--4 --1 ! +--Delta ! +--3 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 9.000 File: mix.treefile (((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.2500]; ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.2500]; ((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.2500]; ((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.2500]; Output files for usage example 2 File: mix.fmix Mixed parsimony algorithm, version 3.69.650 5 species, 6 characters Wagner parsimony method Name Characters ---- ---------- Alpha 11011 0 Beta 11000 0 Gamma 10011 0 Delta 00100 1 Epsilon 00111 0 Ancestral states: 001?? 1 One most parsimonious tree found: +-----------Delta --3 ! +--------Epsilon +--4 ! +-----Gamma +--2 ! +--Beta +--1 +--Alpha requires a total of 8.000 best guesses of ancestral states: 0 1 2 3 4 5 6 7 8 9 *-------------------- 0! 0 0 1 ? ? 1 File: mix.treefile (Delta,(Epsilon,(Gamma,(Beta,Alpha)))); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmove Interactive mixed method parsimony fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. 
Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fpenny.txt0000664000175000017500000006506312171064331015437 00000000000000 fpenny Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Penny algorithm, branch-and-bound Description Finds all most parsimonious phylogenies for discrete-character data with two states, for the Wagner, Camin-Sokal, and mixed parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species. Algorithm PENNY is a program that will find all of the most parsimonious trees implied by your data. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion). There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW. The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times). The Algorithm The search strategy used by PENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of steps. It adds the next species to each of these, again in all possible spaces. If this process would continue it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated. Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". 
This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

  Make tree of first two species     (A,B)
    Add C in first place             ((A,B),C)
      Add D in first place           (((A,D),B),C)
      Add D in second place          ((A,(B,D)),C)
      Add D in third place           (((A,B),D),C)
      Add D in fourth place          ((A,B),(C,D))
      Add D in fifth place           (((A,B),C),D)
    Add C in second place:           ((A,C),B)
      Add D in first place           (((A,D),C),B)
      Add D in second place          ((A,(C,D)),B)
      Add D in third place           (((A,C),D),B)
      Add D in fourth place          ((A,C),(B,D))
      Add D in fifth place           (((A,C),B),D)
    Add C in third place             (A,(B,C))
      Add D in first place           ((A,D),(B,C))
      Add D in second place          (A,((B,D),C))
      Add D in third place           (A,(B,(C,D)))
      Add D in fourth place          (A,((B,C),D))
      Add D in fifth place           ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added. This is done by counting how many binary characters which are invariant in the data up to the species most recently added will ultimately show variation when further species are added. Thus if 20 characters vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more characters which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no less than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther. The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time. The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added.
This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible. The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer steps than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems. Controlling Run Times Among the quantities available to be set at the beginning of a run of PENNY, two (howoften and howmany) are of particular importance. As PENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, PENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far. When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to wait for. In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees. The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate. A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it!
Initially, "maxtrees" is set to 100 in the distribution copy. Methods and Options The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in MIX, and thus the remainder of this document will be nearly identical to the MIX document. MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified. The program defaults to carrying out Wagner parsimony. The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969). Here are the assumptions of these two methods: 1. Ancestral states are known (Camin-Sokal) or unknown (Wagner). 2. Different characters evolve independently. 3. Different lineages evolve independently. 4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner). 5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question. 6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes. 7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). 
Usage Here is a sample session with fpenny % fpenny Penny algorithm, branch-and-bound Phylip character discrete states file: penny.dat Phylip penny program output file [penny.fpenny]: How many trees looked Approximate at so far Length of How many percentage (multiples shortest tree trees this long searched of 100): found so far found so far so far ---------- ------------ ------------ ------------ 1 8.00000 1 6.67 2 8.00000 3 20.00 3 8.00000 3 53.33 4 8.00000 3 93.33 Output written to file "penny.fpenny" Trees also written onto file "penny.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Penny algorithm, branch-and-bound Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more data sets [-outfile] outfile [*.fpenny] Phylip penny program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Phylip weights file (optional) -ancfile properties Phylip ancestral states file (optional) -mixfile properties Phylip mix output file (optional) -method menu [Wagner] Choose the method to use (Values: Wag (Wagner); Cam (Camin-Sokal); Mix (Mixed)) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -howmany integer [1000] How many groups of trees (Any integer value) -howoften integer [100] How often to report, in trees (Any integer value) -simple boolean Branch and bound is simple -threshold float [$(infile.discretesize)] Threshold value (Number 1.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fpenny] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -stepbox boolean [N] Print out steps in each site -ancseq boolean [N] Print states at all nodes of tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fpenny reads discrste character data. (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". 
The state "P" can also be denoted by "B", for "both". There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states. Input files for usage example File: penny.dat 7 6 Alpha1 110110 Alpha2 110110 Beta1 110000 Beta2 110000 Gamma1 100110 Delta 001001 Epsilon 001110 Output file format fpenny output is standard: a set of trees, which will be printed as rooted or unrooted depending on which is appropriate, and if the user elects to see them, tables of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If the Camin-Sokal parsimony method (option C or S) is invoked and the A option is also used, then the program will infer, for any character whose ancestral state is unknown ("?") whether the ancestral state 0 or 1 will give the fewest state changes. 
If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in PENNY gives up more easily on displaying these states. If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will be printed out showing the best guesses of which are the ancestral states in each character. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: penny.fpenny Penny algorithm, version 3.69.650 branch-and-bound to find all most parsimonious trees Wagner parsimony method requires a total of 8.000 3 trees in all found +-----------------Alpha1 ! ! +--------Alpha2 --1 ! ! +-----4 +--Epsilon ! ! ! +--6 ! ! +--5 +--Delta +--2 ! ! +-----Gamma1 ! ! +--Beta2 +-----------3 +--Beta1 remember: this is an unrooted tree! +-----------------Alpha1 ! --1 +--------------Alpha2 ! ! ! ! +--Epsilon +--2 +--6 ! +-----5 +--Delta ! ! ! +--4 +-----Gamma1 ! ! +--Beta2 +--------3 +--Beta1 remember: this is an unrooted tree! +-----------------Alpha1 ! ! +-----Alpha2 --1 +--------2 ! ! ! +--Beta2 ! ! +--3 +--4 +--Beta1 ! ! +--Epsilon ! +--6 +--------5 +--Delta ! +-----Gamma1 remember: this is an unrooted tree! File: penny.treefile (Alpha1,((Alpha2,((Epsilon,Delta),Gamma1)),(Beta2,Beta1)))[0.3333]; (Alpha1,(Alpha2,(((Epsilon,Delta),Gamma1),(Beta2,Beta1))))[0.3333]; (Alpha1,((Alpha2,(Beta2,Beta1)),((Epsilon,Delta),Gamma1)))[0.3333]; Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpars Discrete character parsimony Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.
Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/ffactor.txt0000664000175000017500000003252112171064331015555 00000000000000 ffactor Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Multistate to binary recoding program Description Takes discrete multistate data with character state trees and produces the corresponding data set with two states (0 and 1). Written by Christopher Meacham. This program was formerly used to accommodate multistate characters in MIX, but this is less necessary now that PARS is available. Algorithm This program factors a data set that contains multistate characters, creating a data set consisting entirely of binary (0,1) characters that, in turn, can be used as input to any of the other discrete character programs in this package, except for PARS. Besides this primary function, FACTOR also provides an easy way of deleting characters from a data set. The input format for FACTOR is very similar to the input format for the other discrete character programs except for the addition of character-state tree descriptions. Note that this program has no way of converting an unordered multistate character into binary characters. Fortunately, PARS has joined the package, and it enables unordered multistate characters, in which any state can change to any other in one step, to be analyzed with parsimony. FACTOR is really for a different case, that in which there are multiple states related on a "character state tree", which specifies for each state which other states it can change to. That graph of states is assumed to be a tree, with no loops in it. Usage Here is a sample session with ffactor % ffactor Multistate to binary recoding program Phylip factor program input file: factor.dat Phylip factor program output file [factor.ffactor]: Data matrix written on file "factor.ffactor" Done.
Go to the input files for this example Go to the output files for this example Command line arguments Multistate to binary recoding program Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] infile Phylip factor program input file [-outfile] outfile [*.ffactor] Phylip factor program output file Additional (Optional) qualifiers: -anc boolean [N] Put ancestral states in output file -factors boolean [N] Put factors information in output file -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: -outfactorfile outfile [*.ffactor] Phylip factor data output file (optional) -outancfile outfile [*.ffactor] Phylip ancestor data output file (optional) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outfactorfile" associated qualifiers -odirectory string Output directory "-outancfile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format ffactor reads character state tree data. This program factors a data set that contains multistate characters, creating a data set consisting entirely of binary (0,1) characters that, in turn, can be used as input to any of the other discrete character programs in this package, except for PARS. Besides this primary function, FACTOR also provides an easy way of deleting characters from a data set. The input format for FACTOR is very similar to the input format for the other discrete character programs except for the addition of character-state tree descriptions. Note that this program has no way of converting an unordered multistate character into binary characters. Fortunately, PARS has joined the package, and it enables unordered multistate characters, in which any state can change to any other in one step, to be analyzed with parsimony. FACTOR is really for a different case, that in which there are multiple states related on a "character state tree", which specifies for each state which other states it can change to. That graph of states is assumed to be a tree, with no loops in it. First line The first line of the input file should contain the number of species and the number of multistate characters. This first line is followed by the lines describing the character-state trees, one description per line. The species information constitutes the last part of the file. Any number of lines may be used for a single species. The first line is free format with the number of species first, separated by at least one blank (space) from the number of multistate characters, which in turn is separated by at least one blank from the options, if present. Character-state tree descriptions The character-state trees are described in free format. The character number of the multistate character is given first followed by the description of the tree itself. 
Each description must be completed on a single line. Each character that is to be factored must have a description, and the characters must be described in the order that they occur in the input, that is, in numerical order. The tree is described by listing the pairs of character states that are adjacent to each other in the character-state tree. The two character states in each adjacent pair are separated by a colon (":"). If character fifteen has this character state tree for possible states "A", "B", "C", and "D": A ---- B ---- C | | | D then the character-state tree description would be 15 A:B B:C D:B Note that either symbol may appear first. The ancestral state is identified, if desired, by putting it "adjacent" to a period. If we wanted to root character fifteen at state C: A <--- B <--- C | | V D we could write 15 B:D A:B C:B .:C Both the order in which the pairs are listed and the order of the symbols in each pair are arbitrary. However, each pair may only appear once in the list. Any symbols may be used for a character state in the input except the character that signals the connection between two states (in the distribution copy this is set to ":"), ".", and, of course, a blank. Blanks are ignored completely in the tree description so that even B:DA:BC:B.:C or B : DA : BC : B. : C would be equivalent to the above example. However, at least one blank must separate the character number from the tree description. Deleting characters from a data set If no description line appears in the input for a particular character, then that character will be omitted from the output. If the character number is given on the line, but no character-state tree is provided, then the symbol for the character in the input will be copied directly to the output without change. This is useful for characters that are already coded "0" and "1". Characters can be deleted from a data set simply by listing only those that are to appear in the output. Terminating the list of tree descriptions The last character-state tree description should be followed by a line containing the number "999". This terminates processing of the trees and indicates the beginning of the species information. Species information The format for the species information is basically identical to the other discrete character programs. The first ten character positions are allotted to the species name (this value may be changed by altering the value of the constant nmlngth at the beginning of the program). The character states follow and may be continued to as many lines as desired. There is no current method for indicating polymorphisms. It is possible to either put blanks between characters or not. There is a method for indicating uncertainty about states. There is one character value that stands for "unknown". If this appears in the input data then "?" is written out in all the corresponding positions in the output file. The character value that designates "unknown" is given in the constant unkchar at the beginning of the program, and can be changed by changing that constant. It is set to "?" in the distribution copy. Input files for usage example File: factor.dat 4 6 1 A:B B:C 2 A:B B:. 4 5 0:1 1:2 .:0 6 .:# #:$ #:% 999 Alpha CAW00# Beta BBX01% Gamma ABY12# Epsilon CAZ01$ Output file format The first line of ffactor output will contain the number of species and the number of binary characters in the factored data set followed by the letter "A" if the A option was specified in the input. 
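The recoding that FACTOR performs can be sketched in a few lines of Python. The fragment below is only an illustration, not FACTOR itself: the function names are invented, it handles a single character-state tree description in the format described above, and it assumes the description names a root with the ".:" notation. Each edge of the character-state tree becomes one binary character, and a state is scored 1 for exactly those edges that lie on the path from the root to that state. As with the pairs themselves, the order of the resulting columns is arbitrary (here it follows the order in which the pairs were listed).

def parse_description(line):
    # Parse e.g. "15 B:D A:B C:B .:C" into (character number, edge list, root state).
    fields = line.split()
    number, edges, root = int(fields[0]), [], None
    for pair in fields[1:]:
        a, b = pair.split(":")
        if a == ".":
            root = b
        elif b == ".":
            root = a
        else:
            edges.append((a, b))
    return number, edges, root

def binary_codes(edges, root):
    # One binary character per edge: 1 if that edge is in the state's ancestry.
    index = {frozenset(e): i for i, e in enumerate(edges)}
    neighbours = {}
    for a, b in edges:
        neighbours.setdefault(a, []).append(b)
        neighbours.setdefault(b, []).append(a)
    codes, stack = {root: [0] * len(edges)}, [root]
    while stack:                                   # walk outward from the root
        state = stack.pop()
        for nxt in neighbours.get(state, []):
            if nxt not in codes:
                code = list(codes[state])
                code[index[frozenset((state, nxt))]] = 1
                codes[nxt] = code
                stack.append(nxt)
    return codes

number, edges, root = parse_description("15 B:D A:B C:B .:C")
for state, code in sorted(binary_codes(edges, root).items()):
    print(state, "".join(str(bit) for bit in code))   # A 011, B 001, C 000, D 101

The root state comes out as all zeros and, for example, state D is scored 1 for the two arrows (C to B and B to D) that lie in its ancestry, matching the style of the worked four-state example given for the discrete character programs.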
If option F was specified, the next line will begin "FACTORS". If option A was specified, the line describing the ancestor will follow next. Finally, the factored characters will be written for each species in the format required for input by the other discrete programs in the package. The maximum length of the output lines is 80 characters, but this maximum length can be changed prior to compilation. In fact, the format of the output file for the A and F options is not correct for the current release of PHYLIP. We need to change their output to write a factors file and an ancestors file instead of putting the Factors and Ancestors information into the data file. Output files for usage example File: factor.ffactor 4 5 Alpha CA00# Beta BB01% Gamma AB12# Epsilon CA01$ File: factor.factor File: factor.ancestor Data files None Notes None. References None. Warnings None. Diagnostic Error Messages The output should be checked for error messages. Errors will occur in the character-state tree descriptions if the format is incorrect (colons in the wrong place, etc.), if more than one root is specified, if the tree contains loops (and hence is not a tree), and if the tree is not connected, e.g. A:B B:C D:E describes A ---- B ---- C D ---- E This "tree" is in two unconnected pieces. An error will also occur if a symbol appears in the data set that is not in the tree description for that character. Blanks at the end of lines when the species information is continued to a new line will cause this kind of error. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fmove.txt0000664000175000017500000002276412171064331015255 00000000000000 fmove Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Interactive mixed method parsimony Description Interactive construction of phylogenies from discrete character data with two states (0 and 1). Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand. Algorithm MOVE is an interactive parsimony program, inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. 
MOVE reads in a data set which is prepared in almost the same format as one for the mixed method parsimony program MIX. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearraranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology. This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to MSDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed. MOVE uses as its numerical criterion the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony. The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin- Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969). Here are the assumptions of these two methods: 1. Ancestral states are known (Camin-Sokal) or unknown (Wagner). 2. Different characters evolve independently. 3. Different lineages evolve independently. 4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner). 5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question. 6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes. 7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Usage Here is a sample session with fmove % fmove Interactive mixed method parsimony Phylip character discrete states file: move.dat Phylip tree file (optional): NEXT? (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q Do you want to write out the tree to a file? 
(Y or N): Y 5 species, 6 characters Wagner parsimony method Computing steps needed for compatibility in characters... (unrooted) 8.0 Steps 4 chars compatible ,-----------5:Epsilon --9 ! ,--------4:Delta `--8 ! ,-----3:Gamma `--7 ! ,--2:Beta `--6 `--1:Alpha Tree written to file "move.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Interactive mixed method parsimony Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing data set [-intreefile] tree Phylip tree file (optional) Additional (Optional) qualifiers: -weights properties Weights file -ancfile properties Ancestral states file -factorfile properties Factors file -method menu [Wagner] Choose the method to use (Values: w (Wagner); c (Camin-Sokal); m (Mixed)) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -threshold float [$(infile.discretesize)] Threshold value (Number 0.000 or more) -initialtree menu [Arbitary] Initial tree (Values: a (Arbitary); u (User); s (Specify)) -screenwidth integer [80] Width of terminal screen in characters (Any integer value) -screenlines integer [24] Number of lines on screen (Any integer value) -outtreefile outfile [*.fmove] Phylip tree output file (optional) Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format The fmove input data file is set up almost identically to the data files for MIX. Input files for usage example File: move.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Output file format fmove is an interactive program: trees are displayed on the terminal during the session, and when the user quits the current tree can be written to a Phylip tree output file, as shown below. Output files for usage example File: move.treefile (Epsilon,(Delta,(Gamma,(Beta,Alpha)))); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author.
History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fcontml.txt0000664000175000017500000006031312171064331015573 00000000000000 fcontml Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Gene frequency and continuous character maximum likelihood Description Estimates phylogenies from gene frequency data by maximum likelihood under a model in which all divergence is due to genetic drift in the absence of new mutations. Does not assume a molecular clock. An alternative method of analyzing this data is to compute Nei's genetic distance and use one of the distance matrix programs. This program can also do maximum likelihood analysis of continuous characters that evolve by a Brownian Motion model, but it assumes that the characters evolve at equal rates and in an uncorrelated fashion, so that it does not take into account the usual correlations of characters. Algorithm This program estimates phylogenies by the restricted maximum likelihood method based on the Brownian motion model. It is based on the model of Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967). Gomberg (1966), Felsenstein (1973b, 1981c) and Thompson (1975) have done extensive further work leading to efficient algorithms. CONTML uses restricted maximum likelihood estimation (REML), which is the criterion used by Felsenstein (1973b). The actual algorithm is an iterative EM Algorithm (Dempster, Laird, and Rubin, 1977) which is guaranteed to always give increasing likelihoods. The algorithm is described in detail in a paper of mine (Felsenstein, 1981c), which you should definitely consult if you are going to use this program. Some simulation tests of it are given by Rohlf and Wooten (1988) and Kim and Burgman (1988). The default (gene frequency) mode treats the input as gene frequencies at a series of loci, and square-root-transforms the allele frequencies (constructing the frequency of the missing allele at each locus first). This enables us to use the Brownian motion model on the resulting coordinates, in an approximation equivalent to using Cavalli-Sforza and Edwards's (1967) chord measure of genetic distance and taking that to give distance between particles undergoing pure Brownian motion. It assumes that each locus evolves independently by pure genetic drift. The alternative continuous characters mode (menu option C) treats the input as a series of coordinates of each species in N dimensions. It assumes that we have transformed the characters to remove correlations and to standardize their variances. A word about microsatellite data Many current users of CONTML use it to analyze microsatellite data. There are three ways to do this: * Coding each copy number as an allele, and feeding in the frequencies of these alleles. As CONTML's gene frequency mode assumes that all change is by genetic drift, this means that no copy number arises by mutation during the divergence of the populations. Since microsatellite loci have very high mutation rates, this is questionable. * Use some other program, one not in the PHYLIP package, to compute distances among the populations. 
Some of the programs that can do this are RSTCalc, poptrfdos, Microsat, and Populations. Links to them can be found at my Phylogeny Programs web site at http://evolution.gs.washington.edu/phylip/software.html. Those distance measures allow for mutation during the divergence of the populations. But even they are not perfect -- they do not allow us to use all the information contained in the gene frequency differences within a copy number allele. There is a need for a more complete statistical treatment of inference of phylogenies from microsatellite models, ones that take both mutation and genetic drift fully into account. * Alternatively, there is the Brownian motion approximation to mean population copy number. This is described in my book (Felsenstein, 2004, Chapter 15, pp. 242-245), and it is implicit also in the microsatellite distances. Each locus is coded as a single continuous character, the mean of the copy number at that microsatellite locus in that species. Thus if the species (or population) has frequencies 0.10, 0.24, 0.60, and 0.06 of alleles that have 18, 19, 20, and 21 copies, it is coded as having 0.10 X 18 + 0.24 X 19 + 0.60 X 20 + 0.06 X 21 = 19.62 copies. These values can, I believe, be calculated by a spreadsheet program. Each microsatellite is represented by one character, and the continuous character mode of CONTML is used (not the gene frequencies mode). This coding allows for mutation that changes copy number. It does not make complete use of all data, but neither does the treatment of microsatellite gene frequencies as changing only by genetic drift. Usage Here is a sample session with fcontml % fcontml -printdata Gene frequency and continuous character maximum likelihood Input file: contml.dat Phylip tree file (optional): Phylip contml program output file [contml.fcontml]: Adding species: 1. European 2. African 3. Chinese 4. American 5. Australian Output written to file "contml.fcontml" Tree also written onto file "contml.treefile" Done.
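As a concrete version of the mean copy number coding described above (and not a feature of CONTML itself), the following short Python sketch turns per-population microsatellite allele frequencies into one continuous character per locus; the first population repeats the worked example above, the second is invented for illustration.

def mean_copy_number(freq_by_copies):
    # freq_by_copies: dict mapping allele copy number -> frequency in one population
    return sum(copies * freq for copies, freq in freq_by_copies.items())

locus = {
    "PopA": {18: 0.10, 19: 0.24, 20: 0.60, 21: 0.06},   # the worked example: 19.62
    "PopB": {18: 0.50, 19: 0.30, 20: 0.20},             # invented frequencies
}
for name, freqs in locus.items():
    print("%-10s %.2f" % (name, mean_copy_number(freqs)))

One such value per locus and per population would then go into a continuous-characters input file (names padded to ten characters, as in the quantitative-characters example shown later) and be analyzed in the continuous character mode rather than the gene frequencies mode.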
Go to the input files for this example Go to the output files for this example Command line arguments Gene frequency and continuous character maximum likelihood Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] frequencies File containing one or more sets of data [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fcontml] Phylip contml program output file Additional (Optional) qualifiers (* if not always prompted): -datatype menu [g] Input type in infile (Values: g (Gene frequencies); i (Continuous characters)) * -lengths boolean [N] Use branch lengths from user trees * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) * -global boolean [N] Global rearrangements -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fcontml] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fcontml reads continuous character data. Continuous character data The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny. When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions: 1. Different lineages evolve independently. 2. After two lineages split, their characters change independently. 3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method). 4. Different loci or characters drift independently. How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c). The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. 
for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option: 5 3 2 3 2 Alpha 0.90 0.80 0.10 0.56 Beta 0.72 0.54 0.30 0.20 Gamma 0.38 0.10 0.05 0.98 Delta 0.42 0.40 0.43 0.97 Epsilon 0.10 0.30 0.70 0.62 whereas here is what it would have to look like if the A option were invoked: 5 3 2 3 2 Alpha 0.90 0.10 0.80 0.10 0.10 0.56 0.44 Beta 0.72 0.28 0.54 0.30 0.16 0.20 0.80 Gamma 0.38 0.62 0.10 0.05 0.85 0.98 0.02 Delta 0.42 0.58 0.40 0.43 0.17 0.97 0.03 Epsilon 0.10 0.90 0.30 0.70 0.00 0.62 0.38 The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name). If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message. While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points. CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. 
Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables. CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allow us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set. In the quantitative characters mode, a typical small data set would be: 5 6 Alpha 0.345 0.467 1.213 2.2 -1.2 1.0 Beta 0.457 0.444 1.1 1.987 -0.2 2.678 Gamma 0.6 0.12 0.97 2.3 -0.11 1.54 Delta 0.68 0.203 0.888 2.0 1.67 Epsilon 0.297 0.22 0.90 1.9 1.74 Note that in the latter case, there is no line giving the numbers of alleles at each locus. In this latter case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale. For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs. Input files for usage example File: contml.dat 5 10 2 2 2 2 2 2 2 2 2 2 European 0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205 0.8055 0.5043 African 0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600 0.7582 0.6207 Chinese 0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726 0.7482 0.7334 American 0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000 0.8086 0.8636 Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396 0.9097 0.2976 Output file format fcontml output has a standard appearance. The topology of the tree is given by an unrooted tree diagram. The lengths (in time or in expected amounts of variance) are given in a table below the topology, and a rough confidence interval given for each length. Negative lower bounds on length indicate that rearrangements may be acceptable. The units of length are amounts of expected accumulated variance (not time). The log likelihood (natural log) of each tree is also given, and it is indicated how many topologies have been tried. The tree does not necessarily have all tips contemporary, and the log likelihood may be either positive or negative (this simply corresponds to whether the density function does or does not exceed 1) and a negative log likelihood does not indicate any error. The log likelihood allows various formal likelihood ratio hypothesis tests. The description of the tree includes approximate standard errors on the lengths of segments of the tree. These are calculated by considering only the curvature of the likelihood surface as the length of the segment is varied, holding all other lengths constant. As such they are most probably underestimates of the variance, and hence may give too much confidence in the given tree. One should use caution in interpreting the likelihoods that are printed out. If the model is wrong, it will not be possible to use the likelihoods to make formal statistical statements. 
Thus, if gene frequencies are being analyzed, but the gene frequencies change not only by genetic drift, but also by mutation, the model is not correct. It would be as well-justified in this case to use GENDIST to compute the Nei (1972) genetic distance and then use FITCH, KITSCH or NEIGHBOR to make a tree. If continuous characters are being analyzed, but if the characters have not been transformed to new coordinates that evolve independently and at equal rates, then the model is also violated and no statistical analysis is possible. Doing such a transformation is not easy, and usually not even possible. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across loci. If the two trees means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for the any lack of realism in the model underlying this program. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. The version used here is a multivariate normal approximation to their test; it is due to Shimodaira (1998). The variances and covariances of the sum of log likelihoods across loci are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. One problem which sometimes arises is that the program is fed two species (or populations) with identical transformed gene frequencies: this can happen if sample sizes are small and/or many loci are monomorphic. In this case the program "gets its knickers in a twist" and can divide by zero, usually causing a crash. If you suspect that this has happened, check for two species with identical coordinates. If you find them, eliminate one from the problem: the two must always show up as being at the same point on the tree anyway. 
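For readers who want to see the shape of the two-tree comparison described above, here is a rough Python sketch (not CONTML's actual code, and with invented numbers) of a KHT-style test: the per-locus log-likelihood differences between two trees are summed, and the sum is compared with its standard deviation as estimated from the spread of those differences across loci.

import math

def kht_two_trees(lnl_tree1, lnl_tree2):
    # lnl_tree1, lnl_tree2: per-locus log-likelihoods for the two trees, same loci, same order
    diffs = [a - b for a, b in zip(lnl_tree1, lnl_tree2)]
    n = len(diffs)
    mean = sum(diffs) / n
    var = sum((d - mean) ** 2 for d in diffs) / (n - 1)   # per-locus sample variance
    sd_total = math.sqrt(n * var)                         # s.d. of the summed difference
    z = sum(diffs) / sd_total
    return sum(diffs), z, abs(z) > 1.96

# Invented per-locus log-likelihoods for ten loci under two candidate trees.
tree1 = [-4.1, -3.9, -5.2, -4.8, -4.0, -3.7, -5.0, -4.4, -4.2, -4.6]
tree2 = [-4.5, -4.0, -5.6, -4.9, -4.3, -3.8, -5.4, -4.5, -4.4, -4.9]
total, z, significant = kht_two_trees(tree1, tree2)
print("ln L difference %.2f, z = %.2f, significantly different: %s" % (total, z, significant))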
Output files for usage example File: contml.fcontml Continuous character Maximum Likelihood method version 3.69.650 5 Populations, 10 Loci Numbers of alleles at the loci: ------- -- ------- -- --- ----- 2 2 2 2 2 2 2 2 2 2 Name Gene Frequencies ---- ---- ----------- locus: 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 European 0.28680 0.71320 0.56840 0.43160 0.44220 0.55780 0.42860 0.57140 0.38280 0.61720 0.72850 0.27150 0.63860 0.36140 0.02050 0.97950 0.80550 0.19450 0.50430 0.49570 African 0.13560 0.86440 0.48400 0.51600 0.06020 0.93980 0.03970 0.96030 0.59770 0.40230 0.96750 0.03250 0.95110 0.04890 0.06000 0.94000 0.75820 0.24180 0.62070 0.37930 Chinese 0.16280 0.83720 0.59580 0.40420 0.72980 0.27020 1.00000 0.00000 0.38110 0.61890 0.79860 0.20140 0.77820 0.22180 0.07260 0.92740 0.74820 0.25180 0.73340 0.26660 American 0.01440 0.98560 0.69900 0.30100 0.32800 0.67200 0.74210 0.25790 0.66060 0.33940 0.86030 0.13970 0.79240 0.20760 0.00000 1.00000 0.80860 0.19140 0.86360 0.13640 Australian 0.12110 0.87890 0.22740 0.77260 0.58210 0.41790 1.00000 0.00000 0.20180 0.79820 0.90000 0.10000 0.98370 0.01630 0.03960 0.96040 0.90970 0.09030 0.29760 0.70240 +-----------------------------------------------------------African ! ! +-------------------------------Australian 1-------------3 ! ! +-----------------------American ! +-----2 ! +Chinese ! +European remember: this is an unrooted tree! Ln Likelihood = 38.71914 Between And Length Approx. Confidence Limits ------- --- ------ ------- ---------- ------ 1 African 0.09693444 ( 0.03123910, 0.19853604) 1 3 0.02252816 ( 0.00089799, 0.05598045) 3 Australian 0.05247405 ( 0.01177094, 0.11542374) 3 2 0.00945315 ( -0.00897717, 0.03795670) 2 American 0.03806240 ( 0.01095938, 0.07997877) 2 Chinese 0.00208822 ( -0.00960622, 0.02017433) 1 European 0.00000000 ( -0.01627246, 0.02516630) File: contml.treefile (African:0.09693444,(Australian:0.05247405,(American:0.03806240,Chinese:0.002088 22):0.00945315):0.02252816, European:0.00000000); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description egendist Genetic distance matrix program fgendist Compute genetic distances from gene frequencies Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdollop.txt0000664000175000017500000004515012171064331015572 00000000000000 fdollop Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Dollo and polymorphism parsimony algorithm Description Estimates phylogenies by the Dollo or polymorphism parsimony criteria for discrete character data with two states (0 and 1). Also reconstructs ancestral states and allows weighting of characters. Dollo parsimony is particularly appropriate for restriction sites data; with ancestor states specified as unknown it may be appropriate for restriction fragments data. 
Algorithm This program carries out the Dollo and polymorphism parsimony methods. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary. The assumptions of this method are in effect: 1. We know which state is the ancestral one (state 0). 2. The characters are evolving independently. 3. Different lineages evolve independently. 4. The probability of a forward change (0-->1) is small over the evolutionary times involved. 5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change. 6. Retention of polymorphism for both states (0 and 1) is highly improbable. 7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment. One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred. The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was independently published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained. The assumptions of the polymorphism parsimony method are in effect: 1. The ancestral state (state 0) is known in each character. 2. The characters are evolving independently of each other. 3. Different lineages are evolving independently. 4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group. 5. Retention of polymorphism is also improbable, but far more probable that forward change, so that we can more easily envisage much polymorhism than even one additional forward change. 6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism. 7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). 
For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Usage Here is a sample session with fdollop % fdollop Dollo and polymorphism parsimony algorithm Phylip character discrete states file: dollop.dat Phylip tree file (optional): Phylip dollop program output file [dollop.fdollop]: Dollo and polymorphism parsimony algorithm, version 3.69.650 Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements !---------! ......... ......... Output written to file "dollop.fdollop" Trees also written onto file "dollop.treefile" Go to the input files for this example Go to the output files for this example Command line arguments Dollo and polymorphism parsimony algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more data sets [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fdollop] Phylip dollop program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Phylip weights file (optional) -ancfile properties Ancestral states file -method menu [d] Parsimony method (Values: d (Dollo); p (Polymorphism)) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -threshold float [$(infile.discretesize)] Threshold value (Number 0.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdollop] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -ancseq boolean [N] Print states at all nodes of tree -stepbox boolean [N] Print out steps in each character Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdollop reads discrete character data with "?", "P", "B" states allowed. . (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. 
For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both". There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states. Input files for usage example File: dollop.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Output file format fdollop output is standard: a list of equally parsimonious trees, and, if the user selects menu option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1. 
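The Dollo counting described above can be sketched briefly. The Python fragment below is not DOLLOP; it is an illustration with an invented tree and character. It assumes state 0 is ancestral, places the single 0 --> 1 origination on the branch above the smallest clade containing every tip that shows state 1, and then counts one 1 --> 0 reversion for each maximal all-0 subtree inside that clade, which is the quantity the program tries to minimize.

def dollo_reversions(tree, states):
    # tree: nested 2-tuples with tip names at the leaves; states: tip name -> 0 or 1
    def ones(node):
        if isinstance(node, str):
            return states[node]
        return sum(ones(child) for child in node)
    def smallest_clade_with_all_ones(node):
        if isinstance(node, str):
            return node
        for child in node:
            if ones(child) == ones(tree):      # every 1-tip is inside this child
                return smallest_clade_with_all_ones(child)
        return node
    def reversions(node):
        if isinstance(node, str):
            return 0
        total = 0
        for child in node:
            total += 1 if ones(child) == 0 else reversions(child)
        return total
    return reversions(smallest_clade_with_all_ones(tree))

tree = ((("Alpha", "Beta"), "Gamma"), ("Delta", "Epsilon"))   # invented topology
char = {"Alpha": 1, "Beta": 0, "Gamma": 1, "Delta": 0, "Epsilon": 1}
print(dollo_reversions(tree, char))   # 2 reversions: one above Beta, one above Delta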
If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there may be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in DOLLOP gives up more easily on displaying these states.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the highest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
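For the two-tree case the arithmetic behind this kind of comparison can be sketched in a few lines. The Python fragment below is an illustration only, not the program's code: it takes hypothetical per-character step counts for two trees, forms the differences, and asks whether the total difference exceeds 1.96 of its standard deviations, the standard deviation being estimated from the spread of the per-character differences under the independence assumption stated above.

from math import sqrt

def kht_two_trees(steps_a, steps_b):
    d = [a - b for a, b in zip(steps_a, steps_b)]     # per-character step differences
    n = len(d)
    mean = sum(d) / n
    var = sum((x - mean) ** 2 for x in d) / (n - 1)   # sample variance of a difference
    total_diff = sum(d)                               # difference in total steps
    sd_of_total = sqrt(n * var)                       # its estimated standard deviation
    return total_diff, sd_of_total, abs(total_diff) > 1.96 * sd_of_total

steps_a = [2, 1, 3, 1, 2, 2, 1, 3]    # made-up step counts per character, tree A
steps_b = [1, 1, 2, 1, 2, 1, 1, 2]    # the same characters counted on tree B
print(kht_two_trees(steps_a, steps_b))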
In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. Output files for usage example File: dollop.fdollop Dollo and polymorphism parsimony algorithm, version 3.69.650 Dollo parsimony method One most parsimonious tree found: +-----------Delta --3 ! +--------Epsilon +--4 ! +-----Gamma +--2 ! +--Beta +--1 +--Alpha requires a total of 3.000 File: dollop.treefile (Delta,(Epsilon,(Gamma,(Beta,Alpha)))); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fconsense.txt0000664000175000017500000003336012171064331016116 00000000000000 fconsense Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Majority-rule and strict consensus tree Description Computes consensus trees by the majority-rule consensus tree method, which also allows one to easily find the strict consensus tree. Is not able to compute the Adams consensus tree. Trees are input in a tree file in standard nested-parenthesis notation, which is produced by many of the tree estimation programs in the package. This program can be used as the final step in doing bootstrap analyses for many of the methods in the package. Algorithm fconsense reads a file of computer-readable trees and prints out (and may also write out onto a file) a consensus tree. At the moment it carries out a family of consensus tree methods called the Ml methods (Margush and McMorris, 1981). These include strict consensus and majority rule consensus. 
Basically the consensus tree consists of monophyletic groups that occur as often as possible in the data. If a group occurs in more than a fraction l of all the input trees it will definitely appear in the consensus tree. The tree printed out has at each fork a number indicating how many times the group which consists of the species to the right of (descended from) the fork occurred. Thus if we read in 15 trees and find that a fork has the number 15, that group occurred in all of the trees. The strict consensus tree consists of all groups that occurred 100% of the time, the rest of the resolution being ignored. The tree printed out here includes groups down to 50%, and below it until the tree is fully resolved. The majority rule consensus tree consists of all groups that occur more than 50% of the time. Any other percentage level between 50% and 100% can also be used, and that is why the program in effect carries out a family of methods. You have to decide on the percentage level, figure out for yourself what number of occurrences that would be (e.g. 15 in the above case for 100%), and resolutely ignore any group below that number. Do not use numbers at or below 50%, because some groups occurring (say) 35% of the time will not be shown on the tree. The collection of all groups that occur 35% or more of the time may include two groups that are mutually self contradictory and cannot appear in the same tree. In this program, as the default method I have included groups that occur less than 50% of the time, working downwards in their frequency of occurrence, as long as they continue to resolve the tree and do not contradict more frequent groups. In this respect the method is similar to the Nelson consensus method (Nelson, 1979) as explicated by Page (1989) although it is not identical to it. The program can also carry out Strict consensus, Majority Rule consensus without the extension which adds groups until the tree is fully resolved, and other members of the Ml family, where the user supplied the fraction of times the group must appear in the input trees to be included in the consensus tree. For the moment the program cannot carry out any other consensus tree method, such as Adams consensus (Adams, 1972, 1986) or methods based on quadruples of species (Estabrook, McMorris, and Meacham, 1985). Usage Here is a sample session with fconsense % fconsense Majority-rule and strict consensus tree Phylip tree file: consense.dat Phylip consense program output file [consense.fconsense]: Consensus tree written to file "consense.treefile" Output written to file "consense.fconsense" Done. 
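The counting that underlies the Ml family is simple to sketch. The toy Python fragment below (an illustration, not fconsense's code) tallies how often each group of species appears among a set of rooted input trees written as nested tuples, and keeps the groups occurring in more than a fraction l of the trees; the real program works with unrooted bipartitions and then assembles the retained groups into the consensus tree.

from collections import Counter

def clades(tree, out=None):
    """Return the tip set below `tree`, collecting every internal group in `out`."""
    if out is None:
        out = []
    if not isinstance(tree, tuple):                   # a tip label
        return frozenset([tree]), out
    tips = frozenset()
    for sub in tree:
        sub_tips, _ = clades(sub, out)
        tips |= sub_tips
    out.append(tips)
    return tips, out

def majority_rule(trees, l=0.5):
    counts = Counter()
    for t in trees:
        _, groups = clades(t)
        counts.update(set(groups))                    # count each group once per tree
    return {g: n for g, n in counts.items() if n > l * len(trees)}

# three made-up rooted trees on the species A, B, C, D
trees = [("A", (("B", "C"), "D")),
         ("A", (("B", "C"), "D")),
         ("A", ("B", ("C", "D")))]
for group, n in sorted(majority_rule(trees).items(), key=lambda item: -item[1]):
    print(sorted(group), n)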
Go to the input files for this example Go to the output files for this example Command line arguments Majority-rule and strict consensus tree Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-intreefile] tree Phylip tree file [-outfile] outfile [*.fconsense] Phylip consense program output file Additional (Optional) qualifiers (* if not always prompted): -method menu [mre] Consensus method (Values: s (strict consensus tree); mr (Majority Rule); mre (Majority Rule (extended)); ml (Minimum fraction (0.5 to 1.0))) * -mlfrac float [0.5] Fraction (l) of times a branch must appear (Number from 0.500 to 1.000) -root toggle [N] Trees to be treated as Rooted -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fconsense] Phylip tree output file (optional) -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -[no]prntsets boolean [Y] Print out the sets of species Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fconsense reads any normal sequence USAs. Input files for usage example File: consense.dat (A,(B,(H,(D,(J,(((G,E),(F,I)),C)))))); (A,(B,(D,((J,H),(((G,E),(F,I)),C))))); (A,(B,(D,(H,(J,(((G,E),(F,I)),C)))))); (A,(B,(E,(G,((F,I),((J,(H,D)),C)))))); (A,(B,(E,(G,((F,I),(((J,H),D),C)))))); (A,(B,(E,((F,I),(G,((J,(H,D)),C)))))); (A,(B,(E,((F,I),(G,(((J,H),D),C)))))); (A,(B,(E,((G,(F,I)),((J,(H,D)),C))))); (A,(B,(E,((G,(F,I)),(((J,H),D),C))))); Output file format fconsense output is a list of the species (in the order in which they appear in the first tree, which is the numerical order used in the program), a list of the subsets that appear in the consensus tree, a list of those that appeared in one or another of the individual trees but did not occur frequently enough to get into the consensus tree, followed by a diagram showing the consensus tree. The lists of subsets consists of a row of symbols, each either "." or "*". The species that are in the set are marked by "*". Every ten species there is a blank, to help you keep track of the alignment of columns. The order of symbols corresponds to the order of species in the species list. Thus a set that consisted of the second, seventh, and eighth out of 13 species would be represented by: .*....**.. ... Note that if the trees are unrooted the final tree will have one group, consisting of every species except the Outgroup (which by default is the first species encountered on the first tree), which always appears. It will not be listed in either of the lists of sets, but it will be shown in the final tree as occurring all of the time. 
This is hardly surprising: in telling the program that this species is the outgroup we have specified that the set consisting of all of the others is always a monophyletic set. So this is not to be taken as interesting information, despite its dramatic appearance. Output files for usage example File: consense.fconsense Consensus tree program, version 3.69.650 Species in order: 1. A 2. B 3. H 4. D 5. J 6. G 7. E 8. F 9. I 10. C Sets included in the consensus tree Set (species in order) How many times out of 9.00 .......**. 9.00 ..******** 9.00 ..****.*** 6.00 ..***..... 6.00 ..***....* 6.00 ..*.*..... 4.00 ..***..*** 2.00 Sets NOT included in consensus tree: Set (species in order) How many times out of 9.00 .....**... 3.00 .....***** 3.00 ..**...... 3.00 .....****. 3.00 ..****...* 2.00 .....*.**. 2.00 ..*.****** 2.00 ....****** 2.00 ...******* 1.00 Extended majority rule consensus tree CONSENSUS TREE: the numbers on the branches indicate the number of times the partition of the species into the two sets which are separated by that branch occurred among the trees, out of 9.00 trees +-----------------------C | +--6.00-| +-------H | | +--4.00-| | +--6.00-| +-------J +--2.00-| | | | +---------------D | | +--6.00-| | +-------F | | +------------------9.00-| | | +-------I +--9.00-| | | | +---------------------------------------G +-------| | | | +-----------------------------------------------E | | | +-------------------------------------------------------B | +---------------------------------------------------------------A remember: this is an unrooted tree! File: consense.treefile ((((((C:9.00,((H:9.00,J:9.00):4.00,D:9.00):6.00):6.00,(F:9.00,I:9.00):9.00):2.00 ,G:9.00):6.00, E:9.00):9.00,B:9.00):9.00,A:9.00); Branch Lengths on the Consensus Tree? Note that the lengths on the tree on the output tree file are not branch lengths but the number of times that each group appeared in the input trees. This number is the sum of the weights of the trees in which it appeared, so that if there are 11 trees, ten of them having weight 0.1 and one weight 1.0, a group that appeared in the last tree and in 6 others would be shown as appearing 1.6 times and its branch length will be 1.6. This means that if you take the consensus tree from the output tree file and try to draw it, the branch lengths will be strange. I am often asked how to put the correct branch lengths on these (this is one of our Frequently Asked Questions). There is no simple answer to this. It depends on what "correct" means. For example, if you have a group of species that shows up in 80% of the trees, and the branch leading to that group has average length 0.1 among that 80%, is the "correct" length 0.1? Or is it (0.80 x 0.1)? There is no simple answer. However, if you want to take the consensus tree as an estimate of the true tree (rather than as an indicator of the conflicts among trees) you may be able to use the User Tree (option U) mode of the phylogeny program that you used, and use it to put branch lengths on that tree. Thus, if you used DNAML, you can take the consensus tree, make sure it is an unrooted tree, and feed that to DNAML using the original data set (before bootstrapping) and DNAML's option U. As DNAML wants an unrooted tree, you may have to use RETREE to make the tree unrooted (using the W option of RETREE and choosing the unrooted option within it). Of course you will also want to change the tree file name from "outree" to "intree". 
If you used a phylogeny program that does not infer branch lengths, you might want to use a different one (such as FITCH or DNAML) to infer the branch lengths, again making sure the tree is unrooted, if the program needs that. Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description econsense Majority-rule and strict consensus tree ftreedist Calculate distances between trees ftreedistpair Calculate distance between two sets of trees Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/Makefile0000664000175000017500000003534212171071711015034 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # emboss_doc/text/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/PHYLIPNEW pkglibdir = $(libdir)/PHYLIPNEW pkglibexecdir = $(libexecdir)/PHYLIPNEW am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = x86_64-unknown-linux-gnu host_triplet = x86_64-unknown-linux-gnu subdir = emboss_doc/text DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 
2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/doc/programs/text ACLOCAL = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run aclocal-1.12 AMTAR = $${TAR-tar} ANT = AR = ar AUTOCONF = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoconf AUTOHEADER = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoheader AUTOMAKE = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run automake-1.12 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -O2 CPP = gcc -E CPPFLAGS = -DAJ_LinuxLF -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 CXX = g++ CXXCPP = g++ -E CXXDEPMODE = depmode=gcc3 CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps DEVWARN_CFLAGS = DLLTOOL = false DSYMUTIL = DUMPBIN = ECHO_C = ECHO_N = -n ECHO_T = EGREP = /usr/bin/grep -E EXEEXT = FGREP = /usr/bin/grep -F GREP = /usr/bin/grep HAVE_MEMMOVE = HAVE_STRERROR = INSTALL = /usr/bin/install -c INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} INSTALL_SCRIPT = ${INSTALL} INSTALL_STRIP_PROGRAM = $(install_sh) -c -s JAR = JAVA = JAVAC = JAVA_CFLAGS = JAVA_CPPFLAGS = -DNO_AUTH JAVA_LDFLAGS = LD = /usr/bin/ld -m elf_x86_64 LDFLAGS = LIBOBJS = LIBS = -lm -lhpdf -lgd -lpng -lz -lm LIBTOOL = $(SHELL) $(top_builddir)/libtool LIPO = LN_S = ln -s LTLIBOBJS = MAKEINFO = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run makeinfo MANIFEST_TOOL = : MKDIR_P = /usr/bin/mkdir -p MYSQL_CFLAGS = -I/usr/include/mysql -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-strict-aliasing -fwrapv -fPIC -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 MYSQL_CONFIG = /usr/bin/mysql_config MYSQL_CPPFLAGS = -I/usr/include/mysql MYSQL_LDFLAGS = -L/usr/lib64/mysql -lmysqlclient -lpthread -lz -lm -lrt -lssl -lcrypto -ldl MYSQL_VERSION = 5.5.32 NM = /usr/bin/nm -B NMEDIT = OBJDUMP = objdump OBJEXT = o OTOOL = OTOOL64 = PACKAGE = PHYLIPNEW PACKAGE_BUGREPORT = emboss-bug@emboss.open-bio.org PACKAGE_NAME = PHYLIPNEW PACKAGE_STRING = PHYLIPNEW 3.69.650 PACKAGE_TARNAME = PHYLIPNEW PACKAGE_URL = http://emboss.open-bio.org/ PACKAGE_VERSION = 
3.69.650 PATH_SEPARATOR = : PCRE_DATE = 11-Apr-2009 PCRE_LIB_VERSION = 0:1:0 PCRE_MAJOR = 7 PCRE_MINOR = 9 PCRE_POSIXLIB_VERSION = 0:0:0 PCRE_VERSION = 7.9 POSIX_MALLOC_THRESHOLD = -DPOSIX_MALLOC_THRESHOLD=10 POSTGRESQL_CFLAGS = -I/usr/include POSTGRESQL_CONFIG = /usr/bin/pg_config POSTGRESQL_CPPFLAGS = -I/usr/include POSTGRESQL_LDFLAGS = -L/usr/lib64 -lpq POSTGRESQL_VERSION = 9.2.4 RANLIB = ranlib SED = /usr/bin/sed SET_MAKE = SHELL = /bin/sh STRIP = strip VERSION = 3.69.650 WARN_CFLAGS = XLIB = -lX11 -lXaw -lXt XMKMF = X_CFLAGS = X_EXTRA_LIBS = X_LIBS = X_PRE_LIBS = -lSM -lICE abs_builddir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc/text abs_srcdir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc/text abs_top_builddir = /data/scratch/embossdist/embassy/phylipnew abs_top_srcdir = /data/scratch/embossdist/embassy/phylipnew ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ ac_ct_DUMPBIN = am__include = include am__leading_dot = . am__quote = am__tar = $${TAR-tar} chof - "$$tardir" am__untar = $${TAR-tar} xf - bindir = ${exec_prefix}/bin build = x86_64-unknown-linux-gnu build_alias = build_cpu = x86_64 build_os = linux-gnu build_vendor = unknown builddir = . datadir = ${datarootdir} datarootdir = ${prefix}/share docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} dvidir = ${docdir} embprefix = /usr/local exec_prefix = ${prefix} host = x86_64-unknown-linux-gnu host_alias = host_cpu = x86_64 host_os = linux-gnu host_vendor = unknown htmldir = ${docdir} includedir = ${prefix}/include infodir = ${datarootdir}/info install_sh = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localedir = ${datarootdir}/locale localstatedir = ${prefix}/var mandir = ${datarootdir}/man mkdir_p = $(MKDIR_P) oldincludedir = /usr/include pdfdir = ${docdir} prefix = /usr/local program_transform_name = s,x,x, psdir = ${docdir} sbindir = ${exec_prefix}/sbin sharedstatedir = ${prefix}/com srcdir = . sysconfdir = ${prefix}/etc target_alias = top_build_prefix = ../../ top_builddir = ../.. top_srcdir = ../.. pkgdata_DATA = fclique.txt \ fconsense.txt fcontml.txt fcontrast.txt \ fdiscboot.txt fdnacomp.txt fdnadist.txt fdnainvar.txt \ fdnaml.txt fdnamlk.txt fdnamove.txt fdnapars.txt fdnapenny.txt \ fdollop.txt fdolmove.txt fdolpenny.txt \ fdrawgram.txt fdrawtree.txt \ ffactor.txt ffitch.txt ffreqboot.txt \ fgendist.txt fkitsch.txt \ fmix.txt fmove.txt fneighbor.txt \ fpars.txt fpenny.txt fproml.txt fpromlk.txt \ fprotdist.txt fprotpars.txt \ frestboot.txt frestdist.txt frestml.txt fretree.txt \ fseqboot.txt fseqbootall.txt \ ftreedist.txt ftreedistpair.txt all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/text/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/text/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/text/fclique.txt0000664000175000017500000002310312171064331015555 00000000000000 fclique Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Largest clique program Description Finds the largest clique of mutually compatible characters, and the phylogeny which they recommend, for discrete character data with two states. The largest clique (or all cliques within a given size range of the largest one) are found by a very fast branch and bound search method. The method does not allow for missing data. For such cases the T (Threshold) option of PARS or MIX may be a useful alternative. Compatibility methods are particular useful when some characters are of poor quality and the rest of good quality, but when it is not known in advance which ones are which. Algorithm This program uses the compatibility method for unrooted two-state characters to obtain the largest cliques of characters and the trees which they suggest. This approach originated in the work of Le Quesne (1969), though the algorithms were not precisely specified until the later work of Estabrook, Johnson, and McMorris (1976a, 1976b). These authors proved the theorem that a group of two-state characters which were pairwise compatible would be jointly compatible. This program uses an algorithm inspired by the Kent Fiala - George Estabrook program CLINCH, though closer in detail to the algorithm of Bron and Kerbosch (1973). 
I am indebted to Kent Fiala for pointing out that paper to me, and to David Penny for describing to me his branch-and-bound approach to finding largest cliques, from which I have also borrowed. I am particularly grateful to Kent Fiala for catching a bug in versions 2.0 and 2.1 which resulted in those versions failing to find all of the cliques which they should. The program computes a compatibility matrix for the characters, then uses a recursive procedure to examine all possible cliques of characters. After one pass through all possible cliques, the program knows the size of the largest clique, and during a second pass it prints out the cliques of the right size. It also, along with each clique, prints out the tree suggested by that clique.

ASSUMPTIONS

Basically the following assumptions are made:
1. Each character evolves independently.
2. Different lineages evolve independently.
3. The ancestral state is not known.
4. Each character has a small chance of being one which evolves so rapidly, or is so thoroughly misinterpreted, that it provides no information on the tree.
5. The probability of a single change in a character (other than in the high rate characters) is low but not as low as the probability of being one of these "bad" characters.
6. The probability of two changes in a low-rate character is much less than the probability that it is a high-rate character.
7. The true tree has segments which are not so unequal in length that two changes in a long segment are as easy to envisage as one change in a short segment.

The assumptions of compatibility methods have been treated in several of my papers (1978b, 1979, 1981b, 1988b), especially the 1981 paper. For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). A constant available for alteration at the beginning of the program is the form width, "FormWide", which you may want to change to make it as large as possible consistent with the page width available on your output device, so as to avoid the output of cliques and of trees getting wrapped around unnecessarily.

Usage

Here is a sample session with fclique

% fclique
Largest clique program
Phylip discrete states file: clique.dat
Phylip clique program output file [clique.fclique]:

Output written to file "clique.fclique"
Tree written on file "clique.treefile"
Done.
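The two stages described above, building the pairwise-compatibility matrix and then searching for the largest clique, can be sketched compactly. The Python fragment below is an illustration rather than fclique's code: it uses the fact that two 0/1 characters with no missing data are compatible exactly when the four combinations 00, 01, 10 and 11 do not all occur among the species, and then runs a plain Bron-Kerbosch recursion over the resulting graph. On the clique.dat data it recovers the clique of characters 1, 2, 3 and 6 shown in the example output further on.

data = {"Alpha":   "110110",
        "Beta":    "110000",
        "Gamma":   "100110",
        "Delta":   "001001",
        "Epsilon": "001110"}
nchar = len(next(iter(data.values())))

def compatible(i, j):
    pairs = {(row[i], row[j]) for row in data.values()}
    return len(pairs) < 4                             # all four combinations -> incompatible

neighbours = {i: {j for j in range(nchar) if j != i and compatible(i, j)}
              for i in range(nchar)}

best = set()
def bron_kerbosch(r, p, x):
    global best
    if not p and not x:                               # r is a maximal clique
        if len(r) > len(best):
            best = set(r)
        return
    for v in list(p):
        bron_kerbosch(r | {v}, p & neighbours[v], x & neighbours[v])
        p.remove(v)
        x.add(v)

bron_kerbosch(set(), set(range(nchar)), set())
print("largest clique (characters numbered from 1):", sorted(c + 1 for c in best))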
Go to the input files for this example Go to the output files for this example Command line arguments Largest clique program Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates Phylip discrete states file [-outfile] outfile [*.fclique] Phylip clique program output file Additional (Optional) qualifiers (* if not always prompted): -ancfile properties Phylip ancestral states file (optional) -factorfile properties Phylip multistate factors file (optional) -weights properties Phylip weights file (optional) -cliqmin integer [0] Minimum clique size (Integer 0 or more) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fclique] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -printcomp boolean [N] Print out compatibility matrix Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format Input to the algorithm is standard, but the "?", "P", and "B" states are not allowed. This is a serious limitation of this program. If you want to find large cliques in data that have "?" states, I recommend that you use fmix instead with the -Threshold option and the value of the threshold set to 2.0. The theory underlying this is given in my paper on character weighting (Felsenstein, 1981b). fclique reads discrete character data with 2 states. Input files for usage example File: clique.dat 5 6 Alpha 110110 Beta 110000 Gamma 100110 Delta 001001 Epsilon 001110 Output file format fclique writes the cliques to the text output file and a tree to a separate output file Output files for usage example File: clique.fclique Largest clique program, version 3.69.650 Largest Cliques ------- ------- Characters: ( 1 2 3 6) Tree and characters: 2 1 3 6 0 0 1 1 +1-Delta +0--1-+ +--0-+ +--Epsilon ! ! ! +--------Gamma ! +-------------Alpha ! +-------------Beta remember: this is an unrooted tree! File: clique.treefile (((Delta,Epsilon),Gamma),Alpha,Beta); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpars Discrete character parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fneighbor.txt0000664000175000017500000003177412171064331016105 00000000000000 fneighbor Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Phylogenies from distance matrix by N-J or UPGMA method Description An implementation by Mary Kuhner and John Yamato of Saitou and Nei's "Neighbor Joining Method," and of the UPGMA (Average Linkage clustering) method. Neighbor Joining is a distance matrix method producing an unrooted tree without the assumption of a clock. UPGMA does assume a clock. The branch lengths are not optimized by the least squares criterion but the methods are very fast and thus can handle much larger data sets. Algorithm This program implements the Neighbor-Joining method of Saitou and Nei (1987) and the UPGMA method of clustering. The program was written by Mary Kuhner and Jon Yamato, using some code from program FITCH. An important part of the code was translated from FORTRAN code from the neighbor-joining program written by Naruya Saitou and by Li Jin, and is used with the kind permission of Drs. Saitou and Jin. NEIGHBOR constructs a tree by successive clustering of lineages, setting branch lengths as the lineages join. The tree is not rearranged thereafter. The tree does not assume an evolutionary clock, so that it is in effect an unrooted tree. It should be somewhat similar to the tree obtained by FITCH. The program cannot evaluate a User tree, nor can it prevent branch lengths from becoming negative. However the algorithm is far faster than FITCH or KITSCH. This will make it particularly effective in their place for large studies or for bootstrap or jackknife resampling studies which require runs on multiple data sets. The UPGMA option constructs a tree by successive (agglomerative) clustering using an average-linkage method of clustering. It has some relationship to KITSCH, in that when the tree topology turns out the same, the branch lengths with UPGMA will turn out to be the same as with the P = 0 option of KITSCH. The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. 
In analyzing these data, distance matrix programs implicitly assume that: * Each distance is measured independently from the others: no item of data contributes to more than one distance. * The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation. These assumptions can be traced in the least squares methods of programs FITCH and KITSCH but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumptions is less obvious. THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b). For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better. Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIp by the program RESTDIST. For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true). FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time. 
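One round of the neighbor-joining clustering can be sketched as follows (an illustration only, not NEIGHBOR's code): with r(i) the sum of distances from taxon i to everything else, the pair minimising Q(i,j) = (n-2)d(i,j) - r(i) - r(j) is joined, given branch lengths, and replaced by a single node whose distances to the remaining taxa are (d(i,k)+d(j,k)-d(i,j))/2. Repeating the step until only two nodes remain (which are then connected directly) builds the unrooted tree. The distance matrix here is a made-up, additive four-taxon example.

from itertools import combinations

def nj_step(d):
    """d: dict of dicts of pairwise distances (symmetric, zero diagonal).
    Returns the joined pair, their branch lengths and the reduced matrix."""
    taxa = list(d)
    n = len(taxa)
    r = {i: sum(d[i][k] for k in taxa if k != i) for i in taxa}
    i, j = min(combinations(taxa, 2),
               key=lambda p: (n - 2) * d[p[0]][p[1]] - r[p[0]] - r[p[1]])
    li = d[i][j] / 2 + (r[i] - r[j]) / (2 * (n - 2))  # branch length to i
    lj = d[i][j] - li                                 # branch length to j
    new = (i, j)                                      # label for the new interior node
    kept = [k for k in taxa if k not in (i, j)] + [new]
    nd = {a: {} for a in kept}
    for a in kept:
        for b in kept:
            if a == b:
                nd[a][b] = 0.0
            elif new in (a, b):
                k = b if a == new else a
                nd[a][b] = (d[i][k] + d[j][k] - d[i][j]) / 2
            else:
                nd[a][b] = d[a][b]
    return i, j, li, lj, nd

d = {"A": {"A": 0, "B": 5, "C": 9, "D": 10},
     "B": {"A": 5, "B": 0, "C": 10, "D": 11},
     "C": {"A": 9, "B": 10, "C": 0, "D": 7},
     "D": {"A": 10, "B": 11, "C": 7, "D": 0}}
print(nj_step(d)[:4])        # ('A', 'B', 2.0, 3.0): A and B join first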
Usage Here is a sample session with fneighbor % fneighbor Phylogenies from distance matrix by N-J or UPGMA method Phylip distance matrix file: neighbor.dat Phylip neighbor program output file [neighbor.fneighbor]: Cycle 4: species 1 ( 0.91769) joins species 2 ( 0.76891) Cycle 3: node 1 ( 0.42027) joins species 3 ( 0.35793) Cycle 2: species 6 ( 0.15168) joins species 7 ( 0.11752) Cycle 1: node 1 ( 0.04648) joins species 4 ( 0.28469) last cycle: node 1 ( 0.02696) joins species 5 ( 0.15393) joins node 6 ( 0.03982) Output written on file "neighbor.fneighbor" Tree written on file "neighbor.treefile" Done. Go to the input files for this example Go to the output files for this example Command line arguments Phylogenies from distance matrix by N-J or UPGMA method Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-datafile] distances Phylip distance matrix file [-outfile] outfile [*.fneighbor] Phylip neighbor program output file Additional (Optional) qualifiers (* if not always prompted): -matrixtype menu [s] Type of data matrix (Values: s (Square); u (Upper triangular); l (Lower triangular)) -treetype menu [n] Neighbor-joining or UPGMA tree (Values: n (Neighbor-joining); u (UPGMA)) * -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -jumble toggle [N] Randomise input order of species * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -replicates boolean [N] Subreplicates -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fneighbor] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory2 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fneighbor reads any normal sequence USAs. Input files for usage example File: neighbor.dat 7 Bovine 0.0000 1.6866 1.7198 1.6606 1.5243 1.6043 1.5905 Mouse 1.6866 0.0000 1.5232 1.4841 1.4465 1.4389 1.4629 Gibbon 1.7198 1.5232 0.0000 0.7115 0.5958 0.6179 0.5583 Orang 1.6606 1.4841 0.7115 0.0000 0.4631 0.5061 0.4710 Gorilla 1.5243 1.4465 0.5958 0.4631 0.0000 0.3484 0.3083 Chimp 1.6043 1.4389 0.6179 0.5061 0.3484 0.0000 0.2692 Human 1.5905 1.4629 0.5583 0.4710 0.3083 0.2692 0.0000 Output file format fneighbor output consists of an tree (rooted if UPGMA, unrooted if Neighbor-Joining) and the lengths of the interior segments. The Average Percent Standard Deviation is not computed or printed out. If the tree found by Neighbor is fed into FITCH as a User Tree, it will compute this quantity if one also selects the N option of FITCH to ensure that none of the branch lengths is re-estimated. 
As NEIGHBOR runs it prints out an account of the successive clustering levels, if you allow it to. This is mostly for reassurance and can be suppressed using menu option 2. In this printout of cluster levels the word "OTU" refers to a tip species, and the word "NODE" to an interior node of the resulting tree. Output files for usage example File: neighbor.fneighbor Neighbor-Joining/UPGMA method version 3.69.650 7 Populations Neighbor-joining method Negative branch lengths allowed +---------------------------------------------Mouse ! ! +---------------------Gibbon 1------------------------2 ! ! +----------------Orang ! +--4 ! ! +--------Gorilla ! +-5 ! ! +--------Chimp ! +-3 ! +------Human ! +------------------------------------------------------Bovine remember: this is an unrooted tree! Between And Length ------- --- ------ 1 Mouse 0.76891 1 2 0.42027 2 Gibbon 0.35793 2 4 0.04648 4 Orang 0.28469 4 5 0.02696 5 Gorilla 0.15393 5 3 0.03982 3 Chimp 0.15168 3 Human 0.11752 1 Bovine 0.91769 File: neighbor.treefile (Mouse:0.76891,(Gibbon:0.35793,(Orang:0.28469,(Gorilla:0.15393, (Chimp:0.15168,Human:0.11752):0.03982):0.02696):0.04648):0.42027,Bovine:0.91769) ; Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description efitch Fitch-Margoliash and least-squares distance methods ekitsch Fitch-Margoliash method with contemporary tips eneighbor Phylogenies from distance matrix by N-J or UPGMA method ffitch Fitch-Margoliash and least-squares distance methods fkitsch Fitch-Margoliash method with contemporary tips Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fpars.txt0000664000175000017500000004424112171064331015246 00000000000000 fpars Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Discrete character parsimony Description Multistate discrete-characters parsimony method. Up to 8 states (as well as "?") are allowed. Cannot do Camin-Sokal or Dollo Parsimony. Can cope with multifurcations, reconstruct ancestral states, use character weights, and infer branch lengths. Algorithm PARS is a general parsimony program which carries out the Wagner parsimony method with multiple states. Wagner parsimony allows changes among all states. The criterion is to find the tree which requires the minimum number of changes. The Wagner method was originated by Eck and Dayhoff (1966) and by Kluge and Farris (1969). Here are its assumptions: 1. Ancestral states are unknown unknown. 2. Different characters evolve independently. 3. Different lineages evolve independently. 4. Changes to all other states are equally probable (Wagner). 5. These changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question. 6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than these state changes. 7. 
Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment. That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Usage Here is a sample session with fpars % fpars Discrete character parsimony Input file: pars.dat Phylip tree file (optional): Phylip pars program output file [pars.fpars]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements on the first of the trees tied for best !---------! ......... ......... Collapsing best trees . Output written to file "pars.fpars" Tree also written onto file "pars.treefile" Done. Go to the input files for this example Go to the output files for this example Command line arguments Discrete character parsimony Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] discretestates File containing one or more data sets [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fpars] Phylip pars program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -method menu [Wagner] Choose the parsimony method to use (Values: w (Wagner); c (Camin-Sokal)) -maxtrees integer [100] Number of trees to save (Integer from 1 to 1000000) * -[no]thorough toggle [Y] More thorough search * -[no]rearrange boolean [Y] Rearrange on just one best tree * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1] Threshold value (Number 1.000 or more) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fpars] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -stepbox boolean [N] Print steps at each site -ancseq boolean [N] Print states at all nodes of tree * -[no]dotdiff boolean [Y] Use dot differencing to display results Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fpars reads discrete characters input, except that multiple states (up to 9 of them) are allowed. Any characters other than "?" 
are allowed as states, up to a maximum of 9 states. In fact, one can use different symbols in different columns of the data matrix, although it is rather unlikely that you would want to do that. The symbols you can use are: * The digits 0-9, * The letters A-Z and a-z, * The symbols "!\"#$%&'()*+,-./:;<=>?@\[\\]^_`\{|}~ (of these, probably only + and - will be of interest to most users). But note that these do not include blank (" "). Blanks in the input data are simply skipped by the program, so that they can be used to make characters into groups for ease of viewing. The "?" (question mark) symbol has special meaning. It is allowed in the input but is not available as the symbol of a state. Rather, it means that the state is unknown. PARS can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch. It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier programs such as MIX. (0,1) Discrete character data These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both". There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form: 1 ---> 0 ---> 2 | | V 3 so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters: Old State New States --- ----- --- ------ 0 001 1 000 2 011 3 101 The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. 
This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops. However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979). If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR. We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: pars.dat

    5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Output file format

fpars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other. If option 2 is toggled on a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item D appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?", there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree.
This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across sites. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the discrete characters are evolving independently, which is unlikely to be true for many suites of characters.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Output files for usage example

File: pars.fpars

Discrete character parsimony algorithm, version 3.69.650

One most parsimonious tree found:

                           +Epsilon
          +----------------3
+--------2                 +-------------------------Delta
|        |
|        +Gamma
|
1----------------Beta
|
+Alpha

requires a total of 8.000

  between      and       length
  -------      ---       ------
     1           2         1.00
     2           3         2.00
     3        Epsilon      0.00
     3        Delta        3.00
     2        Gamma        0.00
     1        Beta         2.00
     1        Alpha        0.00

File: pars.treefile

(((Epsilon:0.00,Delta:3.00):2.00,Gamma:0.00):1.00,Beta:2.00,Alpha:0.00);

Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None.
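As a footnote to the output-format discussion above, the two-tree step-difference (KHT-style) test is easy to sketch. The fragment below (Python; the per-character step counts are invented numbers, and the variance estimator is one common choice whose details may differ from the program's own code) computes the total step difference between a user tree and the best tree, its standard deviation across characters, and the 1.96-standard-deviation comparison:

    import math

    # Hypothetical per-character step counts for the best tree and one
    # user-supplied tree (made-up numbers, for illustration only).
    steps_best = [2, 1, 3, 0, 2, 1, 4, 2]
    steps_user = [3, 1, 4, 0, 2, 2, 4, 3]

    d = [u - b for u, b in zip(steps_user, steps_best)]
    n = len(d)
    total = sum(d)                      # extra steps required by the user tree
    mean = total / n
    # variance of the total difference, estimated from the spread of the
    # per-character differences
    var_total = n / (n - 1) * sum((x - mean) ** 2 for x in d)
    sd = math.sqrt(var_total)

    print("difference =", total, " s.d. =", round(sd, 3))
    print("significantly worse" if total > 1.96 * sd else "not significantly worse")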
See also Program name Description eclique Largest clique program edollop Dollo and polymorphism parsimony algorithm edolpenny Penny algorithm Dollo or polymorphism efactor Multistate to binary recoding program emix Mixed parsimony algorithm epenny Penny algorithm, branch-and-bound fclique Largest clique program fdollop Dollo and polymorphism parsimony algorithm fdolpenny Penny algorithm Dollo or polymorphism ffactor Multistate to binary recoding program fmix Mixed parsimony algorithm fmove Interactive mixed method parsimony fpenny Penny algorithm, branch-and-bound Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/ffitch.txt0000664000175000017500000003100712171064331015372 00000000000000 ffitch Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Fitch-Margoliash and least-squares distance methods Description Estimates phylogenies from distance matrix data under the "additive tree model" according to which the distances are expected to equal the sums of branch lengths between the species. Uses the Fitch-Margoliash criterion and some related least squares criteria, or the Minimum Evolution distance matrix method. Does not assume an evolutionary clock. This program will be useful with distances computed from molecular sequences, restriction sites or fragments distances, with DNA hybridization measurements, and with genetic distances computed from gene frequencies. Algorithm The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that: * Each distance is measured independently from the others: no item of data contributes to more than one distance. * The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation. These assumptions can be traced in the least squares methods of programs FITCH and KITSCH but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumptions is less obvious. THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. 
However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b). For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better. Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIp by the program RESTDIST. For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true). FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time. Usage Here is a sample session with ffitch % ffitch Fitch-Margoliash and least-squares distance methods Phylip distance matrix file: fitch.dat Phylip tree file (optional): Phylip fitch program output file [fitch.ffitch]: Adding species: 1. Bovine 2. Mouse 3. Gibbon 4. Orang 5. Gorilla 6. Chimp 7. Human Output written to file "fitch.ffitch" Tree also written onto file "fitch.treefile" Done. 
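Before the example input and output files, here is a minimal sketch (Python; the observed and tree distances are invented numbers, and this illustrates the criterion only, not the program's tree search) of the weighted least-squares quantity that the Fitch-Margoliash method minimizes: squared differences between observed and tree (sum-of-branch-length) distances, each weighted by the observed distance raised to the power P (the -power qualifier, 2.0 by default):

    # Hypothetical observed distances and tree distances for three pairs of
    # taxa; the numbers are invented for illustration.
    observed = {('A', 'B'): 0.30, ('A', 'C'): 0.55, ('B', 'C'): 0.60}
    fitted   = {('A', 'B'): 0.32, ('A', 'C'): 0.52, ('B', 'C'): 0.61}

    def fm_criterion(obs, exp, power=2.0):
        """Weighted sum of squares: sum over pairs of (obs - exp)^2 / obs^power."""
        return sum((obs[p] - exp[p]) ** 2 / obs[p] ** power for p in obs)

    print(round(fm_criterion(observed, fitted), 6))   # smaller is better

The program's job is to choose the tree topology and branch lengths that make this quantity as small as possible.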
Go to the input files for this example Go to the output files for this example

Command line arguments

Fitch-Margoliash and least-squares distance methods Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-datafile] distances File containing one or more distance matrices [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.ffitch] Phylip fitch program output file Additional (Optional) qualifiers (* if not always prompted): -matrixtype menu [s] Type of input data matrix (Values: s (Square); u (Upper triangular); l (Lower triangular)) -minev boolean [N] Minimum evolution * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -power float [2.0] Power (Any numeric value) * -lengths boolean [N] Use branch lengths from user trees * -negallowed boolean [N] Negative branch lengths allowed * -global boolean [N] Global rearrangements -replicates boolean [N] Subreplicates -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.ffitch] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit

Input file format

ffitch reads a PHYLIP distance matrix: square by default, or upper- or lower-triangular as selected with the -matrixtype qualifier.

Input files for usage example

File: fitch.dat

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000

Output file format

ffitch output consists of an unrooted tree and the lengths of the interior segments. The sum of squares is printed out, and if P = 2.0 Fitch and Margoliash's "average percent standard deviation" is also computed and printed out. This is the sum of squares, divided by N-2, and then square-rooted and then multiplied by 100 (n is the number of species on the tree):

   APSD = ( SSQ / (N-2) )^(1/2) x 100

where N is the total number of off-diagonal distance measurements that are in the (square) distance matrix. If the S (subreplication) option is in force it is instead the sum of the numbers of replicates in all the non-diagonal cells of the distance matrix.
But if the L or R option is also in effect, so that the distance matrix read in is lower- or upper-triangular, then the sum of replicates is only over those cells actually read in. If S is not in force, the number of replicates in each cell is assumed to be 1, so that N is n(n-1), where n is the number of species. The APSD gives an indication of the average percentage error. The number of trees examined is also printed out. Output files for usage example File: fitch.ffitch 7 Populations Fitch-Margoliash method version 3.69.650 __ __ 2 \ \ (Obs - Exp) Sum of squares = /_ /_ ------------ 2 i j Obs Negative branch lengths not allowed +---------------------------------------------Mouse ! ! +------Human ! +--5 ! +-4 +--------Chimp ! ! ! ! +--3 +---------Gorilla ! ! ! 1------------------------2 +-----------------Orang ! ! ! +---------------------Gibbon ! +------------------------------------------------------Bovine remember: this is an unrooted tree! Sum of squares = 0.01375 Average percent standard deviation = 1.85418 Between And Length ------- --- ------ 1 Mouse 0.76985 1 2 0.41983 2 3 0.04986 3 4 0.02121 4 5 0.03695 5 Human 0.11449 5 Chimp 0.15471 4 Gorilla 0.15680 3 Orang 0.29209 2 Gibbon 0.35537 1 Bovine 0.91675 File: fitch.treefile (Mouse:0.76985,((((Human:0.11449,Chimp:0.15471):0.03695, Gorilla:0.15680):0.02121,Orang:0.29209):0.04986,Gibbon:0.35537):0.41983,Bovine:0 .91675); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description efitch Fitch-Margoliash and least-squares distance methods ekitsch Fitch-Margoliash method with contemporary tips eneighbor Phylogenies from distance matrix by N-J or UPGMA method fkitsch Fitch-Margoliash method with contemporary tips fneighbor Phylogenies from distance matrix by N-J or UPGMA method Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fdnainvar.txt0000664000175000017500000004632512171064331016110 00000000000000 fdnainvar Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Nucleic acid sequence invariants method Description For nucleic acid sequence data on four species, computes Lake's and Cavender's phylogenetic invariants, which test alternative tree topologies. The program also tabulates the frequencies of occurrence of the different nucleotide patterns. Lake's invariants are the method which he calls "evolutionary parsimony". Algorithm This program reads in nucleotide sequences for four species and computes the phylogenetic invariants discovered by James Cavender (Cavender and Felsenstein, 1987) and James Lake (1987). Lake's method is also called by him "evolutionary parsimony". I prefer Cavender's more mathematically precise term "invariants", as the method bears somewhat more relationship to likelihood methods than to parsimony. 
The invariants are mathematical formulas (in the present case linear or quadratic) in the EXPECTED frequencies of site patterns which are zero for all trees of a given tree topology, irrespective of branch lengths. We can consider at a given site that if there are no ambiguities, we could have for four species the nucleotide patterns (considering the same site across all four species) AAAA, AAAC, AAAG, ... through TTTT, 256 patterns in all. The invariants are formulas in the expected pattern frequencies, not the observed pattern frequencies. When they are computed using the observed pattern frequencies, we will usually find that they are not precisely zero even when the model is correct and we have the correct tree topology. Only as the number of nucleotides scored becomes infinite will the observed pattern frequencies approach their expectations; otherwise, we must do a statistical test of the invariants. Some explanation of invariants will be found in the above papers, and also in my recent review article on statistical aspects of inferring phylogenies (Felsenstein, 1988b). Although invariants have some important advantages, their validity also depends on symmetry assumptions that may not be satisfied. In the discussion below suppose that the possible unrooted phylogenies are I: ((A,B),(C,D)), II: ((A,C),(B,D)), and III: ((A,D),(B,C)).

Lake's Invariants, Their Testing and Assumptions

Lake's invariants are fairly simple to describe: the patterns involved are only those in which there are two purines and two pyrimidines at a site. Thus a site with AACT would affect the invariants, but a site with AAGG would not. Let us use (as Lake does) the symbols 1, 2, 3, and 4, with the proviso that 1 and 2 are either both of the purines or both of the pyrimidines; 3 and 4 are the other two nucleotides. Thus 1 and 2 always differ by a transition; so do 3 and 4. Lake's invariants, expressed in terms of expected frequencies, are the three quantities:

   (1)   P(1133) + P(1234) - P(1134) - P(1233)
   (2)   P(1313) + P(1324) - P(1314) - P(1323)
   (3)   P(1331) + P(1342) - P(1341) - P(1332)

He showed that invariants (2) and (3) are zero under Topology I, (1) and (3) are zero under Topology II, and (1) and (2) are zero under Topology III. If, for example, we see a site with pattern ACGC, we can start by setting 1=A. Then 2 must be G. We can then set 3=C (so that 4 is T). Thus its pattern type, making those substitutions, is 1323. P(1323) is the expected probability of the type of pattern which includes ACGC, TGAG, GTAT, etc. Lake's invariants are easily tested with observed frequencies. For example, the first of them is a test of whether there are as many sites of types 1133 and 1234 as there are of types 1134 and 1233; this is easily tested with a chi-square test or, as in this program, with an exact binomial test. Note that with several invariants to test, we risk overestimating the significance of results if we simply accept the nominal 95% levels of significance (Li and Gouy, 1990). Lake's invariants assume that each site is evolving independently, and that starting from any base a transversion is equally likely to end up at each of the two possible bases (thus, an A undergoing a transversion is equally likely to end up as a C or a T, and similarly for the other three bases from which one could start). Interestingly, Lake's results do not assume that rates of evolution are the same at all sites.
The result that the total of 1133 and 1234 is expected to be the same as the total of 1134 and 1233 is unaffected by the fact that we may have aggregated the counts over classes of sites evolving at different rates.

Cavender's Invariants, Their Testing and Assumptions

Cavender's invariants (Cavender and Felsenstein, 1987) are for the case of a character with two states. In the nucleic acid case we can classify nucleotides into two states, R and Y (Purine and Pyrimidine) and then use the two-state results. Cavender starts, as before, with the pattern frequencies. Coding purines as R and pyrimidines as Y, the pattern types are RRRR, RRRY, and so on until YYYY, a total of 16 types. Cavender found quadratic functions of the expected frequencies of these 16 types that were expected to be zero under a given phylogeny, irrespective of branch lengths. Two invariants (called K and L) were found for each tree topology. The L invariants are particularly easy to understand. If we have the tree topology ((A,B),(C,D)), then in the case of two symmetric states, the event that A and B have the same state should be independent of whether C and D have the same state, as the events determining these happen in different parts of the tree. We can set up a contingency table:

                       C = D              C =/= D
             ------------------------------
             |
     A = B   |   YYYY, YYRR,          YYYR, YYRY,
             |   RRRR, RRYY           RRYR, RRRY
             |
     A =/= B |   YRYY, YRRR,          YRYR, YRRY,
             |   RYYY, RYRR           RYYR, RYRY

and we expect that the events C = D and A = B will be independent. Cavender's L invariant for this tree topology is simply the negative of the crossproduct difference, P(A=/=B and C=D) P(A=B and C=/=D) - P(A=B and C=D) P(A=/=B and C=/=D). One of these L invariants is defined for each of the three tree topologies. They can obviously be tested simply by doing a chi-square test on the contingency table. The one corresponding to the correct topology should be statistically indistinguishable from zero. Again, there is a possible multiple tests problem if all three are tested at a nominal value of 95%. The K invariants are differences between the L invariants. When one of the tables is expected to have crossproduct difference zero, the other two are expected to be nonzero, and also to be equal. So the difference of their crossproduct differences can be taken; this is the K invariant. It is not so easily tested. The assumptions of Cavender's invariants are different from those of Lake's. One obviously need not assume anything about the frequencies of, or transitions among, the two different purines or the two different pyrimidines. However, one does need to assume independent events at each site, and one needs to assume that the Y and R states are symmetric, that the probability per unit time that a Y changes into an R is the same as the probability that an R changes into a Y, so that we expect equal frequencies of the two states. There is also an assumption that all sites are changing between these two states at the same expected rate. This assumption is not needed for Lake's invariants, since expectations of sums are equal to sums of expectations, but for Cavender's it is, since products of expectations are not equal to expectations of products. It is helpful to have both sorts of invariants available; with further work we may appreciate what other invariants there are for various models of nucleic acid change.
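To make the two kinds of invariants concrete before the sample session, here is a small sketch (Python; the alignment columns are invented, and the code is an illustration only, not the program's own). The first part applies Lake's 1234 symbolization to each site that has two purines and two pyrimidines, tallies the pattern types entering his first invariant, and does a one-tailed exact binomial comparison of the two totals; the second part builds Cavender's contingency table for tree I, ((A,B),(C,D)), from the purine/pyrimidine coding and computes the L invariant as the negative of the cross-product difference:

    import math

    purines, pyrimidines = set('AG'), set('CT')

    def lake_symbols(site):
        """Lake's 1234 symbolization of one site (a string of four bases),
        or None if the site lacks two purines and two pyrimidines."""
        if sum(b in purines for b in site) != 2:
            return None
        one = site[0]
        group = purines if one in purines else pyrimidines
        two = (group - {one}).pop()
        other = pyrimidines if group is purines else purines
        three = next(b for b in site[1:] if b in other)
        four = (other - {three}).pop()
        code = {one: '1', two: '2', three: '3', four: '4'}
        return ''.join(code[b] for b in site)

    # Invented alignment columns for four species A, B, C and D.
    sites = ['ACGC', 'AACT', 'AGCT', 'AACC', 'ATGC', 'AAGG', 'CCTT']

    counts = {}
    for s in sites:
        sym = lake_symbols(s)
        if sym is not None:
            counts[sym] = counts.get(sym, 0) + 1

    # Lake's first invariant compares the totals of 1133+1234 and 1134+1233.
    left = counts.get('1133', 0) + counts.get('1234', 0)
    right = counts.get('1134', 0) + counts.get('1233', 0)
    n = left + right
    tail = sum(math.comb(n, k) for k in range(max(left, right), n + 1)) / 2 ** n
    print('1133+1234 =', left, ' 1134+1233 =', right, ' binomial tail P =', round(tail, 4))

    # Cavender's L invariant for tree I, ((A,B),(C,D)), from the R/Y coding.
    def ry(b):
        return 'R' if b in purines else 'Y'

    table = [[0, 0], [0, 0]]        # rows: A=B / A=/=B; columns: C=D / C=/=D
    for a, b, c, d in sites:
        table[ry(a) != ry(b)][ry(c) != ry(d)] += 1
    total = len(sites)
    f = [[x / total for x in row] for row in table]
    L = -(f[1][0] * f[0][1] - f[0][0] * f[1][1])
    print('contingency table:', table, ' L invariant =', round(L, 4))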
Usage Here is a sample session with fdnainvar % fdnainvar -printdata Nucleic acid sequence invariants method Input (aligned) nucleotide sequence set(s): dnainvar.dat Phylip weights file (optional): Phylip dnainvar program output file [dnainvar.fdnainvar]: Output written to output file "dnainvar.fdnainvar" Done. Go to the input files for this example Go to the output files for this example Command line arguments Nucleic acid sequence invariants method Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments -weights properties Phylip weights file (optional) [-outfile] outfile [*.fdnainvar] Phylip dnainvar program output file Additional (Optional) qualifiers (* if not always prompted): -printdata boolean [N] Print data at start of run * -[no]dotdiff boolean [Y] Use dot-differencing to display results -[no]printpattern boolean [Y] Print counts of patterns -[no]printinvariant boolean [Y] Print invariants -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnainvar reads any normal sequence USAs. Input files for usage example File: dnainvar.dat 4 13 Alpha AACGTGGCCAAAT Beta AAGGTCGCCAAAC Gamma CATTTCGTCACAA Delta GGTATTTCGGCCT Output file format fdnainvar output consists first (if option 1 is selected) of a reprinting of the input data, then (if option 2 is on) tables of observed pattern frequencies and pattern type frequencies. A table will be printed out, in alphabetic order AAAA through TTTT of all the patterns that appear among the sites and the number of times each appears. This table will be invaluable for computation of any other invariants. There follows another table, of pattern types, using the 1234 notation, in numerical order 1111 through 1234, of the number of times each type of pattern appears. In this computation all sites at which there are any ambiguities or deletions are omitted. 
Cavender's invariants could actually be computed from sites that have only Y or R ambiguities; this will be done in the next release of this program. If option 3 is on the invariants are then printed out, together with their statistical tests. For Lake's invariants the two sums which are expected to be equal are printed out, and then the result of an one-tailed exact binomial test which tests whether the difference is expected to be this positive or more. The P level is given (but remember the multiple-tests problem!). For Cavender's L invariants the contingency tables are given. Each is tested with a one-tailed chi-square test. It is possible that the expected numbers in some categories could be too small for valid use of this test; the program does not check for this. It is also possible that the chi-square could be significant but in the wrong direction; this is not tested in the current version of the program. To check it beware of a chi-square greater than 3.841 but with a positive invariant. The invariants themselves are computed, as the difference of cross-products. Their absolute magnitudes are not important, but which one is closest to zero may be indicative. Significantly nonzero invariants should be negative if the model is valid. The K invariants, which are simply differences among the L invariants, are also printed out without any test on them being conducted. Note that it is possible to use the bootstrap utility SEQBOOT to create multiple data sets, and from the output from sunning all of these get the empirical variability of these quadratic invariants. Output files for usage example File: dnainvar.fdnainvar Nucleic acid sequence Invariants method, version 3.69.650 4 species, 13 sites Name Sequences ---- --------- Alpha AACGTGGCCA AAT Beta ..G..C.... ..C Gamma C.TT.C.T.. C.A Delta GGTA.TT.GG CC. Pattern Number of times AAAC 1 AAAG 2 AACC 1 AACG 1 CCCG 1 CCTC 1 CGTT 1 GCCT 1 GGGT 1 GGTA 1 TCAT 1 TTTT 1 Symmetrized patterns (1, 2 = the two purines and 3, 4 = the two pyrimidines or 1, 2 = the two pyrimidines and 3, 4 = the two purines) 1111 1 1112 2 1113 3 1121 1 1132 2 1133 1 1231 1 1322 1 1334 1 Tree topologies (unrooted): I: ((Alpha,Beta),(Gamma,Delta)) II: ((Alpha,Gamma),(Beta,Delta)) III: ((Alpha,Delta),(Beta,Gamma)) [Part of this file has been deleted for brevity] different purine:pyrimidine ratios from 1:1. Tree I: Contingency Table 2 8 1 2 Quadratic invariant = 4.0 Chi-square = 0.23111 (not significant) Tree II: Contingency Table 1 5 1 6 Quadratic invariant = -1.0 Chi-square = 0.01407 (not significant) Tree III: Contingency Table 1 2 6 4 Quadratic invariant = 8.0 Chi-square = 0.66032 (not significant) Cavender's quadratic invariants (type K) using purines vs. pyrimidines (these are expected to be zero for the correct tree topology) They will be misled if there are substantially different evolutionary rate between sites, or different purine:pyrimidine ratios from 1:1. No statistical test is done on them here. Tree I: -9.0 Tree II: 4.0 Tree III: 5.0 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fprotpars.txt0000664000175000017500000007331412171064331016156 00000000000000 fprotpars Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Protein parsimony algorithm Description Estimates phylogenies from protein sequences (input using the standard one-letter code for amino acids) using the parsimony method, in a variant which counts only those nucleotide changes that change the amino acid, on the assumption that silent changes are more easily accomplished. Algorithm This program infers an unrooted phylogeny from protein sequences, using a new method intermediate between the approaches of Eck and Dayhoff (1966) and Fitch (1971). Eck and Dayhoff (1966) allowed any amino acid to change to any other, and counted the number of such changes needed to evolve the protein sequences on each given phylogeny. This has the problem that it allows replacements which are not consistent with the genetic code, counting them equally with replacements that are consistent. Fitch, on the other hand, counted the minimum number of nucleotide substitutions that would be needed to achieve the given protein sequences. This counts silent changes equally with those that change the amino acid. The present method insists that any changes of amino acid be consistent with the genetic code so that, for example, lysine is allowed to change to methionine but not to proline. 
However, changes between two amino acids via a third are allowed and counted as two changes if each of the two replacements is individually allowed. This sometimes allows changes that at first sight you would think should be outlawed. Thus we can change from phenylalanine to glutamine via leucine in two steps total. Consulting the genetic code, you will find that there is a leucine codon one step away from a phenylalanine codon, and a leucine codon one step away from glutamine. But they are not the same leucine codon. It actually takes three base substitutions to get from either of the phenylalanine codons TTT and TTC to either of the glutamine codons CAA or CAG. Why then does this program count only two? The answer is that recent DNA sequence comparisons seem to show that synonymous changes are considerably faster and easier than ones that change the amino acid. We are assuming that, in effect, synonymous changes occur so much more readily that they need not be counted. Thus, in the chain of changes TTT (Phe) --> CTT (Leu) --> CTA (Leu) --> CAA (Gln), the middle one is not counted because it does not change the amino acid (leucine). To maintain consistency with the genetic code, it is necessary for the program internally to treat serine as two separate states (ser1 and ser2) since the two groups of serine codons are not adjacent in the code. Changes to the state "deletion" are counted as three steps to prevent the algorithm from assuming unnecessary deletions. The state "unknown" is simply taken to mean that the amino acid, which has not been determined, will in each part of a tree that is evaluated be assumed to be whichever one causes the fewest steps. The assumptions of this method (which has not been described in the literature) are thus something like this:

   1. Change in different sites is independent.
   2. Change in different lineages is independent.
   3. The probability of a base substitution that changes the amino acid sequence is small over the lengths of time involved in a branch of the phylogeny.
   4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
   5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.
   6. The probability of a base change that is synonymous is much higher than the probability of a change that is not synonymous.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the works by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986). The input for the program is fairly standard. The first line contains the number of species and the number of amino acid positions (counting any stop codons that you want to include). Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line.
Note that a blank is not a valid symbol for a deletion. The protein sequences are given by the one-letter code used by described in the Molecular Sequence Programs documentation file. Note that if two polypeptide chains are being used that are of different length owing to one terminating before the other, they should be coded as (say) HIINMA*???? HIPNMGVWABT since after the stop codon we do not definitely know that there has been a deletion, and do not know what amino acid would have been there. If DNA studies tell us that there is DNA sequence in that region, then we could use "X" rather than "?". Note that "X" means an unknown amino acid, but definitely an amino acid, while "?" could mean either that or a deletion. The distinction is often significant in regions where there are deletions: one may want to encode a six-base deletion as "-?????" since that way the program will only count one deletion, not six deletion events, when the deletion arises. However, if there are overlapping deletions it may not be so easy to know what coding is correct. One will usually want to use "?" after a stop codon, if one does not know what amino acid is there. If the DNA sequence has been observed there, one probably ought to resist putting in the amino acids that this DNA would code for, and one should use "X" instead, because under the assumptions implicit in this parsimony method, changes to any noncoding sequence are much easier than changes in a coding region that change the amino acid, so that they shouldn't be counted anyway! The form of this information is the standard one described in the main documentation file. For the U option the tree provided must be a rooted bifurcating tree, with the root placed anywhere you want, since that root placement does not affect anything. Usage Here is a sample session with fprotpars % fprotpars Protein parsimony algorithm Input (aligned) protein sequence set(s): protpars.dat Phylip tree file (optional): Phylip protpars program output file [protpars.fprotpars]: Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Doing global rearrangements !---------! ......... ......... Output written to file "protpars.fprotpars" Trees also written onto file "protpars.treefile" Done. Go to the input files for this example Go to the output files for this example Example 2 % fprotpars -njumble 3 -seed 3 -printdata -ancseq -whichcode m -stepbox -outgrno 2 -dothreshold -threshold 3 Protein parsimony algorithm Input (aligned) protein sequence set(s): protpars.dat Phylip tree file (optional): Phylip protpars program output file [protpars.fprotpars]: Adding species: 1. Delta 2. Epsilon 3. Alpha 4. Beta 5. Gamma Doing global rearrangements !---------! ......... ......... Adding species: 1. Beta 2. Epsilon 3. Delta 4. Alpha 5. Gamma Doing global rearrangements !---------! ......... Adding species: 1. Epsilon 2. Alpha 3. Gamma 4. Delta 5. Beta Doing global rearrangements !---------! ......... Output written to file "protpars.fprotpars" Trees also written onto file "protpars.treefile" Done. Go to the output files for this example Example 3 % fprotpars -njumble 3 -seed 3 Protein parsimony algorithm Input (aligned) protein sequence set(s): protpars2.dat Phylip tree file (optional): Phylip protpars program output file [protpars2.fprotpars]: Data set # 1: Adding species: 1. Delta 2. Epsilon 3. Alpha 4. Beta 5. Gamma Doing global rearrangements !---------! ......... ......... Adding species: 1. Beta 2. Epsilon 3. Delta 4. Alpha 5. Gamma Doing global rearrangements !---------! 
......... Adding species: 1. Epsilon 2. Alpha 3. Gamma 4. Delta 5. Beta Doing global rearrangements !---------! ......... Output written to file "protpars2.fprotpars" Trees also written onto file "protpars2.treefile" Data set # 2: Adding species: 1. Gamma 2. Delta 3. Epsilon 4. Beta 5. Alpha Doing global rearrangements !---------! ......... ......... Adding species: 1. Alpha 2. Delta 3. Epsilon 4. Gamma 5. Beta Doing global rearrangements !---------! ......... Adding species: 1. Epsilon 2. Beta 3. Gamma 4. Alpha 5. Delta Doing global rearrangements !---------! ......... Output written to file "protpars2.fprotpars" Trees also written onto file "protpars2.treefile" Data set # 3: Adding species: 1. Delta 2. Beta 3. Gamma 4. Alpha 5. Epsilon Doing global rearrangements !---------! ......... ......... Adding species: 1. Gamma 2. Delta 3. Beta 4. Epsilon 5. Alpha Doing global rearrangements !---------! ......... Adding species: 1. Epsilon 2. Alpha 3. Gamma 4. Delta 5. Beta Doing global rearrangements !---------! ......... Output written to file "protpars2.fprotpars" Trees also written onto file "protpars2.treefile" Done. Go to the input files for this example Go to the output files for this example Example 4 % fprotpars -option Protein parsimony algorithm Input (aligned) protein sequence set(s): protpars.dat Phylip tree file (optional): Phylip weights file (optional): protparswts.dat Number of times to randomise [0]: Species number to use as outgroup [0]: Use threshold parsimony [N]: Genetic codes U : Universal M : Mitochondrial V : Vertebrate mitochondrial F : Fly mitochondrial Y : Yeast mitochondrial Use which genetic code [Universal]: Phylip protpars program output file [protpars.fprotpars]: Write out trees to tree file [Y]: Phylip tree output file (optional) [protpars.treefile]: Print data at start of run [N]: Print indications of progress of run [Y]: Print out tree [Y]: Print steps at each site [N]: Print sequences at all nodes of tree [N]: Weights set # 1: Adding species: 1. Delta 2. Alpha 3. Gamma 4. Epsilon 5. Beta Doing global rearrangements !---------! ......... ......... Output written to file "protpars.fprotpars" Trees also written onto file "protpars.treefile" Weights set # 2: Adding species: 1. Epsilon 2. Alpha 3. Delta 4. Gamma 5. Beta Doing global rearrangements !---------! ......... ......... Output written to file "protpars.fprotpars" Trees also written onto file "protpars.treefile" Done. 
Go to the input files for this example Go to the output files for this example Command line arguments Protein parsimony algorithm Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fprotpars] Phylip protpars program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Phylip weights file (optional) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -dothreshold toggle [N] Use threshold parsimony * -threshold float [1] Threshold value (Number 1.000 or more) -whichcode menu [Universal] Use which genetic code (Values: U (Universal); M (Mitochondrial); V (Vertebrate mitochondrial); F (Fly mitochondrial); Y (Yeast mitochondrial)) -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fprotpars] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -stepbox boolean [N] Print steps at each site -ancseq boolean [N] Print sequences at all nodes of tree * -[no]dotdiff boolean [Y] Use dot differencing to display results Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fprotpars reads any normal sequence USAs. Input files for usage example File: protpars.dat 5 10 Alpha ABCDEFGHIK Beta AB--EFGHIK Gamma ?BCDSFG*?? Delta CIKDEFGHIK Epsilon DIKDEFGHIK Input files for usage example 3 File: protpars2.dat 5 10 Alpha AABBCCCFHK Beta AABB---FHK Gamma ??BBCCCF*? Delta CCIIKKKFHK Epsilon DDIIKKKFHK 5 10 Alpha AADDEGGIIK Beta AA--EGGIIK Gamma ??DDSGG??? 
Delta CCDDEGGIIK Epsilon DDDDEGGIIK 5 10 Alpha AACDDDEGHI Beta AA----EGHI Gamma ??CDDDSG*? Delta CCKDDDEGHI Epsilon DDKDDDEGHI Input files for usage example 4 File: protparswts.dat 1111100000 0000011111 Output file format fprotpars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each position. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item "." appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees. If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test, which is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across positions. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual positions, and a conclusion as to whether that tree is or is not significantly worse than the best one. Output files for usage example File: protpars.fprotpars Protein parsimony algorithm, version 3.69.650 3 trees in all found +--------Gamma ! +--2 +--Epsilon ! ! +--4 ! +--3 +--Delta 1 ! ! +-----Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 16.000 +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 16.000 +--Epsilon +-----4 ! +--Delta +--3 ! ! +--Gamma 1 +-----2 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 16.000 File: protpars.treefile ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333]; Output files for usage example 2 File: protpars.fprotpars Protein parsimony algorithm, version 3.69.650 5 species, 10 sites Name Sequences ---- --------- Alpha ABCDEFGHIK Beta ..--...... Gamma ?...S..*?? Delta CIK....... Epsilon DIK....... 3 trees in all found +-----------Beta ! 1 +--------Gamma ! ! 
+--2 +--Epsilon ! +--4 +--3 +--Delta ! +-----Alpha remember: (although rooted by outgroup) this is an unrooted tree! requires a total of 14.000 steps in each position: 0 1 2 3 4 5 6 7 8 9 *----------------------------------------- 0! 3 1 5 3 2 0 0 2 0 10! 0 From To Any Steps? State at upper node ( . means same as in the node below it on tree) root 1 AN??EFGHIK 1 Beta maybe .B--...... [Part of this file has been deleted for brevity] root 1 AN??EFGHIK 1 Beta maybe .B--...... 1 2 maybe ..CD...... 2 3 maybe ?......... 3 4 yes .IK....... 4 Epsilon maybe D......... 4 Delta yes C......... 3 Gamma yes ?B..S..*?? 2 Alpha maybe .B........ +-----------Beta ! 1 +--Epsilon ! +-----4 ! ! +--Delta +--3 ! +--Gamma +-----2 +--Alpha remember: (although rooted by outgroup) this is an unrooted tree! requires a total of 14.000 steps in each position: 0 1 2 3 4 5 6 7 8 9 *----------------------------------------- 0! 3 1 5 3 2 0 0 2 0 10! 0 From To Any Steps? State at upper node ( . means same as in the node below it on tree) root 1 AN??EFGHIK 1 Beta maybe .B--...... 1 3 yes ..?D...... 3 4 yes ?IK....... 4 Epsilon maybe D......... 4 Delta yes C......... 3 2 yes ..C....... 2 Gamma yes ?B..S..*?? 2 Alpha maybe .B........ File: protpars.treefile (Beta,(Gamma,((Epsilon,Delta),Alpha)))[0.3333]; (Beta,(((Epsilon,Delta),Gamma),Alpha))[0.3333]; (Beta,((Epsilon,Delta),(Gamma,Alpha)))[0.3333]; Output files for usage example 3 File: protpars2.fprotpars Protein parsimony algorithm, version 3.69.650 Data set # 1: 3 trees in all found +--------Gamma ! +--2 +--Epsilon ! ! +--4 ! +--3 +--Delta 1 ! ! +-----Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 25.000 +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 25.000 +--Epsilon +-----4 [Part of this file has been deleted for brevity] +--------Gamma +--2 ! ! +-----Epsilon ! +--4 1 ! +--Delta ! +--3 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 24.000 +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 24.000 +--Epsilon +-----4 ! +--Delta +--3 ! ! +--Gamma 1 +-----2 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! 
requires a total of 24.000 File: protpars2.treefile ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333]; ((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.0667]; (((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.0667]; ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.0667]; ((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.0667]; ((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.0667]; (((Delta,Gamma),(Epsilon,Beta)),Alpha)[0.0667]; (((Delta,(Epsilon,Gamma)),Beta),Alpha)[0.0667]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.0667]; ((Epsilon,((Delta,Gamma),Beta)),Alpha)[0.0667]; (((Epsilon,(Delta,Gamma)),Beta),Alpha)[0.0667]; ((Delta,(Gamma,(Epsilon,Beta))),Alpha)[0.0667]; ((Delta,((Epsilon,Gamma),Beta)),Alpha)[0.0667]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.0667]; ((Delta,(Epsilon,(Gamma,Beta))),Alpha)[0.0667]; ((Epsilon,(Delta,(Gamma,Beta))),Alpha)[0.0667]; ((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.2000]; ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.2000]; ((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.2000]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.2000]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.2000]; Output files for usage example 4 File: protpars.fprotpars Protein parsimony algorithm, version 3.69.650 Weights set # 1: 3 trees in all found +--------Gamma ! +--2 +--Epsilon ! ! +--4 ! +--3 +--Delta 1 ! ! +-----Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 14.000 +--Epsilon +--4 +--3 +--Delta ! ! +--2 +-----Gamma ! ! 1 +--------Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 14.000 [Part of this file has been deleted for brevity] +--Epsilon +-----4 ! +--Delta +--3 ! ! +--Gamma 1 +-----2 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 2.000 +--------Delta +--3 ! ! +-----Epsilon ! +--4 1 ! +--Gamma ! +--2 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 2.000 +--------Epsilon +--4 ! ! +-----Delta ! +--3 1 ! +--Gamma ! +--2 ! +--Beta ! +-----------Alpha remember: this is an unrooted tree! requires a total of 2.000 File: protpars.treefile ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333]; ((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.0667]; (((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.0667]; ((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.0667]; ((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.0667]; ((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.0667]; (((Delta,Gamma),(Epsilon,Beta)),Alpha)[0.0667]; (((Delta,(Epsilon,Gamma)),Beta),Alpha)[0.0667]; ((((Epsilon,Delta),Gamma),Beta),Alpha)[0.0667]; ((Epsilon,((Delta,Gamma),Beta)),Alpha)[0.0667]; (((Epsilon,(Delta,Gamma)),Beta),Alpha)[0.0667]; ((Delta,(Gamma,(Epsilon,Beta))),Alpha)[0.0667]; ((Delta,((Epsilon,Gamma),Beta)),Alpha)[0.0667]; (((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.0667]; ((Delta,(Epsilon,(Gamma,Beta))),Alpha)[0.0667]; ((Epsilon,(Delta,(Gamma,Beta))),Alpha)[0.0667]; Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
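As a footnote to the Algorithm section of this program's documentation above: the rule that only amino-acid-changing substitutions are counted, and that every replacement must be achievable through the genetic code, can be illustrated with a small search over codons. The sketch below (Python; the codon table is a hand-typed fragment of the universal code, just large enough for the phenylalanine/leucine/glutamine example discussed above, and the code is an illustration only, not what fprotpars actually does internally) finds the fewest amino-acid-changing substitutions between two codons, treating synonymous substitutions as free:

    import heapq

    # Fragment of the universal code: only the codons needed for the
    # Phe -> Leu -> Gln example (this is NOT the full code table).
    codon_aa = {
        'TTT': 'Phe', 'TTC': 'Phe',
        'TTA': 'Leu', 'TTG': 'Leu', 'CTT': 'Leu', 'CTC': 'Leu',
        'CTA': 'Leu', 'CTG': 'Leu',
        'CAA': 'Gln', 'CAG': 'Gln',
    }
    bases = 'ACGT'

    def neighbours(codon):
        """Codons one base substitution away that are in the fragment above."""
        for i in range(3):
            for b in bases:
                if b != codon[i]:
                    alt = codon[:i] + b + codon[i + 1:]
                    if alt in codon_aa:
                        yield alt

    def min_replacement_steps(start, goal):
        """Fewest amino-acid-changing substitutions from one codon to another;
        synonymous substitutions cost nothing, as in the rule described above."""
        best = {start: 0}
        heap = [(0, start)]
        while heap:
            cost, codon = heapq.heappop(heap)
            if codon == goal:
                return cost
            if cost > best.get(codon, float('inf')):
                continue
            for alt in neighbours(codon):
                step = 0 if codon_aa[alt] == codon_aa[codon] else 1
                if cost + step < best.get(alt, float('inf')):
                    best[alt] = cost + step
                    heapq.heappush(heap, (cost + step, alt))
        return None

    print(min_replacement_steps('TTT', 'CAA'))   # Phe -> Gln: prints 2

Run as shown it prints 2, matching the TTT (Phe) --> CTT (Leu) --> CTA (Leu) --> CAA (Gln) chain described in the Algorithm section.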
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments frestml Restriction site maximum likelihood method fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/fcontrast.txt0000664000175000017500000004345712171064331016146 00000000000000 fcontrast Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Continuous character contrasts Description Reads a tree from a tree file, and a data set with continuous characters data, and produces the independent contrasts for those characters, for use in any multivariate statistics package. Will also produce covariances, regressions and correlations between characters for those contrasts. Can also correct for within-species sampling variation when individual phenotypes are available within a population. Algorithm This program implements the contrasts calculation described in my 1985 paper on the comparative method (Felsenstein, 1985d). It reads in a data set of the standard quantitative characters sort, and also a tree from the treefile. It then forms the contrasts between species that, according to that tree, are statistically independent. This is done for each character. The contrasts are all standardized by branch lengths (actually, square roots of branch lengths). The method is explained in the 1985 paper. It assumes a Brownian motion model. This model was introduced by Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967) as an approximation to the evolution of gene frequencies. 
I have discussed (Felsenstein, 1973b, 1981c, 1985d, 1988b) the difficulties inherent in using it as a model for the evolution of quantitative characters. Chief among these is that the characters do not necessarily evolve independently or at equal rates. This program allows one to evaluate this, if there is independent information on the phylogeny. You can compute the variance of the contrasts for each character, as a measure of the variance accumulating per unit branch length. You can also test covariances of characters. The statistics that are printed out include the covariances between all pairs of characters, the regressions of each character on each other (column j is regressed on row i), and the correlations between all pairs of characters. In assessing degrees of freedom it is important to realize that each contrast was taken to have expectation zero, which is known because each contrast could as easily have been computed xi-xj instead of xj-xi. Thus there is no loss of a degree of freedom for estimation of a mean. The degrees of freedom is thus the same as the number of contrasts, namely one less than the number of species (tips). If you feed these contrasts into a multivariate statistics program make sure that it knows that each variable has expectation exactly zero.

Within-species variation

With the W option selected, CONTRAST analyzes data sets with variation within species, using a model like that proposed by Michael Lynch (1990). The method is described in vague terms in my book (Felsenstein, 2004, p. 441). If you select the W option for within-species variation, the data set should have this structure (on the left are the data, on the right my comments):

10 5                          number of species, number of characters
Alpha     2                   name of 1st species, # of individuals
2.01 5.3 1.5 -3.41 0.3        data for individual #1
1.98 4.3 2.1 -2.98 0.45       data for individual #2
Gammarus  3                   name of 2nd species, # of individuals
6.57 3.1 2.0 -1.89 0.6        data for individual #1
7.62 3.4 1.9 -2.01 0.7        data for individual #2
6.02 3.0 1.9 -2.03 0.6        data for individual #3
...                           (and so on)

The covariances, correlations, and regressions for the "additive" (between-species evolutionary variation) and "environmental" (within-species phenotypic variation) are printed out (the maximum likelihood estimates of each). The program also estimates the within-species phenotypic variation in the case where the between-species evolutionary covariances are forced to be zero. The log-likelihoods of these two cases are compared and a likelihood ratio test (LRT) is carried out. The program prints the result of this test as a chi-square variate, and gives the number of degrees of freedom of the LRT. You have to look up the chi-square variable on a table of the chi-square distribution. The A option is available (if the W option is invoked) to allow you to turn this test off if you want to. The log-likelihoods of the data under the models with and without between-species variation are also reported. For the moment the program cannot handle the case where within-species variation is to be taken into account but where only species means are available. (It can handle cases where some species have only one member in their sample). We hope to fix this soon. We are also on our way to incorporating full-sib, half-sib, or clonal groups within species, so as to do one analysis for within-species genetic and between-species phylogenetic variation. The data set used as an example below is the example from a paper by Michael Lynch (1990), his characters having been log-transformed.
In the case where there is only one specimen per species, Lynch's model is identical to our model of within-species variation (for multiple individuals per species it is not a subcase of his model). Usage Here is a sample session with fcontrast % fcontrast Continuous character contrasts Input file: contrast.dat Phylip tree file (optional): contrast.tree Phylip contrast program output file [contrast.fcontrast]: Output written to file "contrast.fcontrast" Done. Go to the input files for this example Go to the output files for this example Command line arguments Continuous character contrasts Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-infile] frequencies File containing one or more sets of data [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fcontrast] Phylip contrast program output file Additional (Optional) qualifiers (* if not always prompted): -varywithin boolean [N] Within-population variation in data * -[no]reg boolean [Y] Print out correlations and regressions * -writecont boolean [N] Print out contrasts * -[no]nophylo boolean [Y] LRT test of no phylogenetic component, with and without VarA -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fcontrast reads continuous character data. Continuous character data The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny. When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions: 1. Different lineages evolve independently. 2. After two lineages split, their characters change independently. 3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method). 4. Different loci or characters drift independently. How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c). The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. 
for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option: 5 3 2 3 2 Alpha 0.90 0.80 0.10 0.56 Beta 0.72 0.54 0.30 0.20 Gamma 0.38 0.10 0.05 0.98 Delta 0.42 0.40 0.43 0.97 Epsilon 0.10 0.30 0.70 0.62 whereas here is what it would have to look like if the A option were invoked: 5 3 2 3 2 Alpha 0.90 0.10 0.80 0.10 0.10 0.56 0.44 Beta 0.72 0.28 0.54 0.30 0.16 0.20 0.80 Gamma 0.38 0.62 0.10 0.05 0.85 0.98 0.02 Delta 0.42 0.58 0.40 0.43 0.17 0.97 0.03 Epsilon 0.10 0.90 0.30 0.70 0.00 0.62 0.38 The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name). If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message. While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points. CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. 
Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables. CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allows us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set. In the quantitative characters mode, a typical small data set would be:

5 6
Alpha    0.345 0.467 1.213 2.2 -1.2 1.0
Beta     0.457 0.444 1.1 1.987 -0.2 2.678
Gamma    0.6 0.12 0.97 2.3 -0.11 1.54
Delta    0.68 0.203 0.888 2.0 1.67
Epsilon  0.297 0.22 0.90 1.9 1.74

Note that in the latter case there is no line giving the numbers of alleles at each locus. Also, in this case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale. For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs.

Input files for usage example

File: contrast.dat

5   2
Homo        4.09434  4.74493
Pongo       3.61092  3.33220
Macaca      2.37024  3.36730
Ateles      2.02815  2.89037
Galago     -1.46968  2.30259

File: contrast.tree

((((Homo:0.21,Pongo:0.21):0.28,Macaca:0.49):0.13,Ateles:0.62):0.38,Galago:1.00);

Output file format

The statistics that fcontrast prints out include the covariances between all pairs of characters, the regressions of each character on each other (column j is regressed on row i), and the correlations between all pairs of characters. In assessing degrees of freedom it is important to realize that each contrast was taken to have expectation zero, which is known because each contrast could as easily have been computed xi-xj instead of xj-xi. Thus there is no loss of a degree of freedom for estimation of a mean. The degrees of freedom is thus the same as the number of contrasts, namely one less than the number of species (tips). If you feed these contrasts into a multivariate statistics program make sure that it knows that each variable has expectation exactly zero. With the W option selected, the covariances, correlations, and regressions for the "additive" (between-species evolutionary variation) and "environmental" (within-species phenotypic variation) are printed out (the maximum likelihood estimates of each). The program also estimates the within-species phenotypic variation in the case where the between-species evolutionary covariances are forced to be zero. The log-likelihoods of these two cases are compared and a likelihood ratio test (LRT) is carried out. The program prints the result of this test as a chi-square variate, and gives the number of degrees of freedom of the LRT. You have to look up the chi-square variable on a table of the chi-square distribution. The A option is available (if the W option is invoked) to allow you to turn this test off if you want to.
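The contrast calculation itself is small enough to sketch. The following Python fragment is an illustration only (it is not part of PHYLIP or EMBOSS); it applies the pruning and square-root standardization described above to the example data in contrast.dat and contrast.tree, and should reproduce, up to rounding, the covariance and correlation matrices shown in the example output below.

import math

# Tip = (name, branch length); internal node = (left, right, branch length above it).
# Topology and branch lengths are those of contrast.tree.
tree = ((((("Homo", 0.21), ("Pongo", 0.21), 0.28),
          ("Macaca", 0.49), 0.13),
         ("Ateles", 0.62), 0.38),
        ("Galago", 1.00), 0.0)              # the root itself has no branch above it

traits = {"Homo":   [4.09434, 4.74493], "Pongo":  [3.61092, 3.33220],
          "Macaca": [2.37024, 3.36730], "Ateles": [2.02815, 2.89037],
          "Galago": [-1.46968, 2.30259]}    # the two characters of contrast.dat

contrasts = []                              # standardized contrasts, one per internal node

def prune(node):
    """Return (trait values, branch length) for the pruned subtree."""
    if len(node) == 2:                      # a tip
        return traits[node[0]], node[1]
    (x1, v1), (x2, v2) = prune(node[0]), prune(node[1])
    s = v1 + v2
    # contrast between the two daughters, standardized by sqrt of summed branch lengths
    contrasts.append([(a - b) / math.sqrt(s) for a, b in zip(x1, x2)])
    # weighted average of the daughter values ...
    x = [(a / v1 + b / v2) / (1 / v1 + 1 / v2) for a, b in zip(x1, x2)]
    # ... and the branch above this node, lengthened to allow for the averaging
    return x, node[2] + v1 * v2 / s

prune(tree)
n = len(contrasts)                          # = number of species - 1; the mean is fixed at zero
cov = [[sum(c[i] * c[j] for c in contrasts) / n for j in (0, 1)] for i in (0, 1)]
corr = cov[0][1] / math.sqrt(cov[0][0] * cov[1][1])
print(cov)     # roughly [[3.942, 1.703], [1.703, 1.706]]
print(corr)    # roughly 0.657

The regressions in the example output are simply the covariance divided by the variance of the row character: 1.7028/3.9423 = 0.4319 and 1.7028/1.7062 = 0.9980, which is what "column j is regressed on row i" means here.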
Output files for usage example File: contrast.fcontrast Covariance matrix ---------- ------ 3.9423 1.7028 1.7028 1.7062 Regressions (columns on rows) ----------- -------- -- ----- 1.0000 0.4319 0.9980 1.0000 Correlations ------------ 1.0000 0.6566 0.6566 1.0000 Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description econtml Continuous character maximum likelihood method econtrast Continuous character contrasts Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/Makefile.am0000664000175000017500000000116011137346163015426 00000000000000pkgdata_DATA = fclique.txt \ fconsense.txt fcontml.txt fcontrast.txt \ fdiscboot.txt fdnacomp.txt fdnadist.txt fdnainvar.txt \ fdnaml.txt fdnamlk.txt fdnamove.txt fdnapars.txt fdnapenny.txt \ fdollop.txt fdolmove.txt fdolpenny.txt \ fdrawgram.txt fdrawtree.txt \ ffactor.txt ffitch.txt ffreqboot.txt \ fgendist.txt fkitsch.txt \ fmix.txt fmove.txt fneighbor.txt \ fpars.txt fpenny.txt fproml.txt fpromlk.txt \ fprotdist.txt fprotpars.txt \ frestboot.txt frestdist.txt frestml.txt fretree.txt \ fseqboot.txt fseqbootall.txt \ ftreedist.txt ftreedistpair.txt pkgdatadir=$(prefix)/share/EMBOSS/doc/programs/text PHYLIPNEW-3.69.650/emboss_doc/text/Makefile.in0000664000175000017500000003407212171071677015453 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = emboss_doc/text DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/doc/programs/text ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ 
libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pkgdata_DATA = fclique.txt \ fconsense.txt fcontml.txt fcontrast.txt \ fdiscboot.txt fdnacomp.txt fdnadist.txt fdnainvar.txt \ fdnaml.txt fdnamlk.txt fdnamove.txt fdnapars.txt fdnapenny.txt \ fdollop.txt fdolmove.txt fdolpenny.txt \ fdrawgram.txt fdrawtree.txt \ ffactor.txt ffitch.txt ffreqboot.txt \ fgendist.txt fkitsch.txt \ fmix.txt fmove.txt fneighbor.txt \ fpars.txt fpenny.txt fproml.txt fpromlk.txt \ fprotdist.txt fprotpars.txt \ frestboot.txt frestdist.txt frestml.txt fretree.txt \ fseqboot.txt fseqbootall.txt \ ftreedist.txt ftreedistpair.txt all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/text/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/text/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ 
for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/text/fdnaml.txt0000664000175000017500000010641412171064331015375 00000000000000 fdnaml Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Estimate nucleotide phylogeny by maximum likelihood Description Estimates phylogenies from nucleotide sequences by maximum likelihood. The model employed allows for unequal expected frequencies of the four nucleotides, for unequal rates of transitions and transversions, and for different (prespecified) rates of change in different categories of sites, and also use of a Hidden Markov model of rates, with the program inferring which sites have which rates. This also allows gamma-distribution and gamma-plus-invariant sites distributions of rates across sites. Algorithm This program implements the maximum likelihood method for DNA sequences. The present version is faster than earlier versions of DNAML. Details of the algorithm are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites. The assumptions of the present model are: 1. Each site in the sequence evolves independently. 2. Different lineages evolve independently. 3. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify. 4. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative". 5. A substitution consists of one of two sorts of events: 1. The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition. 2. The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to a no change, to a transition or to a transversion. The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines. The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21. o First kind of event: Determine whether the existing base is a purine or a pyrimidine. Draw from the proper pool: Purine pool: Pyrimidine pool: | | | | | 0.4706 A | | 0.5714 C | | 0.5294 G | | 0.4286 T | | (ratio is | | (ratio is | | 0.24 : 0.27) | | 0.28 : 0.21) | |_______________| |_______________| o Second kind of event: Draw from the overall pool: | | | 0.24 A | | 0.28 C | | 0.27 G | | 0.21 T | |__________________| Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier. 
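The pool frequencies quoted above follow directly from the overall base frequencies: the purine pool is A and G renormalized to sum to one, and the pyrimidine pool is C and T renormalized likewise. A small Python illustration of that arithmetic (not part of the program):

# Illustration only: derive the replacement-pool frequencies used in the example
# above from the overall base frequencies.
freqs = {"A": 0.24, "C": 0.28, "G": 0.27, "T": 0.21}

purines = {b: freqs[b] / (freqs["A"] + freqs["G"]) for b in "AG"}
pyrimidines = {b: freqs[b] / (freqs["C"] + freqs["T"]) for b in "CT"}

print(purines)       # {'A': 0.4706..., 'G': 0.5294...}  -- the purine pool
print(pyrimidines)   # {'C': 0.5714..., 'T': 0.4286...}  -- the pyrimidine pool

# The second kind of event draws from freqs itself, so an existing A is "replaced"
# by another A with probability 0.4706 (first kind) or 0.24 (second kind), as noted
# in the text above.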
A closely similar, but not precisely identical model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996). Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed. This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probabilities of occurrence of each is, and what the average length of a patch of sites all having the same rate is. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence. For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate. The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites. This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution. Another layer of rate variation is also available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7. If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper).
Nevertheless you may want to use both types of rate variation. Usage Here is a sample session with fdnaml % fdnaml -printdata -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" - gammatype h -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" -hmmpro babilities "0.522 0.399 0.076 0.0036 0.000023" -lambda 1.5 -weight "011111111111 0" Estimate nucleotide phylogeny by maximum likelihood Input (aligned) nucleotide sequence set(s): dnaml.dat Phylip tree file (optional): Phylip dnaml program output file [dnaml.fdnaml]: mulsets: false datasets : 1 rctgry : true gama : false invar : false numwts : 1 numseqs : 1 ctgry: true categs : 2 rcategs : 5 auto_: false freqsfrom : true global : false hypstate : false improve : false invar : false jumble : false njumble : 1 lngths : false lambda : 1.000000 cv : 1.000000 freqa : 0.000000 freqc : 0.000000 freqg : 0.000000 freqt : 0.000000 outgrno : 1 outgropt: false trout : true ttratio : 2.000000 ttr : false usertree : false weights: true printdata : true progress : true treeprint: true interleaved : false Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Output written to file "dnaml.fdnaml" Tree also written onto file "dnaml.treefile" Done. Go to the input files for this example Go to the output files for this example Example 2 % fdnaml -printdata -njumble 3 -seed 3 Estimate nucleotide phylogeny by maximum likelihood Input (aligned) nucleotide sequence set(s): dnaml.dat Phylip tree file (optional): Phylip dnaml program output file [dnaml.fdnaml]: mulsets: false datasets : 1 rctgry : false gama : false invar : false numwts : 0 numseqs : 1 ctgry: false categs : 1 rcategs : 1 auto_: false freqsfrom : true global : false hypstate : false improve : false invar : false jumble : true njumble : 3 lngths : false lambda : 1.000000 cv : 1.000000 freqa : 0.000000 freqc : 0.000000 freqg : 0.000000 freqt : 0.000000 outgrno : 1 outgropt: false trout : true ttratio : 2.000000 ttr : false usertree : false weights: false printdata : true progress : true treeprint: true interleaved : false Adding species: 1. Delta 2. Epsilon 3. Alpha 4. Beta 5. Gamma Adding species: 1. Beta 2. Epsilon 3. Delta 4. Alpha 5. Gamma Adding species: 1. Epsilon 2. Alpha 3. Gamma 4. Delta 5. Beta Output written to file "dnaml.fdnaml" Tree also written onto file "dnaml.treefile" Done. 
Go to the output files for this example Command line arguments Estimate nucleotide phylogeny by maximum likelihood Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-sequence] seqsetall File containing one or more sequence alignments [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.fdnaml] Phylip dnaml program output file Additional (Optional) qualifiers (* if not always prompted): -ncategories integer [1] Number of substitution rate categories (Integer from 1 to 9) * -rate array Rate for each category * -categories properties File of substitution rate categories -weights properties Weights file * -lengths boolean [N] Use branch lengths from user trees -ttratio float [2.0] Transition/transversion ratio (Number 0.001 or more) -[no]freqsfrom toggle [Y] Use empirical base frequencies from seqeunce input * -basefreq array [0.25 0.25 0.25 0.25] Base frequencies for A C G T/U (use blanks to separate) -gammatype menu [Constant rate] Rate variation among sites (Values: g (Gamma distributed rates); i (Gamma+invariant sites); h (User defined HMM of rates); n (Constant rate)) * -gammacoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ngammacat integer [1] Number of categories (1-9) (Integer from 1 to 9) * -invarcoefficient float [1] Coefficient of variation of substitution rate among sites (Number 0.001 or more) * -ninvarcat integer [1] Number of categories (1-9) including one for invariant sites (Integer from 1 to 9) * -invarfrac float [0.0] Fraction of invariant sites (Number from 0.000 to 1.000) * -nhmmcategories integer [1] Number of HMM rate categories (Integer from 1 to 9) * -hmmrates array [1.0] HMM category rates * -hmmprobabilities array [1.0] Probability for each HMM category * -adjsite boolean [N] Rates at adjacent sites correlated * -lambda float [1.0] Mean block length of sites having the same rate (Number 1.000 or more) * -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) * -global boolean [N] Global rearrangements -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]rough boolean [Y] Speedier but rougher analysis -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.fdnaml] Phylip tree output file (optional) -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree -hypstate boolean [N] Reconstruct hypothetical sequence Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-sequence" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General 
qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdnaml reads any normal sequence USAs. Input files for usage example File: dnaml.dat 5 13 Alpha AACGTGGCCAAAT Beta AAGGTCGCCAAAC Gamma CATTTCGTCACAA Delta GGTATTTCGGCCT Epsilon GGGATCTCGGCCC Output file format fdnaml output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter. If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates. There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. A table is printed showing the length of each tree segment (in units of expected nucleotide substitutions per site), as well as (very) rough confidence limits on their lengths. If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow. In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This done by comparing the likelihoods changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above in the discussion of the L option. 
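As an illustration of such a test between two runs differing in a single fixed parameter (for example the transition/transversion ratio), the usual statistic is twice the difference in log likelihoods, referred to a chi-square distribution with one degree of freedom. The Python sketch below is not program output; the log-likelihood values are placeholders.

# Illustration only: crude likelihood ratio test between two fdnaml runs that
# differ in one fixed parameter.  The log likelihoods below are placeholders.
lnL_free  = -57.9    # run with the parameter at its better (maximum likelihood) value
lnL_fixed = -60.5    # run with the parameter fixed at the value being tested

lrt = 2.0 * (lnL_free - lnL_fixed)   # likelihood ratio test statistic
# 3.841 is the 5% critical value of chi-square with 1 degree of freedom
print(lrt, "significant" if lrt > 3.841 else "not significant at P = 0.05")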
One should also realize that if you are looking not at a previously-chosen branch but at all branches, that you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that). The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive. If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for the any lack of realism in the model underlying this program. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. 
With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases. The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes. Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Note that versions 2.7 and earlier of this program printed out the branch lengths in terms of expected probability of change, so that they were scaled differently. Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14. At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in this calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. 
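The reporting rule for reconstructed states can be summarized in a few lines of code. The Python sketch below is an illustration only; the likelihood fractions are invented, and the lower-case behaviour for the middle range (between 50% and 95%) is implied by the description above rather than stated explicitly.

# Illustration only: the reporting rule described above for reconstructed
# ancestral states.  The likelihood fractions are invented for the example.
IUPAC = {frozenset("AC"): "M", frozenset("AG"): "R", frozenset("AT"): "W",
         frozenset("CG"): "S", frozenset("CT"): "Y", frozenset("GT"): "K",
         frozenset("ACG"): "V", frozenset("ACT"): "H",
         frozenset("AGT"): "D", frozenset("CGT"): "B",
         frozenset("ACGT"): "N"}

def report(fracs):
    """fracs: dict base -> fraction of the likelihood at this site and node."""
    ranked = sorted(fracs, key=fracs.get, reverse=True)
    best = ranked[0]
    if fracs[best] > 0.95:
        return best.upper()              # one base dominates: capital letter
    if fracs[best] >= 0.50:
        return best.lower()              # best base is a majority: lower case
    chosen, total = [], 0.0              # otherwise: smallest set covering more than half
    for b in ranked:
        chosen.append(b)
        total += fracs[b]
        if total > 0.5:
            break
    return IUPAC.get(frozenset(chosen), "N")

print(report({"A": 0.97, "C": 0.01, "G": 0.01, "T": 0.01}))   # 'A'
print(report({"A": 0.60, "C": 0.30, "G": 0.05, "T": 0.05}))   # 'a'
print(report({"A": 0.35, "C": 0.30, "G": 0.20, "T": 0.15}))   # 'M' (A or C)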
A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead. Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file. Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates. Output files for usage example File: dnaml.fdnaml Nucleic acid sequence Maximum Likelihood method, version 3.69.650 5 species, 13 sites Site categories are: 1111112222 222 Sites are weighted as follows: 01111 11111 110 Name Sequences ---- --------- Alpha AACGTGGCCA AAT Beta AAGGTCGCCA AAC Gamma CATTTCGTCA CAA Delta GGTATTTCGG CCT Epsilon GGGATCTCGG CCC Empirical Base Frequencies: A 0.23636 C 0.29091 G 0.25455 T(U) 0.21818 Transition/transversion ratio = 2.000000 State in HMM Rate of change Probability 1 0.264 0.522 2 1.413 0.399 3 3.596 0.076 4 7.086 0.0036 5 12.641 0.000023 Site category Rate of change 1 1.000 2 2.000 +Epsilon +--------------------------------------------------------3 +--2 +-Delta | | | +Beta | 1------Gamma | +-Alpha remember: this is an unrooted tree! Ln Likelihood = -57.87892 Between And Length Approx. Confidence Limits ------- --- ------ ------- ---------- ------ 1 Alpha 0.26766 ( zero, 0.80513) * 1 2 0.04687 ( zero, 0.48388) 2 3 7.59821 ( zero, 22.01485) ** 3 Epsilon 0.00006 ( zero, 0.46205) 3 Delta 0.27319 ( zero, 0.73380) ** 2 Beta 0.00006 ( zero, 0.44052) 1 Gamma 0.95677 ( zero, 2.46186) ** * = significantly positive, P < 0.05 ** = significantly positive, P < 0.01 Combination of categories that contributes the most to the likelihood: 1132121111 211 Most probable category at each site if > 0.95 probability ("." otherwise) .......... ... 
File: dnaml.treefile

(((Epsilon:0.00006,Delta:0.27319):7.59821,Beta:0.00006):0.04687,
Gamma:0.95677,Alpha:0.26766);

Output files for usage example 2

File: dnaml.fdnaml

Nucleic acid sequence Maximum Likelihood method, version 3.69.650

 5 species, 13 sites

  Name        Sequences
  ----        ---------

Alpha      AACGTGGCCA AAT
Beta       AAGGTCGCCA AAC
Gamma      CATTTCGTCA CAA
Delta      GGTATTTCGG CCT
Epsilon    GGGATCTCGG CCC


Empirical Base Frequencies:

   A       0.24615
   C       0.29231
   G       0.24615
  T(U)     0.21538

Transition/transversion ratio =   2.000000


                                                +Epsilon
   +--------------------------------------------1
+--2                                            +--------Delta
|  |
|  +Beta
|
3------------------------------Gamma
|
+-----Alpha

remember: this is an unrooted tree!

Ln Likelihood =   -72.25088

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     3          Alpha             0.20745     (     zero,     0.56578)
     3             2              0.09408     (     zero,     0.40912)
     2             1              1.51296     (     zero,     3.31130) **
     1          Epsilon           0.00006     (     zero,     0.34299)
     1          Delta             0.28137     (     zero,     0.62654) **
     2          Beta              0.00006     (     zero,     0.32900)
     3          Gamma             1.01651     (     zero,     2.33178) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01

File: dnaml.treefile

(((Epsilon:0.00006,Delta:0.28137):1.51296,Beta:0.00006):0.09408,
Gamma:1.01651,Alpha:0.20745);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name     Description
distmat          Create a distance matrix from a multiple sequence alignment
ednacomp         DNA compatibility algorithm
ednadist         Nucleic acid sequence distance matrix program
ednainvar        Nucleic acid sequence invariants method
ednaml           Phylogenies from nucleic acid maximum likelihood
ednamlk          Phylogenies from nucleic acid maximum likelihood with clock
ednapars         DNA parsimony algorithm
ednapenny        Penny algorithm for DNA
eprotdist        Protein distance algorithm
eprotpars        Protein parsimony algorithm
erestml          Restriction site maximum likelihood method
eseqboot         Bootstrapped sequences algorithm
fdiscboot        Bootstrapped discrete sites algorithm
fdnacomp         DNA compatibility algorithm
fdnadist         Nucleic acid sequence distance matrix program
fdnainvar        Nucleic acid sequence invariants method
fdnamlk          Estimates nucleotide phylogeny by maximum likelihood
fdnamove         Interactive DNA parsimony
fdnapars         DNA parsimony algorithm
fdnapenny        Penny algorithm for DNA
fdolmove         Interactive Dollo or polymorphism parsimony
ffreqboot        Bootstrapped genetic frequencies algorithm
fproml           Protein phylogeny by maximum likelihood
fpromlk          Protein phylogeny by maximum likelihood
fprotdist        Protein distance algorithm
fprotpars        Protein parsimony algorithm
frestboot        Bootstrapped restriction sites algorithm
frestdist        Calculate distance matrix from restriction sites or fragments
frestml          Restriction site maximum likelihood method
fseqboot         Bootstrapped sequences algorithm
fseqbootall      Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.
Comments

None

PHYLIPNEW-3.69.650/emboss_doc/text/fdrawgram.txt0000664000175000017500000002633312171064331016107 00000000000000

fdrawgram

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Plots a cladogram- or phenogram-like rooted tree diagram

Description

Plots rooted phylogenies, cladograms, circular trees and phenograms in a wide variety of user-controllable formats. The program is interactive and allows previewing of the tree on PC, Macintosh, or X Windows screens, or on Tektronix or Digital graphics terminals. Final output can be to a file formatted for one of the drawing programs, for a ray-tracing or VRML browser, or one that can be sent to a laser printer (such as Postscript or PCL-compatible printers), to graphics screens or terminals, to pen plotters, or to dot matrix printers capable of graphics. Similar to DRAWTREE but plots rooted phylogenies.

Algorithm

DRAWGRAM interactively plots a cladogram- or phenogram-like rooted tree diagram, with many options including orientation of tree and branches, style of tree, label sizes and angles, tree depth, margin sizes, stem lengths, and placement of nodes in the tree. Particularly if you can use your computer to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want.

To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation.

As with DRAWTREE, to run DRAWGRAM you need a compiled copy of the program, a font file, and a tree file. The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default.

Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed (a small sketch of removing it appears after the sample session below).

Usage

Here is a sample session with fdrawgram


% fdrawgram -previewer n
Plots a cladogram- or phenogram-like rooted tree diagram
Phylip tree file: drawgram.tree
Phylip drawgram output file [drawgram.fdrawgram]:

DRAWGRAM from PHYLIP version 3.69.650
Reading tree ...
Tree has been read.
Loading the font ....
Font loaded.
Writing plot file ...
Plot written to file "drawgram.fdrawgram"
Done.
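As noted under Algorithm above, fdrawgram can be confused by a tree file whose first line carries a count of the trees. A minimal Python sketch of dropping such a line before plotting; "intree" is the PHYLIP default tree file name mentioned above, and the output file name is just a placeholder:

    with open("intree") as fh:
        lines = fh.readlines()

    first = lines[0].split() if lines else []
    if first and first[0].isdigit():     # a bare tree count on the first line
        lines = lines[1:]

    with open("intree.stripped", "w") as fh:   # hypothetical output name
        fh.writelines(lines)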
Go to the input files for this example Go to the output files for this example Command line arguments Plots a cladogram- or phenogram-like rooted tree diagram Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-intreefile] tree Phylip tree file [-plotfile] outfile [*.fdrawgram] Phylip drawgram output file Additional (Optional) qualifiers (* if not always prompted): -[no]grows boolean [Y] Tree grows horizontally -style menu [c] Tree style output (Values: c (cladogram (v-shaped)); p (phenogram (branches are square)); v (curvogram (branches are 1/4 out of an ellipse)); e (eurogram (branches angle outward, then up)); s (swooporam (branches curve outward then reverse)); o (circular tree)) -plotter menu [l] Plotter or printer the tree will be drawn on (Values: l (Postscript printer file format); m (PICT format (for drawing programs)); j (HP 75 DPI Laserjet PCL file format); s (HP 150 DPI Laserjet PCL file format); y (HP 300 DPI Laserjet PCL file format); w (MS-Windows Bitmap); f (FIG 2.0 drawing program format); a (Idraw drawing program format); z (VRML Virtual Reality Markup Language file); n (PCX 640x350 file format (for drawing programs)); p (PCX 800x600 file format (for drawing programs)); q (PCX 1024x768 file format (for drawing programs)); k (TeKtronix 4010 graphics terminal); x (X Bitmap format); v (POVRAY 3D rendering program file); r (Rayshade 3D rendering program file); h (Hewlett-Packard pen plotter (HPGL file format)); d (DEC ReGIS graphics (VT240 terminal)); e (Epson MX-80 dot-matrix printer); c (Prowriter/Imagewriter dot-matrix printer); t (Toshiba 24-pin dot-matrix printer); o (Okidata dot-matrix printer); b (Houston Instruments plotter); u (other (one you have inserted code for))) -previewer menu [x] Previewing device (Values: n (Will not be previewed); I i (MSDOS graphics screen m:Macintosh screens); x (X Windows display); w (MS Windows display); k (TeKtronix 4010 graphics terminal); d (DEC ReGIS graphics (VT240 terminal)); o (Other (one you have inserted code for))) -lengths boolean [N] Use branch lengths from user trees * -labelrotation float [90.0] Angle of labels (0 degrees is horizontal for a tree growing vertically) (Number from 0.000 to 360.000) -[no]rescaled toggle [Y] Automatically rescale branch lengths * -bscale float [1.0] Centimeters per unit branch length (Any numeric value) -treedepth float [0.53] Depth of tree as fraction of its breadth (Number from 0.100 to 100.000) -stemlength float [0.05] Stem length as fraction of tree depth (Number from 0.010 to 100.000) -nodespace float [0.3333] Character height as fraction of tip spacing (Number from 0.100 to 100.000) -nodeposition menu [c] Position of interior nodes (Values: i (Intermediate between their immediate descendants); w (Weighted average of tip positions); c (Centered among their ultimate descendants); n (Innermost of immediate descendants); v (So tree is v shaped)) * -xmargin float [1.65] Horizontal margin (cm) (Number 0.100 or more) * -ymargin float [2.16] Vertical margin (cm) (Number 0.100 or more) * -xrayshade float [1.65] Horizontal margin (pixels) for Rayshade output (Number 0.100 or more) * -yrayshade float [2.16] Vertical margin (pixels) for Rayshade output (Number 0.100 or more) -paperx float [20.63750] Paper width (Any numeric value) -papery float [26.98750] Paper height (Number 0.100 or more) -pagesheight float [1] Number of trees across height of page (Number 1.000 or more) -pageswidth float [1] Number of trees across width of page (Number 1.000 or more) -hpmargin float [0.41275] Horizontal 
overlap (cm) (Number 0.001 or more) -vpmargin float [0.53975] Vertical overlap (cm) (Number 0.001 or more) Advanced (Unprompted) qualifiers: -fontfile string [font1] Fontfile name (Any string) Associated qualifiers: "-plotfile" associated qualifiers -odirectory2 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format fdrawgram reads ... Input files for usage example File: drawgram.tree (Delta,(Epsilon,(Gamma,(Beta,Alpha)))); Output file format fdrawgram output ... Output files for usage example Graphics File: drawgram.fdrawgram [fdrawgram results] Data files The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. See also Program name Description fdrawtree Plots an unrooted tree diagram fretree Interactive tree rearrangement Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. Comments None PHYLIPNEW-3.69.650/emboss_doc/text/frestml.txt0000664000175000017500000003174412171064331015613 00000000000000 frestml Wiki The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki. Please help by correcting and extending the Wiki pages. Function Restriction site maximum likelihood method Description Estimation of phylogenies by maximum likelihood using restriction sites data (not restriction fragments but presence/absence of individual sites). It employs the Jukes-Cantor symmetrical model of nucleotide change, which does not allow for differences of rate between transitions and transversions. This program is very slow. Algorithm This program implements a maximum likelihood method for restriction sites data (not restriction fragment data). This program is one of the slowest programs in this package, and can be very tedious to run. It is possible to have the program search for the maximum likelihood tree. It will be more practical for some users (those that do not have fast machines) to use the U (User Tree) option, which takes less run time, optimizing branch lengths and computing likelihoods for particular tree topologies suggested by the user. 
The model used here is essentially identical to that used by Smouse and Li (1987) who give explicit expressions for computing the likelihood for three-species trees. It does not place prior probabilities on trees as they do. The present program extends their approach to multiple species by a technique which, while it does not give explicit expressions for likelihoods, does enable their computation and the iterative improvement of branch lengths. It also allows for multiple restriction enzymes. The algorithm has been described in a paper (Felsenstein, 1992). Another relevant paper is that of DeBry and Slade (1985). The assumptions of the present model are: 1. Each restriction site evolves independently. 2. Different lineages evolve independently. 3. Each site undergoes substitution at an expected rate which we specify. 4. Substitutions consist of replacement of a nucleotide by one of the other three nucleotides, chosen at random. Note that if the existing base is, say, an A, the chance of it being replaced by a G is 1/3, and so is the chance that it is replaced by a T. This means that there can be no difference in the (expected) rate of transitions and transversions. Users who are upset at this might ponder the fact that a version allowing different rates of transitions and transversions would run an estimated 16 times slower. If it also allowed for unequal frequencies of the four bases, it would run about 300,000 times slower! For the moment, until a better method is available, I guess I'll stick with this one! Subject to these assumptions, the program is an approximately correct maximum likelihood method. Usage Here is a sample session with frestml % frestml Restriction site maximum likelihood method Input file: restml.dat Phylip tree file (optional): Phylip restml program output file [restml.frestml]: numseqs: 1 Adding species: 1. Alpha 2. Beta 3. Gamma 4. Delta 5. Epsilon Output written to file "restml.frestml" Tree also written onto file "restml.treefile" Done. 
Go to the input files for this example Go to the output files for this example Command line arguments Restriction site maximum likelihood method Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-data] discretestates File containing one or more sets of restriction data [-intreefile] tree Phylip tree file (optional) [-outfile] outfile [*.frestml] Phylip restml program output file Additional (Optional) qualifiers (* if not always prompted): -weights properties Weights file -njumble integer [0] Number of times to randomise (Integer 0 or more) * -seed integer [1] Random number seed between 1 and 32767 (must be odd) (Integer from 1 to 32767) -outgrno integer [0] Species number to use as outgroup (Integer 0 or more) -[no]allsites boolean [Y] All sites detected * -lengths boolean [N] Use lengths from user trees -sitelength integer [6] Site length (Integer from 1 to 8) * -global boolean [N] Global rearrangements * -[no]rough boolean [Y] Speedier but rougher analysis -[no]trout toggle [Y] Write out trees to tree file * -outtreefile outfile [*.frestml] Phylip tree output file -printdata boolean [N] Print data at start of run -[no]progress boolean [Y] Print indications of progress of run -[no]treeprint boolean [Y] Print out tree Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-outfile" associated qualifiers -odirectory3 string Output directory "-outtreefile" associated qualifiers -odirectory string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit Input file format frestml input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this: 10 35 4 The first line of the data file will also contain a letter W following these numbers (and separated from them by a space) if the Weights option is being used. As with all programs using the weights option, a line or lines must then follow, before the data, with the weights for each site. The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. 
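The first line of this format is easy to check programmatically. A minimal Python sketch (illustration only) that reads the species, sites and enzymes counts, and the optional W flag, from such a file:

    def read_restml_header(path):
        # First line: number of species, number of sites, number of enzymes,
        # optionally followed by the letter W when a weights line is supplied.
        with open(path) as fh:
            fields = fh.readline().split()
        species, sites, enzymes = (int(x) for x in fields[:3])
        has_weights = "W" in fields[3:]
        return species, sites, enzymes, has_weights

    print(read_restml_header("restml.dat"))   # (5, 13, 2, False) for the example below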
Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs. The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown. User-defined trees may follow the data in the usual way. The trees must be unrooted, which means that at their base they must have a trifurcation. Input files for usage example File: restml.dat 5 13 2 Alpha ++-+-++--+++- Beta ++++--+--+++- Gamma -+--+-++-+-++ Delta ++-+----++--- Epsilon ++++----++--- Output file format frestml outputs a graph to the specified graphics device. outputs a report format file. The default format is ... Output files for usage example File: restml.frestml Restriction site Maximum Likelihood method, version 3.69.650 Recognition sequences all 6 bases long Sites absent from all species are assumed to have been omitted +----Gamma | | +Beta 1--2 | | +Epsilon | +--3 | +Delta | +Alpha remember: this is an unrooted tree! Ln Likelihood = -40.47082 Between And Length Approx. Confidence Limits ------- --- ------ ------- ---------- ------ 1 Gamma 0.10794 ( 0.01144, 0.21872) ** 1 2 0.01244 ( zero, 0.04712) 2 Beta 0.00100 ( zero, infinity) 2 3 0.05878 ( zero, 0.12675) ** 3 Epsilon 0.00022 ( zero, infinity) 3 Delta 0.01451 ( zero, 0.04459) ** 1 Alpha 0.01244 ( zero, 0.04717) * = significantly positive, P < 0.05 ** = significantly positive, P < 0.01 File: restml.treefile (Gamma:0.10794,(Beta:0.00100,(Epsilon:0.00022, Delta:0.01451):0.05878):0.01244,Alpha:0.01244); Data files None Notes None. References None. Warnings None. Diagnostic Error Messages None. Exit status It always exits with status 0. Known bugs None. 
See also Program name Description distmat Create a distance matrix from a multiple sequence alignment ednacomp DNA compatibility algorithm ednadist Nucleic acid sequence distance matrix program ednainvar Nucleic acid sequence invariants method ednaml Phylogenies from nucleic acid maximum likelihood ednamlk Phylogenies from nucleic acid maximum likelihood with clock ednapars DNA parsimony algorithm ednapenny Penny algorithm for DNA eprotdist Protein distance algorithm eprotpars Protein parsimony algorithm erestml Restriction site maximum likelihood method eseqboot Bootstrapped sequences algorithm fdiscboot Bootstrapped discrete sites algorithm fdnacomp DNA compatibility algorithm fdnadist Nucleic acid sequence distance matrix program fdnainvar Nucleic acid sequence invariants method fdnaml Estimate nucleotide phylogeny by maximum likelihood fdnamlk Estimates nucleotide phylogeny by maximum likelihood fdnamove Interactive DNA parsimony fdnapars DNA parsimony algorithm fdnapenny Penny algorithm for DNA fdolmove Interactive Dollo or polymorphism parsimony ffreqboot Bootstrapped genetic frequencies algorithm fproml Protein phylogeny by maximum likelihood fpromlk Protein phylogeny by maximum likelihood fprotdist Protein distance algorithm fprotpars Protein parsimony algorithm frestboot Bootstrapped restriction sites algorithm frestdist Calculate distance matrix from restriction sites or fragments fseqboot Bootstrapped sequences algorithm fseqbootall Bootstrapped sequences algorithm Author(s) This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package. Please report all bugs to the EMBOSS bug team (emboss-bug (c) emboss.open-bio.org) not to the original author. History Written (2004) - Joe Felsenstein, University of Washington. Converted (August 2004) to an EMBASSY program by the EMBOSS team. Target users This program is intended to be used by everyone and everything, from naive users to embedded scripts. 
Comments None PHYLIPNEW-3.69.650/emboss_doc/text/.cvsignore0000664000175000017500000000002511326104506015362 00000000000000Makefile.in Makefile PHYLIPNEW-3.69.650/emboss_doc/html/0002775000175000017500000000000012171071711013427 500000000000000PHYLIPNEW-3.69.650/emboss_doc/html/CVS/0002775000175000017500000000000012171064331014062 500000000000000PHYLIPNEW-3.69.650/emboss_doc/html/CVS/Entries0000664000175000017500000000414112171064331015334 00000000000000/.cvsignore/1.1/Thu Jan 21 17:06:46 2010/-kk/ /Makefile.am/1.2/Mon Jan 26 14:45:56 2009// /index.html/1.5/Wed Aug 3 11:37:09 2011// /fclique.html/1.24/Mon Jul 15 21:25:45 2013// /fconsense.html/1.25/Mon Jul 15 21:25:45 2013// /fcontml.html/1.27/Mon Jul 15 21:25:45 2013// /fcontrast.html/1.27/Mon Jul 15 21:25:45 2013// /fdiscboot.html/1.24/Mon Jul 15 21:25:45 2013// /fdnacomp.html/1.30/Mon Jul 15 21:25:45 2013// /fdnadist.html/1.30/Mon Jul 15 21:25:45 2013// /fdnainvar.html/1.30/Mon Jul 15 21:25:45 2013// /fdnaml.html/1.31/Mon Jul 15 21:25:45 2013// /fdnamlk.html/1.31/Mon Jul 15 21:25:45 2013// /fdnamove.html/1.28/Mon Jul 15 21:25:45 2013// /fdnapars.html/1.30/Mon Jul 15 21:25:45 2013// /fdnapenny.html/1.30/Mon Jul 15 21:25:45 2013// /fdollop.html/1.24/Mon Jul 15 21:25:45 2013// /fdolmove.html/1.27/Mon Jul 15 21:25:45 2013// /fdolpenny.html/1.25/Mon Jul 15 21:25:45 2013// /fdrawgram.1.drawgram.fdrawgram.gif/1.4/Mon Jul 15 21:25:45 2013// /fdrawgram.html/1.25/Mon Jul 15 21:25:45 2013// /fdrawtree.1.drawgram.fdrawtree.gif/1.4/Mon Jul 15 21:25:45 2013// /fdrawtree.html/1.25/Mon Jul 15 21:25:45 2013// /ffactor.html/1.22/Mon Jul 15 21:25:45 2013// /ffitch.html/1.26/Mon Jul 15 21:25:45 2013// /ffreqboot.html/1.25/Mon Jul 15 21:25:45 2013// /fgendist.html/1.25/Mon Jul 15 21:25:45 2013// /fkitsch.html/1.26/Mon Jul 15 21:25:45 2013// /fmix.html/1.24/Mon Jul 15 21:25:45 2013// /fmove.html/1.23/Mon Jul 15 21:25:45 2013// /fneighbor.html/1.26/Mon Jul 15 21:25:45 2013// /fpars.html/1.25/Mon Jul 15 21:25:45 2013// /fpenny.html/1.24/Mon Jul 15 21:25:45 2013// /fproml.html/1.30/Mon Jul 15 21:25:45 2013// /fpromlk.html/1.30/Mon Jul 15 21:25:45 2013// /fprotdist.html/1.30/Mon Jul 15 21:25:45 2013// /fprotpars.html/1.30/Mon Jul 15 21:25:45 2013// /frestboot.html/1.25/Mon Jul 15 21:25:45 2013// /frestdist.html/1.28/Mon Jul 15 21:25:45 2013// /frestml.html/1.27/Mon Jul 15 21:25:45 2013// /fretree.html/1.24/Mon Jul 15 21:25:45 2013// /fseqboot.html/1.27/Mon Jul 15 21:25:45 2013// /fseqbootall.html/1.28/Mon Jul 15 21:25:45 2013// /ftreedist.html/1.28/Mon Jul 15 21:25:45 2013// /ftreedistpair.html/1.28/Mon Jul 15 21:25:45 2013// D PHYLIPNEW-3.69.650/emboss_doc/html/CVS/Root0000664000175000017500000000005612000651523014643 00000000000000rice@dev.open-bio.org:/home/repository/emboss PHYLIPNEW-3.69.650/emboss_doc/html/CVS/Repository0000664000175000017500000000006012000651523016072 00000000000000emboss/emboss/embassy/phylipnew/emboss_doc/html PHYLIPNEW-3.69.650/emboss_doc/html/ffitch.html0000664000175000017500000005320312171064331015501 00000000000000 EMBOSS: ffitch
ffitch

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Fitch-Margoliash and least-squares distance methods

Description

Estimates phylogenies from distance matrix data under the "additive tree model" according to which the distances are expected to equal the sums of branch lengths between the species. Uses the Fitch-Margoliash criterion and some related least squares criteria, or the Minimum Evolution distance matrix method. Does not assume an evolutionary clock. This program will be useful with distances computed from molecular sequences, with restriction site or fragment distances, with DNA hybridization measurements, and with genetic distances computed from gene frequencies.

Algorithm

The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that:
  • Each distance is measured independently from the others: no item of data contributes to more than one distance.
  • The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation.
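A small numerical sketch (Python, illustration only) of the weighted least-squares criterion implied by this second assumption: each squared difference between an observed and a tree-predicted distance is down-weighted by the observed distance raised to a power p (the same quantity as the -power qualifier listed further down; p = 2.0 corresponds to the Fitch-Margoliash sum of squares shown in the example output below).

    def weighted_least_squares(observed, expected, power=2.0):
        # sum over pairs of (Obs - Exp)^2 / Obs^power
        return sum((o - e) ** 2 / o ** power for o, e in zip(observed, expected))

    # hypothetical observed and tree-predicted distances, purely for illustration
    print(weighted_least_squares([0.30, 0.45, 0.60], [0.28, 0.47, 0.61]))  # ~0.0067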

These assumptions can be traced in the least squares methods of programs FITCH and KITSCH, but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumption is less obvious.

THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b).

For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better.

Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIP by the program RESTDIST.

For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true).

FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time.

Usage

Here is a sample session with ffitch


% ffitch 
Fitch-Margoliash and least-squares distance methods
Phylip distance matrix file: fitch.dat
Phylip tree file (optional): 
Phylip fitch program output file [fitch.ffitch]: 

Adding species:
   1. Bovine    
   2. Mouse     
   3. Gibbon    
   4. Orang     
   5. Gorilla   
   6. Chimp     
   7. Human     

Output written to file "fitch.ffitch"

Tree also written onto file "fitch.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Fitch-Margoliash and least-squares distance methods
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-datafile]          distances  File containing one or more distance
                                  matrices
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.ffitch] Phylip fitch program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -matrixtype         menu       [s] Type of input data matrix (Values: s
                                  (Square); u (Upper triangular); l (Lower
                                  triangular))
   -minev              boolean    [N] Minimum evolution
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -power              float      [2.0] Power (Any numeric value)
*  -lengths            boolean    [N] Use branch lengths from user trees
*  -negallowed         boolean    [N] Negative branch lengths allowed
*  -global             boolean    [N] Global rearrangements
   -replicates         boolean    [N] Subreplicates
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.ffitch] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-datafile]
(Parameter 1)
distances File containing one or more distance matrices Distance matrix  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip fitch program output file Output file <*>.ffitch
Additional (Optional) qualifiers
-matrixtype list Type of input data matrix
s (Square)
u (Upper triangular)
l (Lower triangular)
s
-minev boolean Minimum evolution Boolean value Yes/No No
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-power float Power Any numeric value 2.0
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-negallowed boolean Negative branch lengths allowed Boolean value Yes/No No
-global boolean Global rearrangements Boolean value Yes/No No
-replicates boolean Subreplicates Boolean value Yes/No No
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.ffitch
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

ffitch reads a file containing one or more distance matrices, in square, upper-triangular or lower-triangular layout (see the -matrixtype qualifier above).

Input files for usage example

File: fitch.dat

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000
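This square layout is straightforward to read programmatically. A minimal Python sketch, assuming (as in this example) that species names contain no embedded spaces; strictly, PHYLIP names occupy a fixed-width field of up to 10 characters:

    def read_square_matrix(path):
        # first line: number of species; then one row per species:
        # the name followed by the distances to every species (including itself)
        with open(path) as fh:
            n = int(fh.readline().split()[0])
            names, rows = [], []
            for _ in range(n):
                fields = fh.readline().split()
                names.append(fields[0])
                rows.append([float(x) for x in fields[1:]])
        return names, rows

    names, rows = read_square_matrix("fitch.dat")
    print(names[0], rows[0][1])   # Bovine 1.6866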

Output file format

ffitch output consists of an unrooted tree and the lengths of the interior segments. The sum of squares is printed out, and if P = 2.0 Fitch and Margoliash's "average percent standard deviation" is also computed and printed out. This is the sum of squares, divided by N-2, and then square-rooted and then multiplied by 100 (n is the number of species on the tree):

     APSD = sqrt( SSQ / (N-2) ) x 100.

where N is the total number of off-diagonal distance measurements that are in the (square) distance matrix. If the S (subreplication) option is in force it is instead the sum of the numbers of replicates in all the non-diagonal cells of the distance matrix. But if the L or R option is also in effect, so that the distance matrix read in is lower- or upper-triangular, then the sum of replicates is only over those cells actually read in. If S is not in force, the number of replicates in each cell is assumed to be 1, so that N is n(n-1), where n is the number of species. The APSD gives an indication of the average percentage error. The number of trees examined is also printed out.
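The figures in the example output below can be used to check this arithmetic directly; a small Python sketch, with the numbers copied from fitch.ffitch:

    import math

    n_species = 7                        # species in the example data set
    N = n_species * (n_species - 1)      # off-diagonal cells of a square matrix
    ssq = 0.01375                        # "Sum of squares" from fitch.ffitch

    apsd = math.sqrt(ssq / (N - 2)) * 100
    print(apsd)   # about 1.854, matching the printed 1.85418 up to rounding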

Output files for usage example

File: fitch.ffitch


   7 Populations

Fitch-Margoliash method version 3.69.650

                  __ __             2
                  \  \   (Obs - Exp)
Sum of squares =  /_ /_  ------------
                                2
                   i  j      Obs

Negative branch lengths not allowed


  +---------------------------------------------Mouse     
  ! 
  !                                +------Human     
  !                             +--5 
  !                           +-4  +--------Chimp     
  !                           ! ! 
  !                        +--3 +---------Gorilla   
  !                        !  ! 
  1------------------------2  +-----------------Orang     
  !                        ! 
  !                        +---------------------Gibbon    
  ! 
  +------------------------------------------------------Bovine    


remember: this is an unrooted tree!

Sum of squares =     0.01375

Average percent standard deviation =     1.85418

Between        And            Length
-------        ---            ------
   1          Mouse             0.76985
   1             2              0.41983
   2             3              0.04986
   3             4              0.02121
   4             5              0.03695
   5          Human             0.11449
   5          Chimp             0.15471
   4          Gorilla           0.15680
   3          Orang             0.29209
   2          Gibbon            0.35537
   1          Bovine            0.91675


File: fitch.treefile

(Mouse:0.76985,((((Human:0.11449,Chimp:0.15471):0.03695,
Gorilla:0.15680):0.02121,Orang:0.29209):0.04986,Gibbon:0.35537):0.41983,Bovine:0.91675);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
efitch Fitch-Margoliash and least-squares distance methods
ekitsch Fitch-Margoliash method with contemporary tips
eneighbor Phylogenies from distance matrix by N-J or UPGMA method
fkitsch Fitch-Margoliash method with contemporary tips
fneighbor Phylogenies from distance matrix by N-J or UPGMA method

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug © emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fpenny.html0000664000175000017500000010725012171064331015537 00000000000000 EMBOSS: fpenny
fpenny

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Penny algorithm, branch-and-bound

Description

Finds all most parsimonious phylogenies for discrete-character data with two states, for the Wagner, Camin-Sokal, and mixed parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species.

Algorithm

PENNY is a program that will find all of the most parsimonious trees implied by your data. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by PENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of steps. It adds the next species to each of these, again in all possible places. If this process were continued it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated.
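The count quoted here can be checked with the standard formula for the number of rooted, bifurcating, labelled trees, (2n-3)!! for n species; a quick Python check:

    def rooted_tree_count(n):
        # (2n-3)!! = 3 x 5 x 7 x ... x (2n-3)
        count = 1
        for k in range(3, 2 * n - 2, 2):
            count *= k
        return count

    print(rooted_tree_count(10))   # 34459425, the figure quoted above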

Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species     (A,B)
          Add C in first place     ((A,B),C)
               Add D in first place     (((A,D),B),C)
               Add D in second place     ((A,(B,D)),C)
               Add D in third place     (((A,B),D),C)
               Add D in fourth place     ((A,B),(C,D))
               Add D in fifth place     (((A,B),C),D)
          Add C in second place: ((A,C),B)
               Add D in first place     (((A,D),C),B)
               Add D in second place     ((A,(C,D)),B)
               Add D in third place     (((A,C),D),B)
               Add D in fourth place     ((A,C),(B,D))
               Add D in fifth place     (((A,C),B),D)
          Add C in third place     (A,(B,C))
               Add D in first place     ((A,D),(B,C))
               Add D in second place     (A,((B,D),C))
               Add D in third place     (A,(B,(C,D)))
               Add D in fourth place     (A,((B,C),D))
               Add D in fifth place     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added.

This is done by counting how many binary characters which are invariant in the data up to the species most recently added will ultimately show variation when further species are added. Thus if 20 characters vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more characters which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no less than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.
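In terms of the numbers just given, the pruning decision is simply the following (a schematic Python sketch, not PENNY's actual code):

    best_so_far = 30        # steps of the best complete tree found so far
    partial_steps = 24      # steps already required by the partial tree ((A,C),B)
    minimum_to_come = 8     # characters that must start varying once D is added

    if partial_steps + minimum_to_come > best_so_far:
        print("prune")      # 24 + 8 = 32 > 30: no placement of D can do better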

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer steps than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.

Controlling Run Times

Among the quantities available to be set at the beginning of a run of PENNY, two (howoften and howmany) are of particular importance. As PENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, PENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.

When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to wait for. In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees.

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, "maxtrees" is set to 100 in the distribution copy.

Methods and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in MIX, and thus the remainder of this document will be nearly identical to the MIX document. MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This is under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Usage

Here is a sample session with fpenny


% fpenny 
Penny algorithm, branch-and-bound
Phylip character discrete states file: penny.dat
Phylip penny program output file [penny.fpenny]: 


How many
trees looked                                       Approximate
at so far      Length of        How many           percentage
(multiples     shortest tree    trees this long    searched
of  100):      found so far     found so far       so far
----------     ------------     ------------       ------------
     1           8.00000                1                6.67
     2           8.00000                3               20.00
     3           8.00000                3               53.33
     4           8.00000                3               93.33

Output written to file "penny.fpenny"

Trees also written onto file "penny.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Penny algorithm, branch-and-bound
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more data sets
  [-outfile]           outfile    [*.fpenny] Phylip penny program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Phylip weights file (optional)
   -ancfile            properties Phylip ancestral states file (optional)
   -mixfile            properties Phylip mix output file (optional)
   -method             menu       [Wagner] Choose the method to use (Values:
                                  Wag (Wagner); Cam (Camin-Sokal); Mix
                                  (Mixed))
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -howmany            integer    [1000] How many groups of trees (Any integer
                                  value)
   -howoften           integer    [100] How often to report, in trees (Any
                                  integer value)
   -simple             boolean    Branch and bound is simple
   -threshold          float      [$(infile.discretesize)] Threshold value
                                  (Number 1.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fpenny] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -stepbox            boolean    [N] Print out steps in each site
   -ancseq             boolean    [N] Print states at all nodes of tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more data sets Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip penny program output file Output file <*>.fpenny
Additional (Optional) qualifiers
-weights properties Phylip weights file (optional) Property value(s)  
-ancfile properties Phylip ancestral states file (optional) Property value(s)  
-mixfile properties Phylip mix output file (optional) Property value(s)  
-method list Choose the method to use
Wag (Wagner)
Cam (Camin-Sokal)
Mix (Mixed)
Wagner
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-howmany integer How many groups of trees Any integer value 1000
-howoften integer How often to report, in trees Any integer value 100
-simple boolean Branch and bound is simple Boolean value Yes/No No
-threshold float Threshold value Number 1.000 or more $(infile.discretesize)
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fpenny
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-stepbox boolean Print out steps in each site Boolean value Yes/No No
-ancseq boolean Print states at all nodes of tree Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fpenny reads discrete character data.

(0,1) Discrete character data

These programs are intended for use by morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method, invented by Sokal and Sneath (1963) for linear sequences of character states and fully developed for branching sequences of character states by Kluge and Farris (1969), for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.
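
As a concrete illustration of this recoding, here is a minimal Python sketch (not part of PHYLIP or EMBOSS) that reproduces the table above: the character-state tree is stored as a child-to-parent map, each arrow (edge) of the tree becomes one (0,1) character, and a state scores 1 for every arrow that lies on its path back to the ancestral state.

# Kluge-Farris additive binary recoding of the four-state character above.
parent = {0: 1, 2: 0, 3: 0}        # rooted character-state tree, root = state 1
edges = [(0, 3), (0, 2), (1, 0)]   # column order chosen to match the table above

def ancestry(state):
    """Return the set of edges on the path from 'state' back to the root."""
    path = set()
    while state in parent:
        path.add((parent[state], state))
        state = parent[state]
    return path

for state in (0, 1, 2, 3):
    on_path = ancestry(state)
    print(state, "".join("1" if e in on_path else "0" for e in edges))
# prints: 0 001, 1 000, 2 011, 3 101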

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Alternatively, thanks to Christopher Meacham, the package contains a program, FACTOR, which will do this recoding for you. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: penny.dat

    7    6
Alpha1    110110
Alpha2    110110
Beta1     110000
Beta2     110000
Gamma1    100110
Delta     001001
Epsilon   001110
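
If you want to read a small matrix like this programmatically, here is a minimal Python sketch (an illustration only, not a general PHYLIP parser): it assumes the simple sequential layout shown above, with the species name occupying the first 10 columns of each line.

def read_discrete(path):
    """Read a small sequential (0,1) discrete-character matrix."""
    with open(path) as fh:
        ntax, nchar = map(int, fh.readline().split())
        data = {}
        for _ in range(ntax):
            line = fh.readline().rstrip("\n")
            name, states = line[:10].strip(), line[10:].replace(" ", "")
            assert len(states) == nchar, (name, states)
            data[name] = states
    return data

print(read_discrete("penny.dat"))
# {'Alpha1': '110110', 'Alpha2': '110110', ..., 'Epsilon': '001110'}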

Output file format

fpenny output is standard: a set of trees, which will be printed as rooted or unrooted depending on which is appropriate, and if the user elects to see them, tables of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the Camin-Sokal parsimony method (option C or S) is invoked and the A option is also used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the fewest state changes. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in PENNY gives up more easily on displaying these states.

If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will be printed out showing the best guesses of which are the ancestral states in each character. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion.

If the tree output option (-[no]trout) is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

Output files for usage example

File: penny.fpenny


Penny algorithm, version 3.69.650
 branch-and-bound to find all most parsimonious trees

Wagner parsimony method

                     


requires a total of              8.000

    3 trees in all found




  +-----------------Alpha1    
  !  
  !        +--------Alpha2    
--1        !  
  !  +-----4     +--Epsilon   
  !  !     !  +--6  
  !  !     +--5  +--Delta     
  +--2        !  
     !        +-----Gamma1    
     !  
     !           +--Beta2     
     +-----------3  
                 +--Beta1     

  remember: this is an unrooted tree!




  +-----------------Alpha1    
  !  
--1  +--------------Alpha2    
  !  !  
  !  !           +--Epsilon   
  +--2        +--6  
     !  +-----5  +--Delta     
     !  !     !  
     +--4     +-----Gamma1    
        !  
        !        +--Beta2     
        +--------3  
                 +--Beta1     

  remember: this is an unrooted tree!




  +-----------------Alpha1    
  !  
  !           +-----Alpha2    
--1  +--------2  
  !  !        !  +--Beta2     
  !  !        +--3  
  +--4           +--Beta1     
     !  
     !           +--Epsilon   
     !        +--6  
     +--------5  +--Delta     
              !  
              +-----Gamma1    

  remember: this is an unrooted tree!

File: penny.treefile

(Alpha1,((Alpha2,((Epsilon,Delta),Gamma1)),(Beta2,Beta1)))[0.3333];
(Alpha1,(Alpha2,(((Epsilon,Delta),Gamma1),(Beta2,Beta1))))[0.3333];
(Alpha1,((Alpha2,(Beta2,Beta1)),((Epsilon,Delta),Gamma1)))[0.3333];
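
Each of the three tied trees above carries the weight [0.3333]. As a quick check, the following Python sketch (an illustration only, not an EMBOSS tool) extracts the bracketed weights and confirms that they sum to approximately 1, which is what matters when the trees are later combined into a consensus tree.

import re

weights = []
with open("penny.treefile") as fh:
    for line in fh:
        m = re.search(r"\[([0-9.]+)\]", line)
        if m:
            weights.append(float(m.group(1)))

print(weights)       # [0.3333, 0.3333, 0.3333]
print(sum(weights))  # close to 1.0 (the file rounds to four decimals)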

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpars Discrete character parsimony

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdrawtree
fdrawtree

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Plots an unrooted tree diagram

Description

Plots unrooted phylogenies, cladograms, circular trees and phenograms in a wide variety of user-controllable formats. The program is interactive and allows previewing of the tree on PC, Macintosh, or X Windows screens, or on Tektronix or Digital graphics terminals. Final output can be to a file formatted for one of the drawing programs, for a ray-tracing or VRML browser, or one that can be sent to a laser printer (such as Postscript or PCL-compatible printers), on graphics screens or terminals, on pen plotters or on dot matrix printers capable of graphics.

Similar to DRAWGRAM but plots unrooted phylogenies.

Algorithm

DRAWTREE interactively plots an unrooted tree diagram, with many options including the orientation of the tree and branches, label sizes and angles, and margin sizes. Particularly if you can use your computer screen to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want.

To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation.

As with DRAWGRAM, to run DRAWTREE you need a compiled copy of the program, a font file, and a tree file. The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed.
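
If your tree file does begin with a count of trees, a minimal Python sketch like the following (the file names are placeholders, not fdrawtree options) can strip that first line before the file is given to the program:

# Drop a leading "number of trees" line from a tree file (assumed layout).
src, dst = "intree", "intree.stripped"
with open(src) as fh:
    lines = fh.readlines()
if lines and lines[0].strip().isdigit():
    lines = lines[1:]            # first line was just a tree count
with open(dst, "w") as fh:
    fh.writelines(lines)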

Usage

Here is a sample session with fdrawtree


% fdrawtree -previewer n 
Plots an unrooted tree diagram
Phylip tree file: drawgram.tree
Phylip drawtree output file [drawgram.fdrawtree]: 

DRAWTREE from PHYLIP version 3.69.650
Reading tree ... 
Tree has been read.
Loading the font ... 
Font loaded.

Writing plot file ...

Plot written to file "drawgram.fdrawtree"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Plots an unrooted tree diagram
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-intreefile]        tree       Phylip tree file
  [-plotfile]          outfile    [*.fdrawtree] Phylip drawtree output file

   Additional (Optional) qualifiers (* if not always prompted):
   -plotter            menu       [l] Plotter or printer the tree will be
                                  drawn on (Values: l (Postscript printer file
                                  format); m (PICT format (for drawing
                                  programs)); j (HP Laserjet 75 dpi PCL file
                                  format); s (HP Laserjet 150 dpi PCL file
                                  format); y (HP Laserjet 300 dpi PCL file
                                  format); w (MS-Windows Bitmap); f (FIG 2.0
                                  drawing program format); a (Idraw drawing
                                  program format); z (VRML Virtual Reality
                                  Markup Language file); n (PCX 640x350 file
                                  format (for drawing programs)); p (PCX
                                  800x600 file format (for drawing programs));
                                  q (PCX 1024x768 file format (for drawing
                                  programs)); k (TeKtronix 4010 graphics
                                  terminal); x (X Bitmap format); v (POVRAY 3D
                                  rendering program file); r (Rayshade 3D
                                  rendering program file); h (Hewlett-Packard
                                  pen plotter (HPGL file format)); d (DEC
                                  ReGIS graphics (VT240 terminal)); e (Epson
                                  MX-80 dot-matrix printer); c
                                  (Prowriter/Imagewriter dot-matrix printer);
                                  t (Toshiba 24-pin dot-matrix printer); o
                                  (Okidata dot-matrix printer); b (Houston
                                  Instruments plotter); u (other (one you have
                                  inserted code for)))
   -previewer          menu       [x] Previewing device (Values: n (Will not
                                  be previewed); I i (MSDOS graphics screen
                                  m:Macintosh screens); x (X Windows display);
                                  w (MS Windows display); k (TeKtronix 4010
                                  graphics terminal); d (DEC ReGIS graphics
                                  (VT240 terminal)); o (Other (one you have
                                  inserted code for)))
   -iterate            menu       [e] Iterate to improve tree (Values: n (No);
                                  e (Equal-Daylight algorithm); b (n-Body
                                  algorithm))
   -lengths            boolean    [N] Use branch lengths from user trees
   -labeldirection     menu       [m] Label direction (Values: a (along); f
                                  (fixed); r (radial); m (middle))
   -treeangle          float      [90.0] Angle the tree is to be plotted
                                  (Number from -360.000 to 360.000)
   -arc                float      [360] Degrees the arc should occupy (Number
                                  from 0.000 to 360.000)
*  -labelrotation      float      [90.0] Angle of labels (0 degrees is
                                  horizontal for a tree growing vertically)
                                  (Number from 0.000 to 360.000)
   -[no]rescaled       toggle     [Y] Automatically rescale branch lengths
*  -bscale             float      [1.0] Centimeters per unit branch length
                                  (Any numeric value)
   -treedepth          float      [0.53] Depth of tree as fraction of its
                                  breadth (Number from 0.100 to 100.000)
*  -xmargin            float      [1.65] Horizontal margin (cm) (Number 0.100
                                  or more)
*  -ymargin            float      [2.16] Vertical margin (cm) (Number 0.100 or
                                  more)
*  -xrayshade          float      [1.65] Horizontal margin (pixels) (Number
                                  0.100 or more)
*  -yrayshade          float      [2.16] Vertical margin (pixels) (Number
                                  0.100 or more)
   -paperx             float      [20.63750] Paper width (Any numeric value)
   -papery             float      [26.98750] Paper height (Number 0.100 or
                                  more)
   -pagesheight        float      [1] Number of trees across height of page
                                  (Number 1.000 or more)
   -pageswidth         float      [1] Number of trees across width of page
                                  (Number 1.000 or more)
   -hpmargin           float      [0.41275] Horizontal overlap (cm) (Number
                                  0.001 or more)
   -vpmargin           float      [0.53975] Vertical overlap (cm) (Number
                                  0.001 or more)

   Advanced (Unprompted) qualifiers:
   -fontfile           string     [font1] Fontfile name (Any string)

   Associated qualifiers:

   "-plotfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-intreefile]
(Parameter 1)
tree Phylip tree file Phylogenetic tree  
[-plotfile]
(Parameter 2)
outfile Phylip drawtree output file Output file <*>.fdrawtree
Additional (Optional) qualifiers
-plotter list Plotter or printer the tree will be drawn on
l (Postscript printer file format)
m (PICT format (for drawing programs))
j (HP Laserjet 75 dpi PCL file format)
s (HP Laserjet 150 dpi PCL file format)
y (HP Laserjet 300 dpi PCL file format)
w (MS-Windows Bitmap)
f (FIG 2.0 drawing program format)
a (Idraw drawing program format)
z (VRML Virtual Reality Markup Language file)
n (PCX 640x350 file format (for drawing programs))
p (PCX 800x600 file format (for drawing programs))
q (PCX 1024x768 file format (for drawing programs))
k (TeKtronix 4010 graphics terminal)
x (X Bitmap format)
v (POVRAY 3D rendering program file)
r (Rayshade 3D rendering program file)
h (Hewlett-Packard pen plotter (HPGL file format))
d (DEC ReGIS graphics (VT240 terminal))
e (Epson MX-80 dot-matrix printer)
c (Prowriter/Imagewriter dot-matrix printer)
t (Toshiba 24-pin dot-matrix printer)
o (Okidata dot-matrix printer)
b (Houston Instruments plotter)
u (other (one you have inserted code for))
l
-previewer list Previewing device
n (Will not be previewed)
I i (MSDOS graphics screen m:Macintosh screens)
x (X Windows display)
w (MS Windows display)
k (TeKtronix 4010 graphics terminal)
d (DEC ReGIS graphics (VT240 terminal))
o (Other (one you have inserted code for))
x
-iterate list Iterate to improve tree
n (No)
e (Equal-Daylight algorithm)
b (n-Body algorithm)
e
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-labeldirection list Label direction
a (along)
f (fixed)
r (radial)
m (middle)
m
-treeangle float Angle the tree is to be plotted Number from -360.000 to 360.000 90.0
-arc float Degrees the arc should occupy Number from 0.000 to 360.000 360
-labelrotation float Angle of labels (0 degrees is horizontal for a tree growing vertically) Number from 0.000 to 360.000 90.0
-[no]rescaled toggle Automatically rescale branch lengths Toggle value Yes/No Yes
-bscale float Centimeters per unit branch length Any numeric value 1.0
-treedepth float Depth of tree as fraction of its breadth Number from 0.100 to 100.000 0.53
-xmargin float Horizontal margin (cm) Number 0.100 or more 1.65
-ymargin float Vertical margin (cm) Number 0.100 or more 2.16
-xrayshade float Horizontal margin (pixels) Number 0.100 or more 1.65
-yrayshade float Vertical margin (pixels) Number 0.100 or more 2.16
-paperx float Paper width Any numeric value 20.63750
-papery float Paper height Number 0.100 or more 26.98750
-pagesheight float Number of trees across height of page Number 1.000 or more 1
-pageswidth float Number of trees across width of page Number 1.000 or more 1
-hpmargin float Horizontal overlap (cm) Number 0.001 or more 0.41275
-vpmargin float Vertical overlap (cm) Number 0.001 or more 0.53975
Advanced (Unprompted) qualifiers
-fontfile string Fontfile name Any string font1
Associated qualifiers
"-plotfile" associated outfile qualifiers
-odirectory2
-odirectory_plotfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdrawtree reads a Phylip (Newick-format) tree file.

Input files for usage example

File: drawgram.tree

(Delta,(Epsilon,(Gamma,(Beta,Alpha))));
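
For a quick sanity check of such a tree file, this small Python sketch (an illustration only; it assumes plain unquoted labels with no branch lengths) lists the leaf names found in the Newick string:

import re

newick = open("drawgram.tree").read()
leaves = re.findall(r"[A-Za-z0-9_.]+", newick)
print(leaves)   # ['Delta', 'Epsilon', 'Gamma', 'Beta', 'Alpha']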

Output file format

fdrawtree writes the plot to the output file, in the format selected with the -plotter option (Postscript by default).

Output files for usage example

Graphics File: drawgram.fdrawtree

[fdrawtree results]

Data files

The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default.

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
fdrawgram Plots a cladogram- or phenogram-like rooted tree diagram
fretree Interactive tree rearrangement

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdiscboot
fdiscboot

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Bootstrapped discrete sites algorithm

Description

Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development.

Algorithm

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are:

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.
  • The partial bootstrap.. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.
  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. (A minimal sketch of the plain and block bootstrap is given after this list.)
  • Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters.
  • Delete-half-jackknifing.. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters.
  • Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).
  • Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).
  • Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.
  • Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats:
    Andrew Rambaut's BEAST XML format (http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html): a format for alignments. There is also a format for phylogenies described there.
    MSAML (http://xml.coverpages.org/msaml-desc-dec.html): defined by Paul Gordon of the University of Calgary. See his big list of molecular biology XML projects.
    BSML (http://www.bsml.org/resources/default.asp): the Bioinformatic Sequence Markup Language, which includes a multiple sequence alignment XML format.
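
The sketch promised above: a minimal Python illustration (an assumption-laden stand-in, not the SEQBOOT code itself) of the plain bootstrap and of the block bootstrap with wrap-around, resampling character columns with replacement.

import random

def bootstrap_columns(data, block=1, rng=random):
    """data: dict of name -> character string; returns one resampled replicate."""
    nchar = len(next(iter(data.values())))
    starts = [rng.randrange(nchar) for _ in range(nchar // block)]
    cols = [(s + k) % nchar for s in starts for k in range(block)]  # wrap around
    return {name: "".join(seq[c] for c in cols) for name, seq in data.items()}

original = {"Alpha": "110110", "Beta": "110000", "Gamma": "100110",
            "Delta": "001001", "Epsilon": "001110"}
random.seed(3)
print(bootstrap_columns(original))           # ordinary bootstrap (B = 1)
print(bootstrap_columns(original, block=3))  # block bootstrap with B = 3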

Usage

Here is a sample session with fdiscboot


% fdiscboot -seed 3 
Bootstrapped discrete sites algorithm
Input file: discboot.dat
Phylip seqboot_disc program output file [discboot.fdiscboot]: 
Phylip ancestor data output file (optional) [discboot.ancfile]: 
Phylip mix data output file (optional) [discboot.mixfile]: 
Phylip factor data output file (optional) [discboot.factfile]: 


completed replicate number   10
completed replicate number   20
completed replicate number   30
completed replicate number   40
completed replicate number   50
completed replicate number   60
completed replicate number   70
completed replicate number   80
completed replicate number   90
completed replicate number  100

Output written to file "discboot.fdiscboot"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Bootstrapped discrete sites algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates (no help text) discretestates value
  [-outfile]           outfile    [*.fdiscboot] Phylip seqboot_disc program
                                  output file
  [-outancfile]        outfile    [*.fdiscboot] Phylip ancestor data output
                                  file (optional)
  [-outmixfile]        outfile    [*.fdiscboot] Phylip mix data output file
                                  (optional)
  [-outfactfile]       outfile    [*.fdiscboot] Phylip factor data output file
                                  (optional)

   Additional (Optional) qualifiers (* if not always prompted):
   -mixfile            properties File of mixtures
   -ancfile            properties File of ancestors
   -weights            properties Weights file
   -factorfile         properties Factors file
   -test               menu       [b] Choose test (Values: b (Bootstrap); j
                                  (Jackknife); c (Permute species for each
                                  character); o (Permute character order); s
                                  (Permute within species); r (Rewrite data))
*  -regular            toggle     [N] Altered sampling fraction
*  -fracsample         float      [100.0] Samples as percentage of sites
                                  (Number from 0.100 to 100.000)
*  -morphseqtype       menu       [p] Output format (Values: p (PHYLIP); n
                                  (NEXUS))
*  -blocksize          integer    [1] Block size for bootstraping (Integer 1
                                  or more)
*  -reps               integer    [100] How many replicates (Integer 1 or
                                  more)
*  -justweights        menu       [d] Write out datasets or just weights
                                  (Values: d (Datasets); w (Weights))
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -printdata          boolean    [N] Print out the data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outancfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outmixfile" associated qualifiers
   -odirectory4        string     Output directory

   "-outfactfile" associated qualifiers
   -odirectory5        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates (no help text) discretestates value Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip seqboot_disc program output file Output file <*>.fdiscboot
[-outancfile]
(Parameter 3)
outfile Phylip ancestor data output file (optional) Output file <*>.fdiscboot
[-outmixfile]
(Parameter 4)
outfile Phylip mix data output file (optional) Output file <*>.fdiscboot
[-outfactfile]
(Parameter 5)
outfile Phylip factor data output file (optional) Output file <*>.fdiscboot
Additional (Optional) qualifiers
-mixfile properties File of mixtures Property value(s)  
-ancfile properties File of ancestors Property value(s)  
-weights properties Weights file Property value(s)  
-factorfile properties Factors file Property value(s)  
-test list Choose test
b (Bootstrap)
j (Jackknife)
c (Permute species for each character)
o (Permute character order)
s (Permute within species)
r (Rewrite data)
b
-regular toggle Altered sampling fraction Toggle value Yes/No No
-fracsample float Samples as percentage of sites Number from 0.100 to 100.000 100.0
-morphseqtype list Output format
p (PHYLIP)
n (NEXUS)
p
-blocksize integer Block size for bootstraping Integer 1 or more 1
-reps integer How many replicates Integer 1 or more 100
-justweights list Write out datasets or just weights
d (Datasets)
w (Weights)
d
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-printdata boolean Print out the data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outancfile" associated outfile qualifiers
-odirectory3
-odirectory_outancfile
string Output directory Any string  
"-outmixfile" associated outfile qualifiers
-odirectory4
-odirectory_outmixfile
string Output directory Any string  
"-outfactfile" associated outfile qualifiers
-odirectory5
-odirectory_outfactfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdiscboot reads discrete character data

Input files for usage example

File: discboot.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Output file format

fdiscboot writes multiple bootstrapped sets of discrete character data.

Output files for usage example

File: discboot.ancfile


File: discboot.factfile


File: discboot.mixfile


File: discboot.fdiscboot

    5     6
Alpha     111001
Beta      111000
Gamma     100001
Delta     000110
Epsilon   000111
    5     6
Alpha     111011
Beta      111000
Gamma     100011
Delta     000100
Epsilon   000111
    5     6
Alpha     111110
Beta      111000
Gamma     110110
Delta     000001
Epsilon   000110
    5     6
Alpha     000001
Beta      000000
Gamma     000001
Delta     111110
Epsilon   111111
    5     6
Alpha     111100
Beta      111000
Gamma     110100
Delta     000011
Epsilon   000100
    5     6
Alpha     111100
Beta      100000
Gamma     111100
Delta     000011
Epsilon   011100
    5     6
Alpha     110011
Beta      110000
Gamma     100011
Delta     001100
Epsilon   001111
    5     6
Alpha     111100
Beta      100000
Gamma     111100
Delta     000011
Epsilon   011100
    5     6
Alpha     110100


  [Part of this file has been deleted for brevity]

Gamma     101111
Delta     000000
Epsilon   001111
    5     6
Alpha     110110
Beta      110000
Gamma     110110
Delta     001001
Epsilon   001110
    5     6
Alpha     110111
Beta      110000
Gamma     000111
Delta     001000
Epsilon   001111
    5     6
Alpha     101111
Beta      100000
Gamma     001111
Delta     010000
Epsilon   011111
    5     6
Alpha     011111
Beta      000000
Gamma     011111
Delta     100000
Epsilon   111111
    5     6
Alpha     011000
Beta      000000
Gamma     011000
Delta     100111
Epsilon   111000
    5     6
Alpha     101100
Beta      100000
Gamma     101100
Delta     010011
Epsilon   011100
    5     6
Alpha     111111
Beta      111110
Gamma     100001
Delta     000000
Epsilon   000001
    5     6
Alpha     110110
Beta      110000
Gamma     000110
Delta     001001
Epsilon   001110
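
Because fdiscboot simply concatenates the replicates, each with its own "ntax nchar" header line, the full output file (the listing above is truncated) can be split back into individual data sets with a small Python sketch like this (an illustration that assumes the fixed 10-column name field shown above):

def read_replicates(path):
    """Split a concatenated fdiscboot output file into one dict per replicate."""
    with open(path) as fh:
        lines = [l.rstrip("\n") for l in fh if l.strip()]
    i, replicates = 0, []
    while i < len(lines):
        ntax, nchar = map(int, lines[i].split())
        block = {line[:10].strip(): line[10:].replace(" ", "")
                 for line in lines[i + 1:i + 1 + ntax]}
        replicates.append(block)
        i += 1 + ntax
    return replicates

reps = read_replicates("discboot.fdiscboot")
print(len(reps))        # 100 replicates in the usage example
print(reps[0]["Alpha"]) # first resampled character string for Alpha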

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None

EMBOSS: fseqbootall
fseqbootall

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Bootstrapped sequences algorithm

Description

Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development.

Algorithm

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are:

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.
  • The partial bootstrap.. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.
  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3.
  • Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters.
  • Delete-half-jackknifing.. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters.
  • Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).
  • Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).
  • Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.
  • Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats:
    Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments; a format for phylogenies is also described there.
    MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects.
    BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format
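
The sampling schemes above are easy to prototype outside the program. Below is a minimal, illustrative Python sketch of three of them (the ordinary bootstrap, the block bootstrap with wrap-around, and delete-half jackknifing) applied to the columns of a small alignment. It is not the SEQBOOT code, the function names are invented for the example, and its random-number stream differs from the program's, so its output will not match the usage example below.

import random

def bootstrap(columns, rng):
    # Ordinary bootstrap: draw N columns with replacement.
    n = len(columns)
    return [columns[rng.randrange(n)] for _ in range(n)]

def block_bootstrap(columns, block_size, rng):
    # Block bootstrap: draw N/B blocks of B adjacent columns, wrapping
    # around past the last column as described above.
    n = len(columns)
    out = []
    for _ in range(n // block_size):
        start = rng.randrange(n)
        out.extend(columns[(start + i) % n] for i in range(block_size))
    return out

def delete_half_jackknife(columns, rng):
    # Keep a random half of the columns, in their original order.
    n = len(columns)
    keep = sorted(rng.sample(range(n), n // 2))
    return [columns[i] for i in keep]

# The small alignment from the usage example below.
seqs = {"Alpha": "AACAAC", "Beta": "AACCCC", "Gamma": "ACCAAC",
        "Delta": "CCACCA", "Epsilon": "CCAAAC"}
columns = list(zip(*seqs.values()))        # one tuple per alignment column
rng = random.Random(3)                     # an odd seed, as the program requires
resampled = bootstrap(columns, rng)
for name, row in zip(seqs, zip(*resampled)):
    print("%-10s%s" % (name, "".join(row)))

With block_bootstrap(columns, 3, rng) the blocks keep whole codons together, as noted above, and delete_half_jackknife(columns, rng) keeps three of the six columns.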

Usage

Here is a sample session with fseqbootall


% fseqbootall -seed 3 
Bootstrapped sequences algorithm
Input (aligned) sequence set: seqboot.dat
Phylip seqboot program output file [seqboot.fseqbootall]: 


 bootstrap: true
jackknife: false
 permute: false
 lockhart: false
 ild: false
 justwts: false 

completed replicate number   10
completed replicate number   20
completed replicate number   30
completed replicate number   40
completed replicate number   50
completed replicate number   60
completed replicate number   70
completed replicate number   80
completed replicate number   90
completed replicate number  100

Output written to file "seqboot.fseqbootall"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Bootstrapped sequences algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infilesequences]   seqset     (Aligned) sequence set filename and optional
                                  format, or reference (input USA)
  [-outfile]           outfile    [*.fseqbootall] Phylip seqboot program
                                  output file

   Additional (Optional) qualifiers (* if not always prompted):
   -categories         properties File of input categories
   -mixfile            properties File of mixtures
   -ancfile            properties File of ancestors
   -weights            properties Weights file
   -factorfile         properties Factors file
   -datatype           menu       [s] Choose the datatype (Values: s
                                  (Molecular sequences); m (Discrete
                                  Morphology); r (Restriction Sites); g (Gene
                                  Frequencies))
   -test               menu       [b] Choose test (Values: b (Bootstrap); j
                                  (Jackknife); c (Permute species for each
                                  character); o (Permute character order); s
                                  (Permute within species); r (Rewrite data))
*  -regular            toggle     [N] Altered sampling fraction
*  -fracsample         float      [100.0] Samples as percentage of sites
                                  (Number from 0.100 to 100.000)
*  -rewriteformat      menu       [p] Output format (Values: p (PHYLIP); n
                                  (NEXUS); x (XML))
*  -seqtype            menu       [d] Output format (Values: d (dna); p
                                  (protein); r (rna))
*  -morphseqtype       menu       [p] Output format (Values: p (PHYLIP); n
                                  (NEXUS))
*  -blocksize          integer    [1] Block size for bootstrapping (Integer 1
                                  or more)
*  -reps               integer    [100] How many replicates (Integer 1 or
                                  more)
*  -justweights        menu       [d] Write out datasets or just weights
                                  (Values: d (Datasets); w (Weights))
*  -enzymes            boolean    [N] Is the number of enzymes present in
                                  input file
*  -all                boolean    [N] All alleles present at each locus
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -printdata          boolean    [N] Print out the data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-infilesequences" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infilesequences]
(Parameter 1)
seqset (Aligned) sequence set filename and optional format, or reference (input USA) Readable set of sequences Required
[-outfile]
(Parameter 2)
outfile Phylip seqboot program output file Output file <*>.fseqbootall
Additional (Optional) qualifiers
-categories properties File of input categories Property value(s)  
-mixfile properties File of mixtures Property value(s)  
-ancfile properties File of ancestors Property value(s)  
-weights properties Weights file Property value(s)  
-factorfile properties Factors file Property value(s)  
-datatype list Choose the datatype
s (Molecular sequences)
m (Discrete Morphology)
r (Restriction Sites)
g (Gene Frequencies)
s
-test list Choose test
b (Bootstrap)
j (Jackknife)
c (Permute species for each character)
o (Permute character order)
s (Permute within species)
r (Rewrite data)
b
-regular toggle Altered sampling fraction Toggle value Yes/No No
-fracsample float Samples as percentage of sites Number from 0.100 to 100.000 100.0
-rewriteformat list Output format
p (PHYLIP)
n (NEXUS)
x (XML)
p
-seqtype list Output format
d (dna)
p (protein)
r (rna)
d
-morphseqtype list Output format
p (PHYLIP)
n (NEXUS)
p
-blocksize integer Block size for bootstrapping Integer 1 or more 1
-reps integer How many replicates Integer 1 or more 100
-justweights list Write out datasets or just weights
d (Datasets)
w (Weights)
d
-enzymes boolean Is the number of enzymes present in input file Boolean value Yes/No No
-all boolean All alleles present at each locus Boolean value Yes/No No
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-printdata boolean Print out the data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-infilesequences" associated seqset qualifiers
-sbegin1
-sbegin_infilesequences
integer Start of each sequence to be used Any integer value 0
-send1
-send_infilesequences
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_infilesequences
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_infilesequences
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_infilesequences
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_infilesequences
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_infilesequences
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_infilesequences
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_infilesequences
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_infilesequences
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_infilesequences
string Input sequence format Any string  
-iquery1
-iquery_infilesequences
string Input query fields or ID list Any string  
-ioffset1
-ioffset_infilesequences
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_infilesequences
string Database name Any string  
-sid1
-sid_infilesequences
string Entryname Any string  
-ufo1
-ufo_infilesequences
string UFO features Any string  
-fformat1
-fformat_infilesequences
string Features format Any string  
-fopenfile1
-fopenfile_infilesequences
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

The data files read by fseqbootall (SEQBOOT) are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs. fseqbootall reads any normal sequence USAs.

Input files for usage example

File: seqboot.dat

    5    6
Alpha     AACAAC
Beta      AACCCC
Gamma     ACCAAC
Delta     CCACCA
Epsilon   CCAAAC

Output file format

fseqbootall output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters.

The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factor symbol, so that they all are understood to be recoding one multistate character, they will be resampled together as a group.

The order of species in the data sets in the output file will vary randomly. This is a precaution: it prevents any result that is sensitive to the input order of species from showing up repeatedly in the programs that analyze these data sets, and thus from appearing to have evidence in its favor.

The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.

Output files for usage example

File: seqboot.fseqbootall

    5     6
Alpha      AAACCA
Beta       AAACCC
Gamma      ACCCCA
Delta      CCCAAC
Epsilon    CCCAAA
    5     6
Alpha      AAACAA
Beta       AAACCC
Gamma      ACCCAA
Delta      CCCACC
Epsilon    CCCAAA
    5     6
Alpha      AAAAAC
Beta       AAACCC
Gamma      AACAAC
Delta      CCCCCA
Epsilon    CCCAAC
    5     6
Alpha      CCCCCA
Beta       CCCCCC
Gamma      CCCCCA
Delta      AAAAAC
Epsilon    AAAAAA
    5     6
Alpha      AAAACC
Beta       AAACCC
Gamma      AACACC
Delta      CCCCAA
Epsilon    CCCACC
    5     6
Alpha      AAAACC
Beta       ACCCCC
Gamma      AAAACC
Delta      CCCCAA
Epsilon    CAAACC
    5     6
Alpha      AACCAA
Beta       AACCCC
Gamma      ACCCAA
Delta      CCAACC
Epsilon    CCAAAA
    5     6
Alpha      AAAACC
Beta       ACCCCC
Gamma      AAAACC
Delta      CCCCAA
Epsilon    CAAACC
    5     6
Alpha      AACACC


  [Part of this file has been deleted for brevity]

Gamma      ACAAAA
Delta      CCCCCC
Epsilon    CCAAAA
    5     6
Alpha      AACAAC
Beta       AACCCC
Gamma      AACAAC
Delta      CCACCA
Epsilon    CCAAAC
    5     6
Alpha      AACAAA
Beta       AACCCC
Gamma      CCCAAA
Delta      CCACCC
Epsilon    CCAAAA
    5     6
Alpha      ACAAAA
Beta       ACCCCC
Gamma      CCAAAA
Delta      CACCCC
Epsilon    CAAAAA
    5     6
Alpha      CAAAAA
Beta       CCCCCC
Gamma      CAAAAA
Delta      ACCCCC
Epsilon    AAAAAA
    5     6
Alpha      CAACCC
Beta       CCCCCC
Gamma      CAACCC
Delta      ACCAAA
Epsilon    AAACCC
    5     6
Alpha      ACAACC
Beta       ACCCCC
Gamma      ACAACC
Delta      CACCAA
Epsilon    CAAACC
    5     6
Alpha      AAAAAA
Beta       AAAAAC
Gamma      ACCCCA
Delta      CCCCCC
Epsilon    CCCCCA
    5     6
Alpha      AACAAC
Beta       AACCCC
Gamma      CCCAAC
Delta      CCACCA
Epsilon    CCAAAC

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
frestml

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Restriction site maximum likelihood method

Description

Estimation of phylogenies by maximum likelihood using restriction sites data (not restriction fragments but presence/absence of individual sites). It employs the Jukes-Cantor symmetrical model of nucleotide change, which does not allow for differences of rate between transitions and transversions. This program is very slow.

Algorithm

This program implements a maximum likelihood method for restriction sites data (not restriction fragment data). This program is one of the slowest programs in this package, and can be very tedious to run. It is possible to have the program search for the maximum likelihood tree. It will be more practical for some users (those that do not have fast machines) to use the U (User Tree) option, which takes less run time, optimizing branch lengths and computing likelihoods for particular tree topologies suggested by the user. The model used here is essentially identical to that used by Smouse and Li (1987) who give explicit expressions for computing the likelihood for three-species trees. It does not place prior probabilities on trees as they do. The present program extends their approach to multiple species by a technique which, while it does not give explicit expressions for likelihoods, does enable their computation and the iterative improvement of branch lengths. It also allows for multiple restriction enzymes. The algorithm has been described in a paper (Felsenstein, 1992). Another relevant paper is that of DeBry and Slade (1985).

The assumptions of the present model are:

  1. Each restriction site evolves independently.
  2. Different lineages evolve independently.
  3. Each site undergoes substitution at an expected rate which we specify.
  4. Substitutions consist of replacement of a nucleotide by one of the other three nucleotides, chosen at random.

Note that if the existing base is, say, an A, the chance of it being replaced by a G is 1/3, and so is the chance that it is replaced by a T. This means that there can be no difference in the (expected) rate of transitions and transversions. Users who are upset at this might ponder the fact that a version allowing different rates of transitions and transversions would run an estimated 16 times slower. If it also allowed for unequal frequencies of the four bases, it would run about 300,000 times slower! For the moment, until a better method is available, I guess I'll stick with this one!
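
As an illustration of what this symmetry implies, here is a small Python sketch of the standard Jukes-Cantor transition probabilities, with the branch length t measured in expected substitutions per site. It is only a reminder of the model's behaviour, not an excerpt of frestml, whose internal parameterization may differ.

import math

def jc_probabilities(t):
    # Probability that a site shows the same base after branch length t,
    # and the probability of each particular one of the three other bases.
    same = 0.25 + 0.75 * math.exp(-4.0 * t / 3.0)
    other = 0.25 - 0.25 * math.exp(-4.0 * t / 3.0)
    return same, other

for t in (0.0, 0.1, 1.0, 10.0):
    same, other = jc_probabilities(t)
    print("t = %5.1f  P(same) = %.4f  P(each other base) = %.4f" % (t, same, other))

At t = 0 the probabilities are 1 and 0; as t grows, all four bases approach probability 1/4. Each of the three alternative bases is always equally likely, which is exactly the statement that transitions and transversions have the same expected rate.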

Subject to these assumptions, the program is an approximately correct maximum likelihood method.

Usage

Here is a sample session with frestml


% frestml 
Restriction site maximum likelihood method
Input file: restml.dat
Phylip tree file (optional): 
Phylip restml program output file [restml.frestml]: 

numseqs: 1

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Output written to file "restml.frestml"

Tree also written onto file "restml.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Restriction site maximum likelihood method
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-data]              discretestates File containing one or more sets of
                                  restriction data
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.frestml] Phylip restml program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]allsites       boolean    [Y] All sites detected
*  -lengths            boolean    [N] Use lengths from user trees
   -sitelength         integer    [6] Site length (Integer from 1 to 8)
*  -global             boolean    [N] Global rearrangements
*  -[no]rough          boolean    [Y] Speedier but rougher analysis
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.frestml] Phylip tree output file
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-data]
(Parameter 1)
discretestates File containing one or more sets of restriction data Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip restml program output file Output file <*>.frestml
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]allsites boolean All sites detected Boolean value Yes/No Yes
-lengths boolean Use lengths from user trees Boolean value Yes/No No
-sitelength integer Site length Integer from 1 to 8 6
-global boolean Global rearrangements Boolean value Yes/No No
-[no]rough boolean Speedier but rougher analysis Boolean value Yes/No Yes
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file Output file <*>.frestml
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

frestml input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this:


   10   35    4

The first line of the data file will also contain a letter W following these numbers (and separated from them by a space) if the Weights option is being used. As with all programs using the weights option, a line or lines must then follow, before the data, with the weights for each site.

The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs.

The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown.

User-defined trees may follow the data in the usual way. The trees must be unrooted, which means that at their base they must have a trifurcation.

Input files for usage example

File: restml.dat

   5   13   2
Alpha     ++-+-++--+++-
Beta      ++++--+--+++-
Gamma     -+--+-++-+-++
Delta     ++-+----++---
Epsilon   ++++----++---

Output file format

frestml writes a text report file containing the estimated tree (drawn as an unrooted tree), its log likelihood, and a table of branch lengths with approximate confidence limits. Unless tree output is turned off, the tree is also written in standard Newick format to a separate tree file.

Output files for usage example

File: restml.frestml


Restriction site Maximum Likelihood method, version 3.69.650


  Recognition sequences all 6 bases long

Sites absent from all species are assumed to have been omitted




  +----Gamma     
  |  
  |  +Beta      
  1--2  
  |  |  +Epsilon   
  |  +--3  
  |     +Delta     
  |  
  +Alpha     


remember: this is an unrooted tree!

Ln Likelihood =   -40.47082

 
 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------
   1          Gamma           0.10794     (  0.01144,     0.21872) **
   1             2            0.01244     (     zero,     0.04712)
   2          Beta            0.00100     (     zero,    infinity)
   2             3            0.05878     (     zero,     0.12675) **
   3          Epsilon         0.00022     (     zero,    infinity)
   3          Delta           0.01451     (     zero,     0.04459) **
   1          Alpha           0.01244     (     zero,     0.04717)

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01


File: restml.treefile

(Gamma:0.10794,(Beta:0.00100,(Epsilon:0.00022,
Delta:0.01451):0.05878):0.01244,Alpha:0.01244);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fdolpenny.html0000664000175000017500000011006712171064331016236 00000000000000 EMBOSS: fdolpenny
fdolpenny

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Penny algorithm Dollo or polymorphism

Description

Finds all most parsimonious phylogenies for discrete-character data with two states, for the Dollo or polymorphism parsimony criteria using the branch-and-bound method of exact search. May be impractical (depending on the data) for more than 10-11 species.

Algorithm

DOLPENNY is a program that will find all of the most parsimonious trees implied by your data when the Dollo or polymorphism parsimony criteria are employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by DOLPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of losses. It adds the next species to each of these, again in all possible places. If this process were continued it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). Actually it does not do this, because the trees are generated in a particular order and some of them are never generated.

Actually the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. Its number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species     (A,B)
          Add C in first place     ((A,B),C)
               Add D in first place     (((A,D),B),C)
               Add D in second place     ((A,(B,D)),C)
               Add D in third place     (((A,B),D),C)
               Add D in fourth place     ((A,B),(C,D))
               Add D in fifth place     (((A,B),C),D)
          Add C in second place: ((A,C),B)
               Add D in first place     (((A,D),C),B)
               Add D in second place     ((A,(C,D)),B)
               Add D in third place     (((A,C),D),B)
               Add D in fourth place     ((A,C),(B,D))
               Add D in fifth place     (((A,C),B),D)
          Add C in third place     (A,(B,C))
               Add D in first place     ((A,D),(B,C))
               Add D in second place     (A,((B,D),C))
               Add D in third place     (A,(B,(C,D)))
               Add D in fourth place     (A,((B,C),D))
               Add D in fifth place     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted bifurcating trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of losses (or retentions of polymorphism) is evaluated.

The point of this is that if a previously-found tree such as ((A,B),(C,D)) required fewer losses, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that has fewer losses than these, it clears out the list and restarts the list with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.
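
The whole strategy can be sketched compactly. The Python fragment below is an illustration only, not the DOLPENNY code: it enumerates rooted trees by depth-first species addition and prunes a branch of the search as soon as a partial tree is already worse than the best complete tree found. For brevity it scores trees by ordinary Fitch parsimony rather than by the Dollo or polymorphism criteria that DOLPENNY actually uses, and it adds species in a fixed order rather than in order of apparent promise.

def fitch_length(tree, data):
    # Parsimony length of a rooted tree (nested tuples of taxon names),
    # counted by Fitch's algorithm; used here only as a score that can
    # never decrease when further species are added.
    steps = [0]
    def post(node):
        if isinstance(node, str):
            return [frozenset(c) for c in data[node]]
        left, right = post(node[0]), post(node[1])
        sets = []
        for a, b in zip(left, right):
            if a & b:
                sets.append(a & b)
            else:
                steps[0] += 1
                sets.append(a | b)
        return sets
    post(tree)
    return steps[0]

def attachments(tree, name):
    # Yield every tree made by attaching `name` on one branch of `tree`.
    yield (tree, name)                      # attach above the current root
    if not isinstance(tree, str):
        left, right = tree
        for t in attachments(left, name):
            yield (t, right)
        for t in attachments(right, name):
            yield (left, t)

def branch_and_bound(taxa, data):
    best = {"length": float("inf"), "trees": []}
    def search(tree, remaining):
        length = fitch_length(tree, data)
        if length > best["length"]:         # the bound: adding species never lowers it
            return
        if not remaining:
            if length < best["length"]:
                best["length"], best["trees"] = length, [tree]
            else:
                best["trees"].append(tree)
            return
        for t in attachments(tree, remaining[0]):
            search(t, remaining[1:])
    search((taxa[0], taxa[1]), taxa[2:])
    return best["length"], best["trees"]

# A tiny invented data set: four species scored for three 0/1 characters.
data = {"A": "110", "B": "100", "C": "011", "D": "001"}
length, trees = branch_and_bound(list(data), data)
print(length, trees)

The list kept in best["trees"] plays the role of the list of tied trees described above: whenever a strictly better tree is found the list is cleared and the bound tightens.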

Controlling Run Times

Among the quantities available to be set at the beginning of a run of DOLPENNY, two (howoften and howmany) are of particular importance. As DOLPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DOLPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far.

When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has got so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far. By setting the product (howoften X howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). In their initial setting the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften and nothing will be printed out except some headings and then the final trees.
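
In other words, the run is abandoned once howoften X howmany trees (100,000 with the defaults) have been examined. A trivial Python sketch of that accounting, for illustration only:

def progress_check(trees_examined, howoften=100, howmany=1000):
    # Print a progress line every `howoften` trees and give up once
    # howoften * howmany trees have been examined without finishing.
    if trees_examined % howoften == 0:
        multiples = trees_examined // howoften
        print("%d multiples of %d trees examined so far" % (multiples, howoften))
        if multiples >= howmany:
            raise RuntimeError("aborting: reporting the best trees found so far")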

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if "maxtrees" is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If "maxtrees" is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that "maxtrees" be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, "maxtrees" is set to 100 in the distribution copy.

Methods and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DOLLOP, and thus the remainder of this document will be nearly identical to the DOLLOP document. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary.
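
As a concrete illustration of this counting, here is a small Python sketch that computes the number of reversions needed for one 0/1 character on a given rooted tree. It is not the DOLPENNY code: it assumes plain 0/1 tip states (no "P" or "?" entries) and simply places the single 0-->1 gain as low in the tree as it can go.

def dollo_losses(tree, data, k):
    # Reversions (1 -> 0) needed for binary character k on a rooted tree
    # (nested tuples of taxon names), assuming ancestral state 0 and a
    # single 0 -> 1 origin.
    def ones(node):
        if isinstance(node, str):
            return 1 if data[node][k] == "1" else 0
        return ones(node[0]) + ones(node[1])
    total = ones(tree)
    if total == 0:
        return 0
    # Descend to the smallest subtree containing every taxon with state 1;
    # the single gain sits on the branch above it.
    node = tree
    while not isinstance(node, str):
        if ones(node[0]) == total:
            node = node[0]
        elif ones(node[1]) == total:
            node = node[1]
        else:
            break
    # Inside that subtree, each branch leading to a clade with no state-1
    # taxa requires one loss.
    losses = [0]
    def walk(n):
        if isinstance(n, str):
            return
        for child in n:
            if ones(child) == 0:
                losses[0] += 1
            else:
                walk(child)
    walk(node)
    return losses[0]

# Invented example: Gamma and Epsilon have lost the character.
data = {"Alpha": "1", "Beta": "1", "Gamma": "0", "Delta": "1", "Epsilon": "0"}
tree = ((("Alpha", "Beta"), "Gamma"), ("Delta", "Epsilon"))
print(dollo_losses(tree, data, 0))         # prints 2

The program minimizes the sum of such counts over all characters (or, under the Polymorphism option, the corresponding count of retained polymorphisms) when it compares trees.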

The assumptions of this method are in effect:

  1. We know which state is the ancestral one (state 0).
  2. The characters are evolving independently.
  3. Different lineages evolve independently.
  4. The probability of a forward change (0-->1) is small over the evolutionary times involved.
  5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change.
  6. Retention of polymorphism for both states (0 and 1) is highly improbable.
  7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment.

That these are the assumptions is established in several of my papers (1973a, 1978b, 1979, 1981b, 1983). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred.

The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained.

The assumptions of the polymorphism parsimony method are in effect:

  1. The ancestral state (state 0) is known in each character.
  2. The characters are evolving independently of each other.
  3. Different lineages are evolving independently.
  4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group.
  5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change.
  6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism.
  7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Usage

Here is a sample session with fdolpenny


% fdolpenny 
Penny algorithm Dollo or polymorphism
Phylip character discrete states file: dolpenny.dat
Phylip dolpenny program output file [dolpenny.fdolpenny]: 


How many
trees looked                                       Approximate
at so far      Length of        How many           percentage
(multiples     shortest tree    trees this long    searched
of  100):      found so far     found so far       so far
----------     ------------     ------------       ------------
     1           3.00000                1                0.95

Output written to file "dolpenny.fdolpenny"

Trees also written onto file "dolpenny.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Penny algorithm Dollo or polymorphism
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more data sets
  [-outfile]           outfile    [*.fdolpenny] Phylip dolpenny program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -ancfile            properties Ancestral states file
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1] Threshold value (Number 0.000 or more)
   -howmany            integer    [1000] How many groups of trees (Any integer
                                  value)
   -howoften           integer    [100] How often to report, in trees (Any
                                  integer value)
   -[no]simple         boolean    [Y] Branch and bound is simple
   -method             menu       [d] Parsimony method (Values: d (Dollo); p
                                  (Polymorphism))
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdolpenny] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -ancseq             boolean    [N] Print states at all nodes of tree
   -stepbox            boolean    [N] Print out steps in each character

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more data sets Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip dolpenny program output file Output file <*>.fdolpenny
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-ancfile properties Ancestral states file Property value(s)  
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 0.000 or more 1
-howmany integer How many groups of trees Any integer value 1000
-howoften integer How often to report, in trees Any integer value 100
-[no]simple boolean Branch and bound is simple Boolean value Yes/No Yes
-method list Parsimony method
d (Dollo)
p (Polymorphism)
d
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdolpenny
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-ancseq boolean Print states at all nodes of tree Boolean value Yes/No No
-stepbox boolean Print out steps in each character Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdolpenny reads discrete character data with "?", "P", "B" states allowed.

(0,1) Discrete character data

These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.
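
For readers who prefer to see the recoding spelled out, here is a small Python sketch (not the FACTOR program) that reproduces the table above from the character-state tree: each new character corresponds to one arrow, and a state is scored 1 for every arrow that lies on its path back to the ancestral state.

def binary_recode(parent, arrows):
    # parent maps each derived state to its parent in the character-state
    # tree; the ancestral state appears only as a value.  arrows lists the
    # (child, parent) arrows in the column order wanted for the new characters.
    def ancestry(state):
        path = set()
        while state in parent:
            path.add((state, parent[state]))
            state = parent[state]
        return path
    states = sorted(set(parent) | set(parent.values()))
    codes = {}
    for s in states:
        anc = ancestry(s)
        codes[s] = "".join("1" if a in anc else "0" for a in arrows)
    return codes

# The character-state tree shown above: 1 --> 0, 0 --> 2, 0 --> 3, with the
# columns ordered as in the table (arrows 0->3, 0->2, 1->0).
parent = {0: 1, 2: 0, 3: 0}
arrows = [(3, 0), (2, 0), (0, 1)]
print(binary_recode(parent, arrows))
# {0: '001', 1: '000', 2: '011', 3: '101'}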

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: dolpenny.dat

    7    6
Alpha1    110110
Alpha2    110110
Beta1     110000
Beta2     110000
Gamma1    100110
Delta     001001
Epsilon   001110

Output file format

fdolpenny output format is standard. It includes a rooted tree and, if the user selects option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.

A table is available to be printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in DOLPENNY gives up more easily on displaying these states.

If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.
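
As an illustration of this output convention, here is a minimal Python sketch (not part of the package) that splits each line of such a tree file into its Newick string and its bracketed weight; the file name in the commented example is the dolpenny.treefile shown below, and a missing weight is treated as 1.0.

# Minimal sketch: read a PHYLIP-style tree file in which each tied tree is
# followed by a weight in square brackets, e.g.
#   (Delta,(Epsilon,(Gamma1,...)))[0.3333];
# Only the simple one-tree-per-line layout shown in this document is handled.
import re

def read_weighted_trees(path):
    trees = []
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            m = re.match(r"(.*?)(?:\[([0-9.]+)\])?;$", line)
            if m:
                newick, weight = m.group(1), m.group(2)
                trees.append((newick + ";", float(weight) if weight else 1.0))
    return trees

# Example (file name as in the output files below):
# for newick, w in read_weighted_trees("dolpenny.treefile"):
#     print(w, newick)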

Output files for usage example

File: dolpenny.fdolpenny


Penny algorithm for Dollo or polymorphism parsimony, version 3.69.650
 branch-and-bound to find all most parsimonious trees


requires a total of              3.000

    3 trees in all found




  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     +--6  +--------Alpha2    
        !  !  
        +--1     +--Beta2     
           !  +--5  
           +--4  +--Beta1     
              !  
              +-----Alpha1    





  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     +--6        +--Beta2     
        !  +-----5  
        !  !     +--Beta1     
        +--4  
           !     +--Alpha2    
           +-----1  
                 +--Alpha1    





  +-----------------Delta     
  !  
--2  +--------------Epsilon   
  !  !  
  +--3  +-----------Gamma1    
     !  !  
     !  !        +--Beta2     
     +--6     +--5  
        !  +--4  +--Beta1     
        !  !  !  
        +--1  +-----Alpha2    
           !  
           +--------Alpha1    


File: dolpenny.treefile

(Delta,(Epsilon,(Gamma1,(Alpha2,((Beta2,Beta1),Alpha1)))))[0.3333];
(Delta,(Epsilon,(Gamma1,((Beta2,Beta1),(Alpha2,Alpha1)))))[0.3333];
(Delta,(Epsilon,(Gamma1,(((Beta2,Beta1),Alpha2),Alpha1))))[0.3333];

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
fmix

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Mixed parsimony algorithm

Description

Estimates phylogenies by some parsimony methods for discrete character data with two states (0 and 1). Allows use of the Wagner parsimony method, the Camin-Sokal parsimony method, or arbitrary mixtures of these. Also reconstructs ancestral states and allows weighting of characters (does not infer branch lengths).

Algorithm

MIX is a general parsimony program which carries out the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This is under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).
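
The difference between the two criteria can be seen in a minimal Python sketch (illustrative only, and not fmix's internal code) that scores a single binary character on one fixed rooted tree whose interior nodes have already been assigned states: Wagner parsimony counts every change on a branch, while Camin-Sokal counts only 0 --> 1 changes and treats any 1 --> 0 change as inadmissible. The tree and node names below are made up for the example.

# Minimal sketch: score one binary character on a fixed rooted tree whose
# internal nodes already carry states.  Wagner parsimony counts all changes
# on branches; Camin-Sokal counts only 0->1 changes and treats a 1->0 change
# as inadmissible (infinite cost).  The tree and states are illustrative.

parent = {"A": "n1", "B": "n1", "n1": "root", "C": "root"}
state  = {"A": 1, "B": 1, "n1": 1, "C": 0, "root": 0}

def wagner_steps():
    return sum(state[child] != state[parent[child]] for child in parent)

def camin_sokal_steps():
    steps = 0
    for child in parent:
        if state[parent[child]] == 0 and state[child] == 1:
            steps += 1                      # forward change, allowed
        elif state[parent[child]] == 1 and state[child] == 0:
            return float("inf")             # reversion, forbidden
    return steps

print(wagner_steps(), camin_sokal_steps())  # here: 1 change, and 1 forward change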

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Usage

Here is a sample session with fmix


% fmix 
Mixed parsimony algorithm
Phylip character discrete states file: mix.dat
Phylip tree file (optional): 
Phylip mix program output file [mix.fmix]: 

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........


Output written to file "mix.fmix"

Trees also written onto file "mix.treefile"


Go to the input files for this example
Go to the output files for this example

Example 2


% fmix -printdata -ancfile mixancfile.dat 
Mixed parsimony algorithm
Phylip character discrete states file: mix.dat
Phylip tree file (optional): 
Phylip mix program output file [mix.fmix]: 

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........


Output written to file "mix.fmix"

Trees also written onto file "mix.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Mixed parsimony algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more data sets
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fmix] Phylip mix program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -ancfile            properties Ancestral states file
   -mixfile            properties Mixture file
   -method             menu       [Wagner] Choose the method to use (Values: w
                                  (Wagner); c (Camin-Sokal); m (Mixed))
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -threshold          float      [$(infile.discretesize)] Threshold value
                                  (Number 1.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fmix] Phylip tree output file (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -ancseq             boolean    [N] Print states at all nodes of tree
   -stepbox            boolean    [N] Print out steps in each character

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more data sets Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip mix program output file Output file <*>.fmix
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-ancfile properties Ancestral states file Property value(s)  
-mixfile properties Mixture file Property value(s)  
-method list Choose the method to use
w (Wagner)
c (Camin-Sokal)
m (Mixed)
Wagner
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-threshold float Threshold value Number 1.000 or more $(infile.discretesize)
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fmix
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-ancseq boolean Print states at all nodes of tree Boolean value Yes/No No
-stepbox boolean Print out steps in each character Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fmix reads discrete character data. States "?", "P", and "B" are allowed.

(0,1) Discrete character data

These programs are intended for use by morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: mix.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Input files for usage example 2

File: mixancfile.dat

001??1

Output file format

fmix output is standard: a list of equally parsimonious trees, which will be printed as rooted or unrooted depending on which is appropriate, and, if the user chooses, a table of the number of changes of state required in each character. If the Wagner option is in force for a character, it may not be possible to unambiguously locate the places on the tree where the changes occur, as there may be multiple possibilities. If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the Camin-Sokal parsimony method is invoked and the Ancestors option is also used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the fewest state changes. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use MOVE to display the tree and examine its interior states, as the algorithm in MOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in MIX gives up more easily on displaying these states.

If the A option is not used, then the program will assume 0 as the ancestral state for those characters following the Camin-Sokal method, and will assume that the ancestral state is unknown for those characters following Wagner parsimony. If any characters have unknown ancestral states, and if the resulting tree is rooted (even by outgroup), a table will also be printed out showing the best guesses of which are the ancestral states in each character. You will find it useful to understand the difference between the Camin-Sokal parsimony criterion with unknown ancestral state and the Wagner parsimony criterion.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and was evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters.
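
The idea of the two-tree test can be sketched in a few lines of Python (illustrative only; the per-character step counts below are invented, and this is not the exact code used by fmix): compute the per-character differences in steps between the competing tree and the best tree, estimate the variance of their sum from those differences, and declare the competing tree significantly worse if the summed difference exceeds 1.96 of its standard deviations.

# Minimal sketch of a KHT-style test on two trees, using per-character
# differences in the number of steps.  steps_best and steps_other are
# hypothetical data, one entry per binary character.
import math

steps_best  = [1, 2, 1, 1, 2, 1, 3, 1]
steps_other = [1, 3, 1, 2, 2, 1, 3, 2]

diffs = [o - b for o, b in zip(steps_other, steps_best)]
n     = len(diffs)
mean  = sum(diffs) / n
var   = sum((d - mean) ** 2 for d in diffs) / (n - 1)   # per-character variance
sd_of_total = math.sqrt(n * var)                        # SD of the summed difference

total_diff = sum(diffs)
print(total_diff, sd_of_total)
if total_diff > 1.96 * sd_of_total:                     # 1.96 SD threshold from the text
    print("significantly worse than the best tree")
else:
    print("not significantly worse")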

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one.

If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

Output files for usage example

File: mix.fmix


Mixed parsimony algorithm, version 3.69.650

Wagner parsimony method

               


     4 trees in all found




           +--Epsilon   
     +-----4  
     !     +--Gamma     
  +--2  
  !  !     +--Delta     
--1  +-----3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000





     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
--1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000





     +--------Epsilon   
  +--4  
  !  !  +-----Gamma     
  !  +--2  
--1     !  +--Delta     
  !     +--3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000





     +--------Gamma     
  +--2  
  !  !  +-----Epsilon   
  !  +--4  
--1     !  +--Delta     
  !     +--3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      9.000


File: mix.treefile

(((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.2500];
((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.2500];
((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.2500];
((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.2500];

Output files for usage example 2

File: mix.fmix


Mixed parsimony algorithm, version 3.69.650

5 species, 6 characters

Wagner parsimony method


Name         Characters
----         ----------

Alpha        11011 0
Beta         11000 0
Gamma        10011 0
Delta        00100 1
Epsilon      00111 0


    Ancestral states:
             001?? 1


One most parsimonious tree found:




  +-----------Delta     
--3  
  !  +--------Epsilon   
  +--4  
     !  +-----Gamma     
     +--2  
        !  +--Beta      
        +--1  
           +--Alpha     


requires a total of      8.000

best guesses of ancestral states:
       0 1 2 3 4 5 6 7 8 9
     *--------------------
    0!   0 0 1 ? ? 1      


File: mix.treefile

(Delta,(Epsilon,(Gamma,(Beta,Alpha))));

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmove Interactive mixed method parsimony
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
fseqboot

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Bootstrapped sequences algorithm

Description

Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development.

Algorithm

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are:

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.
  • The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.
  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3.
  • Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters.
  • Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters. (A minimal sketch of both the bootstrap and this jackknife, as resampling of alignment columns, is given after this list.)
  • Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).
  • Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).
  • Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.
  • Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats:
    Andrew Rambaut's BEAST XML format: http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html - a format for alignments. There is also a format for phylogenies described there.
    MSAML: http://xml.coverpages.org/msaml-desc-dec.html - defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects.
    BSML: http://www.bsml.org/resources/default.asp - Bioinformatic Sequence Markup Language; includes a multiple sequence alignment XML format.
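
Here is a minimal Python sketch (illustrative only, not fseqboot's own code) of the two most commonly used options above, the bootstrap and delete-half-jackknife, treating them as resampling of alignment columns; the alignment is the seqboot.dat example used below and the random seed is arbitrary.

# Minimal sketch of column resampling as described above.  Each sequence is a
# row; characters (alignment columns) are resampled, not species.  The data
# and seed are illustrative only.
import random

alignment = {
    "Alpha":   "AACAAC",
    "Beta":    "AACCCC",
    "Gamma":   "ACCAAC",
    "Delta":   "CCACCA",
    "Epsilon": "CCAAAC",
}
n_sites = len(next(iter(alignment.values())))
rng = random.Random(3)

def resample(columns):
    """Build a new data set from the chosen column indices."""
    return {name: "".join(seq[i] for i in columns) for name, seq in alignment.items()}

# Bootstrap: sample n_sites columns with replacement.
boot = resample([rng.randrange(n_sites) for _ in range(n_sites)])

# Delete-half-jackknife: keep a random half of the columns, without replacement.
half = resample(sorted(rng.sample(range(n_sites), n_sites // 2)))

for name in alignment:
    print(name.ljust(10), boot[name], half[name])

Note that, unlike this sketch, fseqboot also writes the species out in a random order in each replicate data set (see the output file notes below).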

Usage

Here is a sample session with fseqboot


% fseqboot -seed 3 
Bootstrapped sequences algorithm
Input (aligned) sequence set: seqboot.dat
Phylip seqboot_seq program output file [seqboot.fseqboot]: 


completed replicate number   10
completed replicate number   20
completed replicate number   30
completed replicate number   40
completed replicate number   50
completed replicate number   60
completed replicate number   70
completed replicate number   80
completed replicate number   90
completed replicate number  100

Output written to file "seqboot.fseqboot"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Bootstrapped sequences algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqset     (Aligned) sequence set filename and optional
                                  format, or reference (input USA)
  [-outfile]           outfile    [*.fseqboot] Phylip seqboot_seq program
                                  output file

   Additional (Optional) qualifiers (* if not always prompted):
   -categories         properties File of input categories
   -weights            properties Weights file
   -test               menu       [b] Choose test (Values: b (Bootstrap); j
                                  (Jackknife); c (Permute species for each
                                  character); o (Permute character order); s
                                  (Permute within species); r (Rewrite data))
*  -regular            toggle     [N] Altered sampling fraction
*  -fracsample         float      [100.0] Samples as percentage of sites
                                  (Number from 0.100 to 100.000)
*  -rewriteformat      menu       [p] Output format (Values: p (PHYLIP); n
                                  (NEXUS); x (XML))
*  -seqtype            menu       [d] Output format (Values: d (dna); p
                                  (protein); r (rna))
*  -blocksize          integer    [1] Block size for bootstraping (Integer 1
                                  or more)
*  -reps               integer    [100] How many replicates (Integer 1 or
                                  more)
*  -justweights        menu       [d] Write out datasets or just weights
                                  (Values: d (Datasets); w (Weights))
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -printdata          boolean    [N] Print out the data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqset (Aligned) sequence set filename and optional format, or reference (input USA) Readable set of sequences Required
[-outfile]
(Parameter 2)
outfile Phylip seqboot_seq program output file Output file <*>.fseqboot
Additional (Optional) qualifiers
-categories properties File of input categories Property value(s)  
-weights properties Weights file Property value(s)  
-test list Choose test
b (Bootstrap)
j (Jackknife)
c (Permute species for each character)
o (Permute character order)
s (Permute within species)
r (Rewrite data)
b
-regular toggle Altered sampling fraction Toggle value Yes/No No
-fracsample float Samples as percentage of sites Number from 0.100 to 100.000 100.0
-rewriteformat list Output format
p (PHYLIP)
n (NEXUS)
x (XML)
p
-seqtype list Output format
d (dna)
p (protein)
r (rna)
d
-blocksize integer Block size for bootstraping Integer 1 or more 1
-reps integer How many replicates Integer 1 or more 100
-justweights list Write out datasets or just weights
d (Datasets)
w (Weights)
d
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-printdata boolean Print out the data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqset qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

The data files read by fseqboot are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs. fseqboot reads any normal sequence USAs.

Input files for usage example

File: seqboot.dat

    5    6
Alpha     AACAAC
Beta      AACCCC
Gamma     ACCAAC
Delta     CCACCA
Epsilon   CCAAAC

Output file format

fseqboot output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters.

The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factors characters, so that they all are understood to be recoding one multistate character, they will be resampled together as a group.

The order of species in the data sets in the output file will vary randomly. This is a precaution to help the programs that analyze these data avoid any result which is sensitive to the input order of species from showing up repeatedly and thus appearing to have evidence in its favor.

The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.

Output files for usage example

File: seqboot.fseqboot

    5     6
Alpha      AAACCA
Beta       AAACCC
Gamma      ACCCCA
Delta      CCCAAC
Epsilon    CCCAAA
    5     6
Alpha      AAACAA
Beta       AAACCC
Gamma      ACCCAA
Delta      CCCACC
Epsilon    CCCAAA
    5     6
Alpha      AAAAAC
Beta       AAACCC
Gamma      AACAAC
Delta      CCCCCA
Epsilon    CCCAAC
    5     6
Alpha      CCCCCA
Beta       CCCCCC
Gamma      CCCCCA
Delta      AAAAAC
Epsilon    AAAAAA
    5     6
Alpha      AAAACC
Beta       AAACCC
Gamma      AACACC
Delta      CCCCAA
Epsilon    CCCACC
    5     6
Alpha      AAAACC
Beta       ACCCCC
Gamma      AAAACC
Delta      CCCCAA
Epsilon    CAAACC
    5     6
Alpha      AACCAA
Beta       AACCCC
Gamma      ACCCAA
Delta      CCAACC
Epsilon    CCAAAA
    5     6
Alpha      AAAACC
Beta       ACCCCC
Gamma      AAAACC
Delta      CCCCAA
Epsilon    CAAACC
    5     6
Alpha      AACACC


  [Part of this file has been deleted for brevity]

Gamma      ACAAAA
Delta      CCCCCC
Epsilon    CCAAAA
    5     6
Alpha      AACAAC
Beta       AACCCC
Gamma      AACAAC
Delta      CCACCA
Epsilon    CCAAAC
    5     6
Alpha      AACAAA
Beta       AACCCC
Gamma      CCCAAA
Delta      CCACCC
Epsilon    CCAAAA
    5     6
Alpha      ACAAAA
Beta       ACCCCC
Gamma      CCAAAA
Delta      CACCCC
Epsilon    CAAAAA
    5     6
Alpha      CAAAAA
Beta       CCCCCC
Gamma      CAAAAA
Delta      ACCCCC
Epsilon    AAAAAA
    5     6
Alpha      CAACCC
Beta       CCCCCC
Gamma      CAACCC
Delta      ACCAAA
Epsilon    AAACCC
    5     6
Alpha      ACAACC
Beta       ACCCCC
Gamma      ACAACC
Delta      CACCAA
Epsilon    CAAACC
    5     6
Alpha      AAAAAA
Beta       AAAAAC
Gamma      ACCCCA
Delta      CCCCCC
Epsilon    CCCCCA
    5     6
Alpha      AACAAC
Beta       AACCCC
Gamma      CCCAAC
Delta      CCACCA
Epsilon    CCAAAC

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
fdollop

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Dollo and polymorphism parsimony algorithm

Description

Estimates phylogenies by the Dollo or polymorphism parsimony criteria for discrete character data with two states (0 and 1). Also reconstructs ancestral states and allows weighting of characters. Dollo parsimony is particularly appropriate for restriction sites data; with ancestor states specified as unknown it may be appropriate for restriction fragments data.

Algorithm

This program carries out the Dollo and polymorphism parsimony methods. The Dollo parsimony method was first suggested in print in verbal form by Le Quesne (1974) and was first well-specified by Farris (1977). The method is named after Louis Dollo since he was one of the first to assert that in evolution it is harder to gain a complex feature than to lose it. The algorithm explains the presence of the state 1 by allowing up to one forward change 0-->1 and as many reversions 1-->0 as are necessary to explain the pattern of states seen. The program attempts to minimize the number of 1-->0 reversions necessary (a minimal scoring sketch for a single character on a fixed tree is given after the list of assumptions below). The assumptions of this method are in effect:
  1. We know which state is the ancestral one (state 0).
  2. The characters are evolving independently.
  3. Different lineages evolve independently.
  4. The probability of a forward change (0-->1) is small over the evolutionary times involved.
  5. The probability of a reversion (1-->0) is also small, but still far larger than the probability of a forward change, so that many reversions are easier to envisage than even one extra forward change.
  6. Retention of polymorphism for both states (0 and 1) is highly improbable.
  7. The lengths of the segments of the true tree are not so unequal that two changes in a long segment are as probable as one in a short segment.
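
The scoring this implies for a single character on one fixed rooted tree can be sketched as follows (a Python illustration only, not fdollop's code, which also searches over trees): the single 0 --> 1 origin is placed on the branch above the smallest clade containing every taxon showing state 1, and each maximal all-0 subtree inside that clade then costs one 1 --> 0 reversion.

# Minimal sketch: count the 1->0 reversions Dollo parsimony needs for one
# binary character on a fixed rooted tree.  State 1 is assumed to arise once,
# on the branch above the smallest clade containing every taxon with state 1;
# each maximal all-0 subtree inside that clade then costs one reversion.
# The tree and states are illustrative only (and at least one taxon is
# assumed to show state 1).

tree  = ((("Alpha", "Beta"), "Gamma"), ("Delta", "Epsilon"))
state = {"Alpha": 1, "Beta": 0, "Gamma": 1, "Delta": 0, "Epsilon": 0}

def tips(node):
    return [node] if isinstance(node, str) else [t for child in node for t in tips(child)]

def has_one(node):
    return any(state[t] == 1 for t in tips(node))

def reversions(node):
    """Number of maximal all-0 subtrees inside a clade that contains a 1."""
    if not has_one(node):
        return 1                       # whole subtree lost state 1: one reversion
    if isinstance(node, str):
        return 0                       # a tip that itself shows state 1
    return sum(reversions(child) for child in node)

def mrca_clade(node):
    """Smallest clade containing all taxa with state 1."""
    if isinstance(node, tuple):
        children_with_one = [c for c in node if has_one(c)]
        if len(children_with_one) == 1:
            return mrca_clade(children_with_one[0])
    return node

clade = mrca_clade(tree)
print("reversions needed:", reversions(clade))   # 1 for this example (Beta)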

One problem can arise when using additive binary recoding to represent a multistate character as a series of two-state characters. Unlike the Camin-Sokal, Wagner, and Polymorphism methods, the Dollo method can reconstruct ancestral states which do not exist. An example is given in my 1979 paper. It will be necessary to check the output to make sure that this has not occurred.

The polymorphism parsimony method was first used by me, and the results published (without a clear specification of the method) by Inger (1967). The method was independently published by Farris (1978a) and by me (1979). The method assumes that we can explain the pattern of states by no more than one origination (0-->1) of state 1, followed by retention of polymorphism along as many segments of the tree as are necessary, followed by loss of state 0 or of state 1 where necessary. The program tries to minimize the total number of polymorphic characters, where each polymorphism is counted once for each segment of the tree in which it is retained.

The assumptions of the polymorphism parsimony method are in effect:

  1. The ancestral state (state 0) is known in each character.
  2. The characters are evolving independently of each other.
  3. Different lineages are evolving independently.
  4. Forward change (0-->1) is highly improbable over the length of time involved in the evolution of the group.
  5. Retention of polymorphism is also improbable, but far more probable than forward change, so that we can more easily envisage much polymorphism than even one additional forward change.
  6. Once state 1 is reached, reoccurrence of state 0 is very improbable, much less probable than multiple retentions of polymorphism.
  7. The lengths of segments in the true tree are not so unequal that we can more easily envisage retention events occurring in both of two long segments than one retention in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Usage

Here is a sample session with fdollop


% fdollop 
Dollo and polymorphism parsimony algorithm
Phylip character discrete states file: dollop.dat
Phylip tree file (optional): 
Phylip dollop program output file [dollop.fdollop]: 


Dollo and polymorphism parsimony algorithm, version 3.69.650

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........
   .........

Output written to file "dollop.fdollop"

Trees also written onto file "dollop.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Dollo and polymorphism parsimony algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more data sets
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fdollop] Phylip dollop program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Phylip weights file (optional)
   -ancfile            properties Ancestral states file
   -method             menu       [d] Parsimony method (Values: d (Dollo); p
                                  (Polymorphism))
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -threshold          float      [$(infile.discretesize)] Threshold value
                                  (Number 0.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdollop] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -ancseq             boolean    [N] Print states at all nodes of tree
   -stepbox            boolean    [N] Print out steps in each character

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more data sets Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip dollop program output file Output file <*>.fdollop
Additional (Optional) qualifiers
-weights properties Phylip weights file (optional) Property value(s)  
-ancfile properties Ancestral states file Property value(s)  
-method list Parsimony method
d (Dollo)
p (Polymorphism)
d
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-threshold float Threshold value Number 0.000 or more $(infile.discretesize)
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdollop
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-ancseq boolean Print states at all nodes of tree Boolean value Yes/No No
-stepbox boolean Print out steps in each character Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdollop reads discrete character data with "?", "P" and "B" states allowed in addition to 0 and 1.

(0,1) Discrete character data

These programs are intended for use by morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.
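
The recoding can be mechanized. Here is a minimal sketch in Python (an illustration only, not the FACTOR program): each arrow of the rooted character-state tree becomes one new (0,1) character, and an old state scores 1 for every arrow on the path from the ancestral state down to it. The column order below is chosen to reproduce the table above.

def recode(parent, arrows):
    """parent: dict child_state -> parent_state (the ancestral state has no
    entry); arrows: list of (parent, child) edges, one per new character."""
    codes = {}
    states = sorted(set(parent) | set(parent.values()))
    for s in states:
        path, node = set(), s
        while node in parent:                 # walk up to the ancestral state
            path.add((parent[node], node))
            node = parent[node]
        codes[s] = "".join("1" if a in path else "0" for a in arrows)
    return codes

# Character-state tree from the text: 1 ---> 0 ---> 2, and 0 ---> 3,
# with columns ordered bottom arrow, right arrow, left arrow.
print(recode({0: 1, 2: 0, 3: 0}, [(0, 3), (0, 2), (1, 0)]))
# {0: '001', 1: '000', 2: '011', 3: '101'}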

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: dollop.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110
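
For readers who want to manipulate such files in a script, here is a minimal sketch in Python (not EMBOSS code; the function name is invented) that reads a discrete-states file laid out like the example above: the header gives the numbers of species and characters, species names occupy the first 10 columns, and the remaining columns hold the states, possibly broken up by blanks.

def read_discrete(path):
    with open(path) as fh:
        nspecies, nchars = map(int, fh.readline().split())
        data = {}
        for _ in range(nspecies):
            line = fh.readline()
            name = line[:10].strip()                 # name field is 10 columns
            data[name] = "".join(line[10:].split())  # drop embedded blanks
            assert len(data[name]) == nchars, name
    return data

# read_discrete("dollop.dat") -> {"Alpha": "110110", "Beta": "110000", ...}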

Output file format

fdollop output is standard: a list of equally parsimonious trees, and, if the user selects menu option 4, a table of the numbers of reversions or retentions of polymorphism necessary in each character. If any of the ancestral states has been specified to be unknown, a table of reconstructed ancestral states is also provided. When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the tabulated counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.

If the user selects menu option 5, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" there may be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether the ancestral state 0 or 1 will give the best tree. If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and these will all be printed as ".". If this has happened and you want to know more about the states at the internal nodes, you will find it helpful to use DOLMOVE to display the tree and examine its interior states, as the algorithm in DOLMOVE shows all that can be known in this case about the interior states, including where there is and is not ambiguity. The algorithm in DOLLOP gives up more easily on displaying these states.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences invented by Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across characters. If the mean is more than 1.96 standard deviations different, then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the step differences at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the binary characters are evolving independently, which is unlikely to be true for many suites of morphological characters.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one.
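
As a rough illustration of the two-tree comparison, here is a minimal sketch in Python of one common formulation of a Kishino-Hasegawa/Templeton style test (not the code used by the program; the per-character step counts are made up). The total step difference between the test tree and the best tree is compared with 1.96 standard deviations of its estimated sampling variance, obtained from the spread of the per-character differences.

from math import sqrt

def kht(steps_best, steps_other):
    d = [b - a for a, b in zip(steps_best, steps_other)]  # extra steps per character
    n = len(d)
    total = sum(d)
    mean = total / n
    var_total = n / (n - 1) * sum((x - mean) ** 2 for x in d)
    sd = sqrt(var_total)
    return total, sd, total > 1.96 * sd                   # significantly worse?

# Hypothetical step counts for six characters on two trees:
print(kht([1, 2, 1, 1, 2, 1], [2, 2, 1, 3, 2, 1]))
# total difference 3, standard deviation about 2.05 -> not significant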

If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

Output files for usage example

File: dollop.fdollop


Dollo and polymorphism parsimony algorithm, version 3.69.650

Dollo parsimony method


One most parsimonious tree found:




  +-----------Delta     
--3  
  !  +--------Epsilon   
  +--4  
     !  +-----Gamma     
     +--2  
        !  +--Beta      
        +--1  
           +--Alpha     


requires a total of      3.000

File: dollop.treefile

(Delta,(Epsilon,(Gamma,(Beta,Alpha))));

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdnamove
fdnamove

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Interactive DNA parsimony

Description

Interactive construction of phylogenies from nucleic acid sequences, with their evaluation by parsimony and compatibility and the display of reconstructed ancestral bases. This can be used to find parsimony or compatibility estimates by hand.

Algorithm

DNAMOVE is an interactive DNA parsimony program, inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Macintosh computers. DNAMOVE reads in a data set which is prepared in almost the same format as one for the DNA parsimony program DNAPARS. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different sites and the way the nucleotide states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user can then specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different sites are affected by changes in the tree topology.

This program uses graphic characters that show the tree to best advantage on some computer systems. Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and on any system whose screen or terminals emulate ANSI standard terminals, such as old Digital VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier.

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree.

The assumptions of this method are exactly analogous to those of MIX:

  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change.
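
The counting itself follows Fitch (1971). Here is a minimal sketch in Python (an illustration, not the DNAMOVE source; the tree representation is invented) that returns the minimum number of changes at one site on a rooted bifurcating tree, treating the deletion state "-" as a fifth state so that gaining or losing it costs one change, as described above.

def fitch_steps(tree, tip_state):
    """tree: dict internal node -> (left child, right child); tip_state maps
    each tip name to one of 'A', 'C', 'G', 'T', '-'."""
    steps = 0

    def post(node):
        nonlocal steps
        if node in tip_state:
            return {tip_state[node]}
        left, right = tree[node]
        a, b = post(left), post(right)
        if a & b:
            return a & b            # overlapping state sets: no change forced
        steps += 1                  # disjoint sets: one substitution needed
        return a | b

    root = next(n for n in tree
                if all(n not in kids for kids in tree.values()))
    post(root)
    return steps

tree = {"n1": ("Alpha", "Beta"), "n2": ("n1", "Gamma"), "root": ("n2", "Delta")}
print(fitch_steps(tree, {"Alpha": "A", "Beta": "A",
                         "Gamma": "C", "Delta": "-"}))   # 2 changes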

Usage

Here is a sample session with fdnamove


% fdnamove 
Interactive DNA parsimony
Input (aligned) nucleotide sequence set(s): dnamove.dat
Phylip tree file (optional): 
NEXT (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q
Do you want to write out the tree to a file? (Y or N): Y

 5 species,  13  sites

Computing steps needed for compatibility in sites ...


  (unrooted)                          19.0 Steps            11 sites compatible
                            
  ,-----------5:Epsilon   
--9  
  !  ,--------4:Delta     
  `--8  
     !  ,-----3:Gamma     
     `--7  
        !  ,--2:Beta      
        `--6  
           `--1:Alpha     


Tree written to file "dnamove.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Interactive DNA parsimony
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  (Aligned) nucleotide sequence set(s)
                                  filename and optional format, or reference
                                  (input USA)
  [-intreefile]        tree       Phylip tree file (optional)

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file - ignore sites with weight zero
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1] Threshold value (Number 1.000 or more)
   -initialtree        menu       [Arbitary] Initial tree (Values: a
                                  (Arbitary); u (User); s (Specify))
   -screenwidth        integer    [80] Width of terminal screen in characters
                                  (Any integer value)
   -screenlines        integer    [24] Number of lines on screen (Any integer
                                  value)
   -outtreefile        outfile    [*.fdnamove] Phylip tree output file
                                  (optional)

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall (Aligned) nucleotide sequence set(s) filename and optional format, or reference (input USA) Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
Additional (Optional) qualifiers
-weights properties Weights file - ignore sites with weight zero Property value(s)  
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 1.000 or more 1
-initialtree list Initial tree
a (Arbitary)
u (User)
s (Specify)
Arbitary
-screenwidth integer Width of terminal screen in characters Any integer value 80
-screenlines integer Number of lines on screen Any integer value 24
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnamove
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnamove reads any normal sequence USAs.

Input files for usage example

File: dnamove.dat

   5   13
Alpha     AACGUGGCCA AAU
Beta      AAGGUCGCCA AAC
Gamma     CAUUUCGUCA CAA
Delta     GGUAUUUCGG CCU
Epsilon   GGGAUCUCGG CCC

Output file format

fdnamove is interactive: it draws the current tree and the distribution of states on the terminal screen, and, if requested on exit, writes the final tree to a Phylip-format tree file (see dnamove.treefile below).

Output files for usage example

File: dnamove.treefile

(Epsilon,(Delta,(Gamma,(Beta,Alpha))));

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fcontrast
fcontrast

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Continuous character contrasts

Description

Reads a tree from a tree file, and a data set with continuous characters data, and produces the independent contrasts for those characters, for use in any multivariate statistics package. Will also produce covariances, regressions and correlations between characters for those contrasts. Can also correct for within-species sampling variation when individual phenotypes are available within a population.

Algorithm

This program implements the contrasts calculation described in my 1985 paper on the comparative method (Felsenstein, 1985d). It reads in a data set of the standard quantitative characters sort, and also a tree from the treefile. It then forms the contrasts between species that, according to that tree, are statistically independent. This is done for each character. The contrasts are all standardized by branch lengths (actually, square roots of branch lengths).

The method is explained in the 1985 paper. It assumes a Brownian motion model. This model was introduced by Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967) as an approximation to the evolution of gene frequencies. I have discussed (Felsenstein, 1973b, 1981c, 1985d, 1988b) the difficulties inherent in using it as a model for the evolution of quantitative characters. Chief among these is that the characters do not necessarily evolve independently or at equal rates. This program allows one to evaluate this, if there is independent information on the phylogeny. You can compute the variance of the contrasts for each character, as a measure of the variance accumulating per unit branch length. You can also test covariances of characters.

The statistics that are printed out include the covariances between all pairs of characters, the regressions of each character on each other (column j is regressed on row i), and the correlations between all pairs of characters. In assessing degrees of freedom it is important to realize that each contrast was taken to have expectation zero, which is known because each contrast could as easily have been computed xi-xj instead of xj-xi. Thus there is no loss of a degree of freedom for estimation of a mean. The number of degrees of freedom is thus the same as the number of contrasts, namely one less than the number of species (tips). If you feed these contrasts into a multivariate statistics program, make sure that it knows that each variable has expectation exactly zero.
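
The pruning itself is short. Here is a minimal sketch in Python of the calculation described above (a textbook rendering, not the CONTRAST source; the nested-tuple tree encoding is invented for illustration): at each internal node the contrast between the two daughter values is standardized by the square root of the summed branch lengths, the node is assigned a weighted average of the daughter values, and its branch to the ancestor is lengthened by vl*vr/(vl+vr) before the process moves rootward.

from math import sqrt

def contrasts(node):
    """node is (trait value, branch length) for a tip, or
    ((left node, right node), branch length) for an internal node.
    Returns (value, effective branch length, list of standardized contrasts)."""
    subtree, v = node
    if not isinstance(subtree, tuple):            # tip: subtree is the value
        return subtree, v, []
    xl, vl, cl = contrasts(subtree[0])
    xr, vr, cr = contrasts(subtree[1])
    c = (xl - xr) / sqrt(vl + vr)                 # standardized contrast
    x = (xl / vl + xr / vr) / (1 / vl + 1 / vr)   # weighted ancestral value
    return x, v + vl * vr / (vl + vr), cl + cr + [c]

# ((A:0.2, B:0.2):0.3, C:0.5) with trait values 1.0, 2.0 and 4.0 gives the
# two contrasts, roughly -1.58 and -2.64:
tip_a, tip_b, tip_c = (1.0, 0.2), (2.0, 0.2), (4.0, 0.5)
print(contrasts(((((tip_a, tip_b), 0.3), tip_c), 0.0))[2])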

Within-species variation

With the W option selected, CONTRAST analyzes data sets with variation within species, using a model like that proposed by Michael Lynch (1990). The method is described in vague terms in my book (Felsenstein, 2004, p. 441). If you select the W option for within-species variation, the data set should have this structure (on the left are the data, on the right my comments):

   10    5                           number of species, number of characters
Alpha        2                       name of 1st species, # of individuals
 2.01 5.3 1.5  -3.41 0.3             data for individual #1
 1.98 4.3 2.1  -2.98 0.45            data for individual #2
Gammarus     3                       name of 2nd species, # of individuals
 6.57 3.1 2.0  -1.89 0.6             data for individual #1
 7.62 3.4 1.9  -2.01 0.7             data for individual #2
 6.02 3.0 1.9  -2.03 0.6             data for individual #3
...                                  (and so on)


The covariances, correlations, and regressions for the "additive" (between-species evolutionary variation) and "environmental" (within-species phenotypic variation) are printed out (the maximum likelihood estimates of each). The program also estimates the within-species phenotypic variation in the case where the between-species evolutionary covariances are forced to be zero. The log-likelihoods of these two cases are compared and a likelihood ratio test (LRT) is carried out. The program prints the result of this test as a chi-square variate, and gives the number of degrees of freedom of the LRT. You have to look up the chi-square variable on a table of the chi-square distribution. The A option is available (if the W option is invoked) to allow you to turn off the doing of this test if you want to.

The program reports the log-likelihood of the data under the models with and without between-species variation. For the moment the program cannot handle the case where within-species variation is to be taken into account but where only species means are available. (It can handle cases where some species have only one member in their sample.)

We hope to fix this soon. We are also on our way to incorporating full-sib, half-sib, or clonal groups within species, so as to do one analysis for within-species genetic and between-species phylogenetic variation.

The data set used as an example below is the example from a paper by Michael Lynch (1990), his characters having been log-transformed. In the case where there is only one specimen per species, Lynch's model is identical to our model of within-species variation (for multiple individuals per species it is not a subcase of his model).

Usage

Here is a sample session with fcontrast


% fcontrast 
Continuous character contrasts
Input file: contrast.dat
Phylip tree file (optional): contrast.tree
Phylip contrast program output file [contrast.fcontrast]: 


Output written to file "contrast.fcontrast"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Continuous character contrasts
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            frequencies File containing one or more sets of data
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fcontrast] Phylip contrast program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -varywithin         boolean    [N] Within-population variation in data
*  -[no]reg            boolean    [Y] Print out correlations and regressions
*  -writecont          boolean    [N] Print out contrasts
*  -[no]nophylo        boolean    [Y] LRT test of no phylogenetic component,
                                  with and without VarA
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
frequencies File containing one or more sets of data Frequency value(s)  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip contrast program output file Output file <*>.fcontrast
Additional (Optional) qualifiers
-varywithin boolean Within-population variation in data Boolean value Yes/No No
-[no]reg boolean Print out correlations and regressions Boolean value Yes/No Yes
-writecont boolean Print out contrasts Boolean value Yes/No No
-[no]nophylo boolean LRT test of no phylogenetic component, with and without VarA Boolean value Yes/No Yes
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fcontrast reads continuous character data.

Continuous character data

The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny.

When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions:

  1. Different lineages evolve independently.
  2. After two lineages split, their characters change independently.
  3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method).
  4. Different loci or characters drift independently.

How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c).

The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option:

     5    3
2 3 2
Alpha      0.90 0.80 0.10 0.56
Beta       0.72 0.54 0.30 0.20
Gamma      0.38 0.10 0.05  0.98
Delta      0.42 0.40 0.43 0.97
Epsilon    0.10 0.30 0.70 0.62

whereas here is what it would have to look like if the A option were invoked:

     5    3
2 3 2
Alpha      0.90 0.10 0.80 0.10 0.10 0.56 0.44
Beta       0.72 0.28 0.54 0.30 0.16 0.20 0.80
Gamma      0.38 0.62 0.10 0.05 0.85  0.98 0.02
Delta      0.42 0.58 0.40 0.43 0.17 0.97 0.03
Epsilon    0.10 0.90 0.30 0.70 0.00 0.62 0.38

The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name).

If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message.
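
A simple pre-flight check can save a failed run. The sketch below (in Python, not part of CONTML or GENDIST; the function name is invented) verifies that, when all alleles are supplied, the frequencies at each locus sum to 1 within a small tolerance.

def check_locus_sums(alleles_per_locus, row, tol=1e-4):
    """alleles_per_locus: e.g. [2, 3, 2]; row: the frequencies for one
    species, in locus order.  Returns the loci whose frequencies do not
    sum to 1 within the tolerance."""
    pos, problems = 0, []
    for locus, k in enumerate(alleles_per_locus, start=1):
        total = sum(row[pos:pos + k])
        if abs(total - 1.0) > tol:
            problems.append((locus, total))
        pos += k
    return problems

# Alpha line from the "A option" example above:
alpha = [0.90, 0.10, 0.80, 0.10, 0.10, 0.56, 0.44]
print(check_locus_sums([2, 3, 2], alpha))   # [] -> all loci sum to 1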

While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points.

CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables.

CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allow us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set.

In the quantitative characters mode, a typical small data set would be:

     5   6
Alpha      0.345 0.467 1.213  2.2  -1.2 1.0
Beta       0.457 0.444 1.1    1.987 -0.2 2.678
Gamma      0.6 0.12 0.97 2.3  -0.11 1.54
Delta      0.68  0.203 0.888 2.0  1.67
Epsilon    0.297  0.22 0.90 1.9 1.74

Note that in the latter case, there is no line giving the numbers of alleles at each locus. In this latter case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale.

For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs.

Input files for usage example

File: contrast.dat

    5   2
Homo        4.09434  4.74493
Pongo       3.61092  3.33220
Macaca      2.37024  3.36730
Ateles      2.02815  2.89037
Galago     -1.46968  2.30259

File: contrast.tree

((((Homo:0.21,Pongo:0.21):0.28,Macaca:0.49):0.13,Ateles:0.62):0.38,Galago:1.00);

Output file format

The statistics printed out by fcontrast include the covariances between all pairs of characters, the regressions of each character on each other (column j is regressed on row i), and the correlations between all pairs of characters. In assessing degrees of freedom it is important to realize that each contrast was taken to have expectation zero, which is known because each contrast could as easily have been computed xi-xj instead of xj-xi. Thus there is no loss of a degree of freedom for estimation of a mean. The number of degrees of freedom is thus the same as the number of contrasts, namely one less than the number of species (tips). If you feed these contrasts into a multivariate statistics program, make sure that it knows that each variable has expectation exactly zero. With the W option selected, the covariances, correlations, and regressions for the "additive" (between-species evolutionary variation) and "environmental" (within-species phenotypic variation) are printed out (the maximum likelihood estimates of each). The program also estimates the within-species phenotypic variation in the case where the between-species evolutionary covariances are forced to be zero. The log-likelihoods of these two cases are compared and a likelihood ratio test (LRT) is carried out. The program prints the result of this test as a chi-square variate, and gives the number of degrees of freedom of the LRT. You have to look up the chi-square variable on a table of the chi-square distribution. The A option is available (if the W option is invoked) to allow you to turn off the doing of this test if you want to.

Output files for usage example

File: contrast.fcontrast


Covariance matrix
---------- ------

    3.9423    1.7028
    1.7028    1.7062

Regressions (columns on rows)
----------- -------- -- -----

    1.0000    0.4319
    0.9980    1.0000

Correlations
------------

    1.0000    0.6566
    0.6566    1.0000

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
econtml Continuous character maximum likelihood method
econtrast Continuous character contrasts

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdnacomp
fdnacomp

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

DNA compatibility algorithm

Description

Estimates phylogenies from nucleic acid sequence data using the compatibility criterion, which searches for the largest number of sites which could have all states (nucleotides) uniquely evolved on the same tree. Compatibility is particularly appropriate when sites vary greatly in their rates of evolution, but we do not know in advance which are the less reliable ones.

Algorithm

This program implements the compatibility method for DNA sequence data. For a four-state character without a character-state tree, as in DNA sequences, the usual clique theorems cannot be applied. The approach taken in this program is to directly evaluate each tree topology by counting how many substitutions are needed in each site, comparing this to the minimum number that might be needed (one less than the number of bases observed at that site), and then evaluating the number of sites which achieve the minimum number. This is the evaluation of the tree (the number of compatible sites), and the topology is chosen so as to maximize that number.
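
To make the criterion concrete, here is a minimal sketch in Python (an illustration, not the DNACOMP source; the column representation and the source of the per-site step counts are assumptions): a site is counted as compatible when the number of changes the tree actually requires there (for example from a Fitch count) equals one less than the number of distinct bases observed at that site.

def compatible_sites(columns, steps_on_tree):
    """columns: one string per site holding the bases observed across the
    species; steps_on_tree: the number of changes the candidate tree needs
    at each site (obtained elsewhere, e.g. by Fitch counting)."""
    compatible = 0
    for col, steps in zip(columns, steps_on_tree):
        minimum = max(len(set(col) - {"?"}) - 1, 0)   # best any tree could do
        if steps == minimum:
            compatible += 1
    return compatible

# Three sites: the first two are explained with the minimum possible number
# of changes on the tree, the third needs one extra change.
print(compatible_sites(["AACC", "AGGG", "ACAC"], [1, 1, 2]))   # 2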

Compatibility methods originated with Le Quesne's (1969) suggestion that one ought to look for trees supported by the largest number of perfectly fitting (compatible) characters. Fitch (1975) showed by counterexample that one could not use the pairwise compatibility methods used in CLIQUE to discover the largest clique of jointly compatible characters.

The assumptions of this method are similar to those of CLIQUE. In a paper in the Biological Journal of the Linnean Society (1981b) I discuss this matter extensively. In effect, the assumptions are that:

  1. Each character evolves independently.
  2. Different lineages evolve independently.
  3. The ancestral base at each site is unknown.
  4. The rates of change in most sites over the time spans involved in the divergence of the group are very small.
  5. A few of the sites have very high rates of change.
  6. We do not know in advance which are the high and which the low rate sites.

That these are the assumptions of compatibility methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that arguments such as mine are invalid and that parsimony (and perhaps compatibility) methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

There is, however, some reason to believe that the present criterion is not the proper way to correct for the presence of some sites with high rates of change in nucleotide sequence data. It can be argued that sites showing more than two nucleotide states, even if those are compatible with the other sites, are also candidates for sites with high rates of change. It might then be more proper to use DNAPARS with the Threshold option with a threshold value of 2.

Change from an occupied site to a gap is counted as one change. Reversion from a gap to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a gap N bases long is N separate events. This may be an overcorrection. When we have nonoverlapping gaps, we could instead code a gap as a single event by changing all but the first "-" in the gap into "?" characters. In this way only the first base of the gap causes the program to infer a change.
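
That recoding is easy to automate. Here is a minimal sketch in Python (a data-preparation helper, not part of DNACOMP) that keeps only the first "-" of each gap run and turns the rest into "?", so that a long deletion is inferred as a single event.

import re

def gaps_as_single_events(seq):
    # keep the first "-" of every run of gaps, replace the rest with "?"
    return re.sub("-+", lambda m: "-" + "?" * (len(m.group(0)) - 1), seq)

print(gaps_as_single_events("ACGT----ACG-T"))   # ACGT-???ACG-T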

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of weighted compatibility differences between trees, taken across sites. If the two trees compatibilities are more than 1.96 standard deviations different then the trees are declared significantly different.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of weighted compatibilities of sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected compatibility, compatibilities for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest compatibility exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the compatibility of each tree, the differences of each from the highest one, the variance of that quantity as determined by the compatibility differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

The algorithm is a straightforward modification of DNAPARS, but with some extra machinery added to calculate, as each species is added, how many base changes are the minimum which could be required at that site. The program runs fairly quickly.

Usage

Here is a sample session with fdnacomp


% fdnacomp -ancseq -stepbox -printdata 
DNA compatibility algorithm
Input (aligned) nucleotide sequence set(s): dnacomp.dat
Phylip tree file (optional): 
Phylip weights file (optional): 
Phylip dnacomp program output file [dnacomp.fdnacomp]: 

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........

Output written to file "dnacomp.fdnacomp"

Trees also written onto file "dnacomp.treefile"


Go to the input files for this example
Go to the output files for this example

Example 2


% fdnacomp 
DNA compatibility algorithm
Input (aligned) nucleotide sequence set(s): dnacomp.dat
Phylip tree file (optional): dnacomptree.dat
Phylip weights file (optional): 
Phylip dnacomp program output file [dnacomp.fdnacomp]: 

Output written to file "dnacomp.fdnacomp"

Trees also written onto file "dnacomp.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

DNA compatibility algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
   -weights            properties Phylip weights file (optional)
  [-outfile]           outfile    [*.fdnacomp] Phylip dnacomp program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdnacomp] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -stepbox            boolean    [N] Print steps & compatibility at sites
   -ancseq             boolean    [N] Print sequences at all nodes of tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
-weights properties Phylip weights file (optional) Property value(s)  
[-outfile]
(Parameter 3)
outfile Phylip dnacomp program output file Output file <*>.fdnacomp
Additional (Optional) qualifiers
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnacomp
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-stepbox boolean Print steps & compatibility at sites Boolean value Yes/No No
-ancseq boolean Print sequences at all nodes of tree Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnacomp reads any normal sequence USAs.

Input files for usage example

File: dnacomp.dat

    5   13
Alpha     AACGUGGCCAAAU
Beta      AAGGUCGCCAAAC
Gamma     CAUUUCGUCACAA
Delta     GGUAUUUCGGCCU
Epsilon   GGGAUCUCGGCCC

Input files for usage example 2

File: dnacomptree.dat

((((Epsilon,Delta),Gamma),Beta),Alpha);

Output file format

fdnacomp output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each character.

If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a gap may or may not be present.

If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

Output files for usage example

File: dnacomp.fdnacomp


DNA compatibility algorithm, version 3.69.650

 5 species,  13  sites

Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



One most parsimonious tree found:




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


total number of compatible sites is       11.0

steps in each site:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0|       2   1   3   2   0   2   1   1   1
   10|   1   1   1   3                        

 compatibility (Y or N) of each site with this tree:

      0123456789
     *----------
   0 ! YYNYYYYYY
  10 !YYYN      

From    To     Any Steps?    State at upper node
                            
          1                AABGTSGCCA AAY
   1      2        maybe   AABGTCGCCA AAY
   2      3         yes    VAKDTCGCCA CAY
   3      4         yes    GGKATCTCGG CCY
   4   Epsilon     maybe   GGGATCTCGG CCC
   4   Delta        yes    GGTATTTCGG CCT
   3   Gamma        yes    CATTTCGTCA CAA
   2   Beta        maybe   AAGGTCGCCA AAC
   1   Alpha       maybe   AACGTGGCCA AAT


File: dnacomp.treefile

((((Epsilon,Delta),Gamma),Beta),Alpha);

Output files for usage example 2

File: dnacomp.fdnacomp


DNA compatibility algorithm, version 3.69.650

User-defined tree:



           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


total number of compatible sites is       11.0


Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug © emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
fcontml

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Gene frequency and continuous character maximum likelihood

Description

Estimates phylogenies from gene frequency data by maximum likelihood under a model in which all divergence is due to genetic drift in the absence of new mutations. Does not assume a molecular clock. An alternative method of analyzing this data is to compute Nei's genetic distance and use one of the distance matrix programs. This program can also do maximum likelihood analysis of continuous characters that evolve by a Brownian Motion model, but it assumes that the characters evolve at equal rates and in an uncorrelated fashion, so that it does not take into account the usual correlations of characters.

Algorithm

This program estimates phylogenies by the restricted maximum likelihood method based on the Brownian motion model. It is based on the model of Edwards and Cavalli-Sforza (1964; Cavalli-Sforza and Edwards, 1967). Gomberg (1966), Felsenstein (1973b, 1981c) and Thompson (1975) have done extensive further work leading to efficient algorithms. CONTML uses restricted maximum likelihood estimation (REML), which is the criterion used by Felsenstein (1973b). The actual algorithm is an iterative EM Algorithm (Dempster, Laird, and Rubin, 1977) which is guaranteed to always give increasing likelihoods. The algorithm is described in detail in a paper of mine (Felsenstein, 1981c), which you should definitely consult if you are going to use this program. Some simulation tests of it are given by Rohlf and Wooten (1988) and Kim and Burgman (1988).

The default (gene frequency) mode treats the input as gene frequencies at a series of loci, and square-root-transforms the allele frequencies (constructing the frequency of the missing allele at each locus first). This enables us to use the Brownian motion model on the resulting coordinates, in an approximation equivalent to using Cavalli-Sforza and Edwards's (1967) chord measure of genetic distance and taking that to give distance between particles undergoing pure Brownian motion. It assumes that each locus evolves independently by pure genetic drift.
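
A minimal Python sketch of this transformation for a single locus, assuming the default convention that the frequencies of all but one allele are supplied. The function name is hypothetical and this is only an illustration of the idea, not the program's code.

import math

def sqrt_transform_locus(freqs_all_but_one):
    # Reconstruct the frequency of the missing allele, then take the
    # square roots of all the allele frequencies; the resulting values
    # are used as Brownian-motion coordinates.
    missing = 1.0 - sum(freqs_all_but_one)
    freqs = list(freqs_all_but_one) + [max(missing, 0.0)]
    return [math.sqrt(f) for f in freqs]

# A three-allele locus for which two frequencies (0.80 and 0.10) are given:
print(sqrt_transform_locus([0.80, 0.10]))   # square roots of 0.80, 0.10, 0.10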

The alternative continuous characters mode (menu option C) treats the input as a series of coordinates of each species in N dimensions. It assumes that we have transformed the characters to remove correlations and to standardize their variances.

A word about microsatellite data

Many current users of CONTML use it to analyze microsatellite data. There are three ways to do this:

  • Coding each copy number as an allele, and feeding in the frequencies of these alleles. As CONTML's gene frequency mode assumes that all change is by genetic drift, this means that no copy number arises by mutation during the divergence of the populations. Since microsatellite loci have very high mutation rates, this is questionable.
  • Use some other program, one not in the PHYLIP package, to compute distances among the populations. Some of the programs that can do this are RSTCalc, poptrfdos, Microsat, and Populations. Links to them can be found at my Phylogeny Programs web site at http://evolution.gs.washington.edu/phylip/software.html.

    Those distance measures allow for mutation during the divergence of the populations. But even they are not perfect -- they do not allow us to use all the information contained in the gene frequency differences within a copy number allele. There is a need for a more complete statistical treatment of inference of phylogenies from microsatellite models, ones that take both mutation and genetic drift fully into account.

  • Alternatively, there is the Brownian motion approximation to mean population copy number. This is described in my book (Felsenstein, 2004, Chapter 15, pp. 242-245), and it is implicit also in the microsatellite distances. Each locus is coded as a single continuous character, the mean copy number at that microsatellite locus in that species. Thus if the species (or population) has frequencies 0.10, 0.24, 0.60, and 0.06 of alleles that have 18, 19, 20, and 21 copies, it is coded as having

    0.10 X 18 + 0.24 X 19 + 0.60 X 20 + 0.06 X 21   =  19.62 
    

    copies. These values can, I believe, be calculated by a spreadsheet program. Each microsatellite is represented by one character, and the continuous character mode of CONTML is used (not the gene frequencies mode). This coding allows for mutation that changes copy number. It does not make complete use of all the data, but neither does the treatment of microsatellite gene frequencies as changing only by genetic drift. (A small worked sketch of this coding follows this list.)
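
As promised above, here is a minimal Python sketch of this coding; the function name is hypothetical, and the computation is simply the weighted average described in the last item.

def mean_copy_number(freqs_by_copies):
    # freqs_by_copies maps copy number -> allele frequency at one locus in
    # one population; the mean is used as a single continuous character.
    return sum(copies * freq for copies, freq in freqs_by_copies.items())

# The example from the text: frequencies 0.10, 0.24, 0.60 and 0.06 of
# alleles with 18, 19, 20 and 21 copies give a mean of 19.62.
print(round(mean_copy_number({18: 0.10, 19: 0.24, 20: 0.60, 21: 0.06}), 2))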

Usage

Here is a sample session with fcontml


% fcontml -printdata 
Gene frequency and continuous character maximum likelihood
Input file: contml.dat
Phylip tree file (optional): 
Phylip contml program output file [contml.fcontml]: 

Adding species:
   1. European  
   2. African   
   3. Chinese   
   4. American  
   5. Australian

Output written to file "contml.fcontml"

Tree also written onto file "contml.treefile"


Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Gene frequency and continuous character maximum likelihood
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            frequencies File containing one or more sets of data
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fcontml] Phylip contml program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -datatype           menu       [g] Input type in infile (Values: g (Gene
                                  frequencies); i (Continuous characters))
*  -lengths            boolean    [N] Use branch lengths from user trees
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
*  -global             boolean    [N] Global rearrangements
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fcontml] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
frequencies File containing one or more sets of data Frequency value(s)  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip contml program output file Output file <*>.fcontml
Additional (Optional) qualifiers
-datatype list Input type in infile
g (Gene frequencies)
i (Continuous characters)
g
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-global boolean Global rearrangements Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fcontml
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fcontml reads continuous character data.

Continuous character data

The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny.

When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions:

  1. Different lineages evolve independently.
  2. After two lineages split, their characters change independently.
  3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method).
  4. Different loci or characters drift independently.

How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c).

The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i.e. for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation.

In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option:

     5    3
2 3 2
Alpha      0.90 0.80 0.10 0.56
Beta       0.72 0.54 0.30 0.20
Gamma      0.38 0.10 0.05  0.98
Delta      0.42 0.40 0.43 0.97
Epsilon    0.10 0.30 0.70 0.62

whereas here is what it would have to look like if the A option were invoked:

     5    3
2 3 2
Alpha      0.90 0.10 0.80 0.10 0.10 0.56 0.44
Beta       0.72 0.28 0.54 0.30 0.16 0.20 0.80
Gamma      0.38 0.62 0.10 0.05 0.85  0.98 0.02
Delta      0.42 0.58 0.40 0.43 0.17 0.97 0.03
Epsilon    0.10 0.90 0.30 0.70 0.00 0.62 0.38

The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name).

If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message.

While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points.
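
Because of that roundoff issue, it can be worth checking your data before running the program. Here is a minimal Python sketch of such a check, assuming the all-alleles (A option) convention; the function name and the tolerance are arbitrary choices made for the example.

def check_locus_sums(loci, tol=1e-4):
    # loci is a list of lists, one inner list of allele frequencies per
    # locus (the all-alleles convention).  Returns the indices of loci
    # whose frequencies do not sum to 1 within the tolerance.
    bad = []
    for i, freqs in enumerate(loci):
        if abs(sum(freqs) - 1.0) > tol:
            bad.append(i)
    return bad

# The second locus below sums to 0.99 and would be flagged (index 1).
print(check_locus_sums([[0.90, 0.10], [0.54, 0.30, 0.15]]))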

CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables.

CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allows us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set.

In the quantitative characters mode, a typical small data set would be:

     5   6
Alpha      0.345 0.467 1.213  2.2  -1.2 1.0
Beta       0.457 0.444 1.1    1.987 -0.2 2.678
Gamma      0.6 0.12 0.97 2.3  -0.11 1.54
Delta      0.68  0.203 0.888 2.0  1.67
Epsilon    0.297  0.22 0.90 1.9 1.74

Note that in the quantitative characters case there is no line giving the numbers of alleles at each locus. In this case no square-root transformation of the coordinates is done: each value is assumed to give directly the position on the Brownian motion scale.

For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs.

Input files for usage example

File: contml.dat

    5    10
2 2 2 2 2 2 2 2 2 2
European   0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205
0.8055 0.5043
African    0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600
0.7582 0.6207
Chinese    0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726
0.7482 0.7334
American   0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000
0.8086 0.8636
Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396
0.9097 0.2976

Output file format

fcontml output has a standard appearance. The topology of the tree is given by an unrooted tree diagram. The lengths (in time or in expected amounts of variance) are given in a table below the topology, and a rough confidence interval is given for each length. Negative lower bounds on a length indicate that rearrangements may be acceptable.

The units of length are amounts of expected accumulated variance (not time). The log likelihood (natural log) of each tree is also given, and it is indicated how many topologies have been tried. The tree does not necessarily have all tips contemporary, and the log likelihood may be either positive or negative (this simply corresponds to whether the density function does or does not exceed 1) and a negative log likelihood does not indicate any error. The log likelihood allows various formal likelihood ratio hypothesis tests. The description of the tree includes approximate standard errors on the lengths of segments of the tree. These are calculated by considering only the curvature of the likelihood surface as the length of the segment is varied, holding all other lengths constant. As such they are most probably underestimates of the variance, and hence may give too much confidence in the given tree.

One should use caution in interpreting the likelihoods that are printed out. If the model is wrong, it will not be possible to use the likelihoods to make formal statistical statements. Thus, if gene frequencies are being analyzed, but the gene frequencies change not only by genetic drift, but also by mutation, the model is not correct. It would be as well-justified in this case to use GENDIST to compute the Nei (1972) genetic distance and then use FITCH, KITSCH or NEIGHBOR to make a tree. If continuous characters are being analyzed, but if the characters have not been transformed to new coordinates that evolve independently and at equal rates, then the model is also violated and no statistical analysis is possible. Doing such a transformation is not easy, and usually not even possible.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across loci. If the two trees' means differ by more than 1.96 standard deviations, the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. The version used here is a multivariate normal approximation to their test; it is due to Shimodaira (1998). The variances and covariances of the sum of log likelihoods across loci are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
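
To illustrate the resampling idea, here is a sketch of the multivariate-normal approximation using numpy. This is only an illustration of the procedure described above, not the code used by this program; the function name, the shape of the input array, and the number of samples are assumptions made for the example.

import numpy as np

def sh_test_sketch(site_lnl, n_samples=10000, seed=12345):
    # site_lnl: array of shape (n_trees, n_loci) of per-locus log-likelihoods.
    site_lnl = np.asarray(site_lnl, dtype=float)
    n_trees, n_loci = site_lnl.shape
    totals = site_lnl.sum(axis=1)
    # Covariance of the per-tree totals, estimated from the per-locus values.
    cov = np.cov(site_lnl) * n_loci
    observed_deficit = totals.max() - totals
    # Under the "least favorable hypothesis" all trees share the same
    # expected total, so sample totals with equal (zero) means and the
    # covariance estimated above.
    rng = np.random.default_rng(seed)
    draws = rng.multivariate_normal(np.zeros(n_trees), cov, size=n_samples)
    sampled_deficit = draws.max(axis=1, keepdims=True) - draws
    # P value: fraction of samples in which a tree's deficit relative to
    # the best tree is at least as large as the deficit actually observed.
    p_values = (sampled_deficit >= observed_deficit).mean(axis=0)
    return observed_deficit, p_values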

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

One problem which sometimes arises is that the program is fed two species (or populations) with identical transformed gene frequencies: this can happen if sample sizes are small and/or many loci are monomorphic. In this case the program "gets its knickers in a twist" and can divide by zero, usually causing a crash. If you suspect that this has happened, check for two species with identical coordinates. If you find them, eliminate one from the problem: the two must always show up as being at the same point on the tree anyway.

Output files for usage example

File: contml.fcontml


Continuous character Maximum Likelihood method version 3.69.650


   5 Populations,   10 Loci

Numbers of alleles at the loci:
------- -- ------- -- --- -----

   2   2   2   2   2   2   2   2   2   2

Name                 Gene Frequencies
----                 ---- -----------

  locus:         1         1         2         2         3         3
                 4         4         5         5         6         6
                 7         7         8         8         9         9
                10        10

European     0.28680   0.71320   0.56840   0.43160   0.44220   0.55780
             0.42860   0.57140   0.38280   0.61720   0.72850   0.27150
             0.63860   0.36140   0.02050   0.97950   0.80550   0.19450
             0.50430   0.49570
African      0.13560   0.86440   0.48400   0.51600   0.06020   0.93980
             0.03970   0.96030   0.59770   0.40230   0.96750   0.03250
             0.95110   0.04890   0.06000   0.94000   0.75820   0.24180
             0.62070   0.37930
Chinese      0.16280   0.83720   0.59580   0.40420   0.72980   0.27020
             1.00000   0.00000   0.38110   0.61890   0.79860   0.20140
             0.77820   0.22180   0.07260   0.92740   0.74820   0.25180
             0.73340   0.26660
American     0.01440   0.98560   0.69900   0.30100   0.32800   0.67200
             0.74210   0.25790   0.66060   0.33940   0.86030   0.13970
             0.79240   0.20760   0.00000   1.00000   0.80860   0.19140
             0.86360   0.13640
Australian   0.12110   0.87890   0.22740   0.77260   0.58210   0.41790
             1.00000   0.00000   0.20180   0.79820   0.90000   0.10000
             0.98370   0.01630   0.03960   0.96040   0.90970   0.09030
             0.29760   0.70240


  +-----------------------------------------------------------African   
  !  
  !             +-------------------------------Australian
  1-------------3  
  !             !     +-----------------------American  
  !             +-----2  
  !                   +Chinese   
  !  
  +European  


remember: this is an unrooted tree!

Ln Likelihood =    38.71914

Between     And             Length      Approx. Confidence Limits
-------     ---             ------      ------- ---------- ------
  1       African        0.09693444   (  0.03123910,  0.19853604)
  1          3           0.02252816   (  0.00089799,  0.05598045)
  3       Australian     0.05247405   (  0.01177094,  0.11542374)
  3          2           0.00945315   ( -0.00897717,  0.03795670)
  2       American       0.03806240   (  0.01095938,  0.07997877)
  2       Chinese        0.00208822   ( -0.00960622,  0.02017433)
  1       European       0.00000000   ( -0.01627246,  0.02516630)


File: contml.treefile

(African:0.09693444,(Australian:0.05247405,(American:0.03806240,Chinese:0.00208822):0.00945315):0.02252816,
European:0.00000000);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
egendist Genetic distance matrix program
fgendist Compute genetic distances from gene frequencies

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug © emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
ftreedistpair

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Calculate distance between two sets of trees

Description

Computes the Branch Score distance between trees, which allows for differences in tree topology and which also makes use of branch lengths. Also computes the Robinson-Foulds symmetric difference distance between trees, which allows for differences in tree topology but does not use branch lengths.

Algorithm

This program computes distances between trees. Two distances are computed, the Branch Score Distance of Kuhner and Felsenstein (1994), and the more widely known Symmetric Difference of Robinson and Foulds (1981). The Branch Score Distance uses branch lengths, and can only be calculated when the trees have lengths on all branches. The Symmetric Difference does not use branch length information, only the tree topologies. It must also be borne in mind that neither distance has any immediate statistical interpretation -- we cannot say whether a larger distance is significantly larger than a smaller one.

These distances are computed by considering all possible branches that could exist on the two trees. Each branch divides the set of species into two groups -- the ones connected to one end of the branch and the ones connected to the other. This makes a partition of the full set of species. For example, the tree (written in Newick notation)

  ((A,C),(D,(B,E))) 

has two internal branches. One induces the partition {A, C | B, D, E} and the other induces the partition {A, C, D | B, E}. A different tree with the same set of species,

  (((A,D),C),(B,E)) 

has internal branches that correspond to the two partitions {A, C, D | B, E} and {A, D | B, C, E}. Note that the other branches, all of which are external branches, induce partitions that separate one species from all the others. Thus there are 5 partitions like this: {C | A, B, D, E} on each of these trees. These are always present on all trees, provided that each tree has each species at the end of its own branch.

In the case of the Branch Score distance, each partition that does exist on a tree also has a branch length associated with it. Thus if the tree is

  (((A:0.1,D:0.25):0.05,C:0.01):0.2,(B:0.3,E:0.8):0.2) 

The list of partitions and their branch lengths is:

{A  |  B, C, D, E}     0.1 
{D  |  A, B, C, E}     0.25 
{A, D  |  B, C, E}     0.05 
{C  |  A, B, D, E}     0.01 
{A, D, C  |  B, E}     0.4 
{B  |  A, C, D, E}     0.3 
{E  |  A, B, C, D}     0.8 

Note that the tree is being treated as unrooted here, so that the branch lengths on either side of the rootmost node are summed up to get a branch length of 0.4.

The Branch Score Distance imagines us as having made a list of all possible partitions, the ones shown above and also all 7 other possible partitions, which correspond to branches that are not found in this tree. These are assigned branch lengths of 0. For two trees, we imagine constructing these lists, and then summing the squared differences between the branch lengths. Thus if both trees have branches {A, D | B, C, E}, the sum contains the square of the difference between the branch lengths. If one tree has the branch and the other doesn't, it contains the square of the difference between the branch length and zero (in other words, the square of that branch length). If both trees do not have a particular branch, nothing is added to the sum because the difference is then between 0 and 0.

The Branch Score Distance takes this sum of squared differences and computes its square root. Note that it has some desirable properties. When small branches differ in tree topology, it is not very big. When branches are both present but differ in length, it is affected.

The Symmetric Difference is simply a count of how many partitions there are, among the two trees, that are on one tree and not on the other. In the example above there are two partitions, {A, C | B, D, E} and {A, D | B, C, E}, each of which is present on only one of the two trees. The Symmetric Difference between the two trees is therefore 2. When the two trees are fully resolved bifurcating trees, their symmetric distance must be an even number; it can range from 0 to twice the number of internal branches, which for n species is 2n-6.

Note the relationship between the two distances. If all branches in both trees have length 1.0, the Symmetric Difference is the square of the Branch Score Distance: each branch that is present in one tree but not in the other adds 1.0 to the sum of squared differences, so the Branch Score Distance is the square root of that count.
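
To make the two definitions concrete, here is a minimal Python sketch that computes both distances from tables of partitions and branch lengths. The function names and the encoding of a partition as a frozenset of its two sides are choices made for this example, not the program's own representation. Only the internal branches of the two example topologies are entered; the external branches are shared by both trees and, having equal lengths, would contribute nothing to either distance.

import math

def tree_distances(tree_a, tree_b):
    # Each tree is a dict mapping a partition to its branch length.
    # Partitions absent from a tree are treated as having length zero.
    all_partitions = set(tree_a) | set(tree_b)
    squared_sum = 0.0
    symmetric = 0
    for part in all_partitions:
        la = tree_a.get(part, 0.0)
        lb = tree_b.get(part, 0.0)
        squared_sum += (la - lb) ** 2
        if (part in tree_a) != (part in tree_b):
            symmetric += 1
    return math.sqrt(squared_sum), symmetric

def split(side_one, side_two, length):
    # Helper: encode a partition as a frozenset of its two sides, so the
    # key is the same whichever side is listed first.
    return frozenset({frozenset(side_one), frozenset(side_two)}), length

# The internal branches of the two example topologies, all of length 1.0:
tree1 = dict([split("AC", "BDE", 1.0), split("ACD", "BE", 1.0)])
tree2 = dict([split("ACD", "BE", 1.0), split("AD", "BCE", 1.0)])
print(tree_distances(tree1, tree2))   # (1.414..., 2)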

We have assumed that nothing is lost if the trees are treated as unrooted trees. It is easy to define a counterpart to the Branch Score Distance and one to the Symmetric Difference for rooted trees. Each branch then defines a set of species, namely the clade defined by that branch. Thus if the first of the two trees above were considered as a rooted tree it would define the three clades {A, C}, {B, D, E}, and {B, E}. The Branch Score Distance is computed from the branch lengths for all possible sets of species, with 0 put for each set that does not occur on that tree. The table above will be nearly the same, but with two entries instead of one for the sets on either side of the root, {A, C, D} and {B, E}. The Symmetric Difference between two rooted trees is simply the count of the number of clades that are defined by one but not by the other. For the second tree the clades would be {A, D}, {A, C, D}, and {B, E}. The Symmetric Difference between these two rooted trees would then be 4.

Although the examples we have discussed have involved fully bifurcating trees, the input trees can have multifurcations. This does not cause any complication for the Branch Score Distance. For the Symmetric Difference, it can lead to distances that are odd numbers.

However, note one strong restriction. The trees should all have the same list of species. If you use one set of species in the first two trees, and another in the second two, and choose distances for adjacent pairs, the distances will be incorrect and will depend on the order of these pairs in the input tree file, in odd ways.

Usage

Here is a sample session with ftreedistpair


% ftreedistpair -style s 
Calculate distance between two sets of trees
Phylip tree file: treedist.dat
Second phylip tree file: treedist.dat
Phylip treedist program output file [treedist.ftreedistpair]: 

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Calculate distance between two sets of trees
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-intreefile]        tree       Phylip tree file
  [-bintreefile]       tree       Second phylip tree file
  [-outfile]           outfile    [*.ftreedistpair] Phylip treedist program
                                  output file

   Additional (Optional) qualifiers:
   -dtype              menu       [b] Distance type (Values: s (Symmetric
                                  difference); b (Branch score distance))
   -pairing            menu       [l] Tree pairing method (Values: c
                                  (Distances between corresponding pairs each
                                  tree file); l (Distances between all
                                  possible pairs in each tree file))
   -style              menu       [v] Distances output option (Values: f
                                  (Full_matrix); v (Verbose, one pair per
                                  line); s (Sparse, one pair per line))
   -noroot             boolean    [N] Trees to be treated as rooted
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -progress           boolean    [N] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-intreefile]
(Parameter 1)
tree Phylip tree file Phylogenetic tree  
[-bintreefile]
(Parameter 2)
tree Second phylip tree file Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip treedist program output file Output file <*>.ftreedistpair
Additional (Optional) qualifiers
-dtype list Distance type
s (Symmetric difference)
b (Branch score distance)
b
-pairing list Tree pairing method
c (Distances between corresponding pairs each tree file)
l (Distances between all possible pairs in each tree file)
l
-style list Distances output option
f (Full_matrix)
v (Verbose, one pair per line)
s (Sparse, one pair per line)
v
-noroot boolean Trees to be treated as rooted Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-progress boolean Print indications of progress of run Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

ftreedistpair reads two input tree files. The tree files may either have the number of trees on their first line, or not. If the number of trees is given, it is actually ignored and all trees in each tree file are considered, even if there are more trees than indicated by the number. There is no maximum number of trees that can be processed but, if you feed in too many, there may be an error message about running out of memory. The problem is particularly acute if you choose the option to examine all possible pairs of trees, one from each of the two input tree files. Thus if there are 1,000 trees in each input tree file, keep in mind that all possible pairs means 1,000,000 pairs to be examined!

Input files for usage example

File: treedist.dat

(A:0.1,(B:0.1,(H:0.1,(D:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,((J:0.1,H:0.1):0.1,(((G:0.1,E:0.1):0.1,
(F:0.1,I:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1,
D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,(((J:0.1,H:0.1):0.1,
D:0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1,
D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);

Output file format

If any of the four types of analysis are selected, the user must specify how they want the results presented.

The Full matrix (choice F) is a table showing all distances. It is written onto the output file. The table is presented as groups of 10 columns. Here is the Full matrix for the 12 trees in the input tree file which is given as an example at the end of this page.

Tree distance program, version 3.6

Symmetric differences between all pairs of trees in tree file:



          1     2     3     4     5     6     7     8     9    10 
      \------------------------------------------------------------
    1 |   0     4     2    10    10    10    10    10    10    10  
    2 |   4     0     2    10     8    10     8    10     8    10  
    3 |   2     2     0    10    10    10    10    10    10    10  
    4 |  10    10    10     0     2     2     4     2     4     0  
    5 |  10     8    10     2     0     4     2     4     2     2  
    6 |  10    10    10     2     4     0     2     2     4     2  
    7 |  10     8    10     4     2     2     0     4     2     4  
    8 |  10    10    10     2     4     2     4     0     2     2  
    9 |  10     8    10     4     2     4     2     2     0     4  
   10 |  10    10    10     0     2     2     4     2     4     0  
   11 |   2     2     0    10    10    10    10    10    10    10  
   12 |  10    10    10     2     4     2     4     0     2     2  

         11    12 
      \------------
    1 |   2    10  
    2 |   2    10  
    3 |   0    10  
    4 |  10     2  
    5 |  10     4  
    6 |  10     2  
    7 |  10     4  
    8 |  10     0  
    9 |  10     2  
   10 |  10     2  
   11 |   0    10  
   12 |  10     0  

The Full matrix is only available for analyses P and L (not for A or C).

Option V (Verbose) writes one distance per line. The Verbose output is the default. Here it is for the example data set given below:

Tree distance program, version 3.6

Symmetric differences between adjacent pairs of trees:

Trees 1 and 2:    4
Trees 3 and 4:    10
Trees 5 and 6:    4
Trees 7 and 8:    4
Trees 9 and 10:    4
Trees 11 and 12:    10

Option S (Sparse or terse) is similar except that all that is given on each line are the numbers of the two trees and the distance, separated by blanks. This may be a convenient format if you want to write a program to read these numbers in, and you want to spare yourself the effort of having the program wade through the words on each line in the Verbose output. The first four lines of the Sparse output are titles that your program would want to skip past. Here is the Sparse output for the example trees.

1 2 4
3 4 10
5 6 4
7 8 4
9 10 4
11 12 10
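
A minimal Python sketch of such a reading program is shown below. It is illustrative only: it assumes the Sparse layout described above, skips any title lines because they do not consist of three numeric fields, and uses the output file name from the example that follows.

pairs = []
with open("treedist.ftreedistpair") as fh:
    for line in fh:
        fields = line.split()
        if len(fields) != 3 or not fields[0].isdigit():
            continue                     # skip title lines and blank lines
        tree1, tree2 = int(fields[0]), int(fields[1])
        distance = float(fields[2])
        pairs.append((tree1, tree2, distance))
print(len(pairs), "pairs read")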

Output files for usage example

File: treedist.ftreedistpair

1 13 0.000000e+00
1 14 2.000000e-01
1 15 1.414214e-01
1 16 3.162278e-01
1 17 3.162278e-01
1 18 3.162278e-01
1 19 3.162278e-01
1 20 3.162278e-01
1 21 3.162278e-01
1 22 3.162278e-01
1 23 1.414214e-01
1 24 3.162278e-01
2 13 2.000000e-01
2 14 0.000000e+00
2 15 1.414214e-01
2 16 3.162278e-01
2 17 2.828427e-01
2 18 3.162278e-01
2 19 2.828427e-01
2 20 3.162278e-01
2 21 2.828427e-01
2 22 3.162278e-01
2 23 1.414214e-01
2 24 3.162278e-01
3 13 1.414214e-01
3 14 1.414214e-01
3 15 0.000000e+00
3 16 3.162278e-01
3 17 3.162278e-01
3 18 3.162278e-01
3 19 3.162278e-01
3 20 3.162278e-01
3 21 3.162278e-01
3 22 3.162278e-01
3 23 0.000000e+00
3 24 3.162278e-01
4 13 3.162278e-01
4 14 3.162278e-01
4 15 3.162278e-01
4 16 0.000000e+00
4 17 1.414214e-01
4 18 1.414214e-01
4 19 2.000000e-01
4 20 1.414214e-01
4 21 2.000000e-01
4 22 0.000000e+00
4 23 3.162278e-01
4 24 1.414214e-01
5 13 3.162278e-01
5 14 2.828427e-01


  [Part of this file has been deleted for brevity]

20 10 1.414214e-01
20 11 3.162278e-01
20 12 0.000000e+00
21 1 3.162278e-01
21 2 2.828427e-01
21 3 3.162278e-01
21 4 2.000000e-01
21 5 1.414214e-01
21 6 2.000000e-01
21 7 1.414214e-01
21 8 1.414214e-01
21 9 0.000000e+00
21 10 2.000000e-01
21 11 3.162278e-01
21 12 1.414214e-01
22 1 3.162278e-01
22 2 3.162278e-01
22 3 3.162278e-01
22 4 0.000000e+00
22 5 1.414214e-01
22 6 1.414214e-01
22 7 2.000000e-01
22 8 1.414214e-01
22 9 2.000000e-01
22 10 0.000000e+00
22 11 3.162278e-01
22 12 1.414214e-01
23 1 1.414214e-01
23 2 1.414214e-01
23 3 0.000000e+00
23 4 3.162278e-01
23 5 3.162278e-01
23 6 3.162278e-01
23 7 3.162278e-01
23 8 3.162278e-01
23 9 3.162278e-01
23 10 3.162278e-01
23 11 0.000000e+00
23 12 3.162278e-01
24 1 3.162278e-01
24 2 3.162278e-01
24 3 3.162278e-01
24 4 1.414214e-01
24 5 2.000000e-01
24 6 1.414214e-01
24 7 2.000000e-01
24 8 0.000000e+00
24 9 1.414214e-01
24 10 1.414214e-01
24 11 3.162278e-01
24 12 0.000000e+00

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
econsense Majority-rule and strict consensus tree
fconsense Majority-rule and strict consensus tree
ftreedist Calculate distances between trees

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdnapenny
fdnapenny

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Penny algorithm for DNA

Description

Finds all most parsimonious phylogenies for nucleic acid sequences by branch-and-bound search. This may not be practical (depending on the data) for more than 10-11 species or so.

Algorithm

DNAPENNY is a program that will find all of the most parsimonious trees implied by your data when the nucleic acid sequence parsimony criterion is employed. It does so not by examining all possible trees, but by using the more sophisticated "branch and bound" algorithm, a standard computer science search strategy first applied to phylogenetic inference by Hendy and Penny (1982). (J. S. Farris [personal communication, 1975] had also suggested that this strategy, which is well-known in computer science, might be applied to phylogenies, but he did not publish this suggestion).

There is, however, a price to be paid for the certainty that one has found all members of the set of most parsimonious trees. The problem of finding these has been shown (Graham and Foulds, 1982; Day, 1983) to be NP-complete, which is equivalent to saying that there is no fast algorithm that is guaranteed to solve the problem in all cases (for a discussion of NP-completeness, see the Scientific American article by Lewis and Papadimitriou, 1978). The result is that this program, despite its algorithmic sophistication, is VERY SLOW.

The program should be slower than the other tree-building programs in the package, but useable up to about ten species. Above this it will bog down rapidly, but exactly when depends on the data and on how much computer time you have (it may be more effective in the hands of someone who can let a microcomputer grind all night than for someone who has the "benefit" of paying for time on the campus mainframe computer). IT IS VERY IMPORTANT FOR YOU TO GET A FEEL FOR HOW LONG THE PROGRAM WILL TAKE ON YOUR DATA. This can be done by running it on subsets of the species, increasing the number of species in the run until you either are able to treat the full data set or know that the program will take unacceptably long on it. (Making a plot of the logarithm of run time against species number may help to project run times).

The Algorithm

The search strategy used by DNAPENNY starts by making a tree consisting of the first two species (the first three if the tree is to be unrooted). Then it tries to add the next species in all possible places (there are three of these). For each of the resulting trees it evaluates the number of base substitutions. It adds the next species to each of these, again in all possible places. If this process were to continue, it would simply generate all possible trees, of which there are a very large number even when the number of species is moderate (34,459,425 with 10 species). In fact it does not do this, because the trees are generated in a particular order and some of them are never generated.

This is because the order in which trees are generated is not quite as implied above, but is a "depth-first search". This means that first one adds the third species in the first possible place, then the fourth species in its first possible place, then the fifth and so on until the first possible tree has been produced. For each tree the number of steps is evaluated. Then one "backtracks" by trying the alternative placements of the last species. When these are exhausted one tries the next placement of the next-to-last species. The order of placement in a depth-first search is like this for a four-species case (parentheses enclose monophyletic groups):

     Make tree of first two species:     (A,B)
          Add C in first place:     ((A,B),C)
               Add D in first place:     (((A,D),B),C)
               Add D in second place:     ((A,(B,D)),C)
               Add D in third place:     (((A,B),D),C)
               Add D in fourth place:     ((A,B),(C,D))
               Add D in fifth place:     (((A,B),C),D)
          Add C in second place:     ((A,C),B)
               Add D in first place:     (((A,D),C),B)
               Add D in second place:     ((A,(C,D)),B)
               Add D in third place:     (((A,C),D),B)
               Add D in fourth place:     ((A,C),(B,D))
               Add D in fifth place:     (((A,C),B),D)
          Add C in third place:     (A,(B,C))
               Add D in first place:     ((A,D),(B,C))
               Add D in second place:     (A,((B,D),C))
               Add D in third place:     (A,(B,(C,D)))
               Add D in fourth place:     (A,((B,C),D))
               Add D in fifth place:     ((A,(B,C)),D)

Among these fifteen trees you will find all of the four-species rooted trees, each exactly once (the parentheses each enclose a monophyletic group). As displayed above, the backtracking depth-first search algorithm is just another way of producing all possible trees one at a time. The branch and bound algorithm consists of this with one change. As each tree is constructed, including the partial trees such as (A,(B,C)), its number of steps is evaluated. In addition a prediction is made as to how many steps will be added, at a minimum, as further species are added.
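
As a concrete illustration of this enumeration, here is a minimal Python sketch (not the code used by the program) that attaches each new species to every branch of every tree built so far; for the four species A, B, C and D it yields exactly the fifteen rooted trees listed above.

def add_species(tree, species):
    # Yield every tree formed by attaching `species` to one branch of `tree`.
    yield (tree, species)                      # attach on the branch above this subtree
    if isinstance(tree, tuple):
        left, right = tree
        for new_left in add_species(left, species):
            yield (new_left, right)
        for new_right in add_species(right, species):
            yield (left, new_right)

def all_rooted_trees(names):
    trees = [(names[0], names[1])]             # the tree of the first two species
    for species in names[2:]:
        trees = [bigger for tree in trees for bigger in add_species(tree, species)]
    return trees

print(len(all_rooted_trees(["A", "B", "C", "D"])))   # 15, as in the listing above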

This is done by counting how many sites which are invariant in the data up to the most recent species added will ultimately show variation when further species are added. Thus if 20 sites vary among species A, B, and C and their root, and if tree ((A,C),B) requires 24 steps, then if there are 8 more sites which will be seen to vary when species D is added, we can immediately say that no matter how we add D, the resulting tree can have no fewer than 24 + 8 = 32 steps. The point of all this is that if a previously-found tree such as ((A,B),(C,D)) required only 30 steps, then we know that there is no point in even trying to add D to ((A,C),B). We have computed the bound that enables us to cut off a whole line of inquiry (in this case five trees) and avoid going down that particular branch any farther.

The branch-and-bound algorithm thus allows us to find all most parsimonious trees without generating all possible trees. How much of a saving this is depends strongly on the data. For very clean (nearly "Hennigian") data, it saves much time, but on very messy data it will still take a very long time.

The algorithm in the program differs from the one outlined here in some essential details: it investigates possibilities in the order of their apparent promise. This applies to the order of addition of species, and to the places where they are added to the tree. After the first two-species tree is constructed, the program tries adding each of the remaining species in turn, each in the best possible place it can find. Whichever of those species adds (at a minimum) the most additional steps is taken to be the one to be added next to the tree. When it is added, it is added in turn to places which cause the fewest additional steps to be added. This sounds a bit complex, but it is done with the intention of eliminating regions of the search of all possible trees as soon as possible, and lowering the bound on tree length as quickly as possible. This process of evaluating which species to add in which order goes on the first time the search makes a tree; thereafter it uses that order.

The program keeps a list of all the most parsimonious trees found so far. Whenever it finds one that requires fewer steps than these, it clears out the list and restarts it with that tree. In the process the bound tightens and fewer possibilities need be investigated. At the end the list contains all the shortest trees. These are then printed out. It should be mentioned that the program CLIQUE for finding all largest cliques also works by branch-and-bound. Both problems are NP-complete but for some reason CLIQUE runs far faster. Although their worst-case behavior is bad for both programs, those worst cases occur far more frequently in parsimony problems than in compatibility problems.
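
The cut-off test and the running list of shortest trees can be sketched as follows. This is illustrative Python only, not the program's code: score and min_future are assumed helper functions giving the step count of a partial tree and a lower bound on the steps the remaining species must still add, and add_species is the generator from the previous sketch.

def penny_search(tree, remaining, score, min_future, best):
    # tree: partial tree (nested tuples); remaining: species not yet placed.
    # best: dict {"length": ..., "trees": [...]} holding the current bound
    # and the list of shortest trees found so far.
    if score(tree) + min_future(tree, remaining) > best["length"]:
        return                                   # cut off this branch of the search
    if not remaining:
        steps = score(tree)
        if steps < best["length"]:
            best["length"], best["trees"] = steps, [tree]   # shorter: restart the list
        elif steps == best["length"]:
            best["trees"].append(tree)                      # tied: keep it too
        return
    species, rest = remaining[0], remaining[1:]
    for bigger_tree in add_species(tree, species):
        penny_search(bigger_tree, rest, score, min_future, best)

# Called as, for example:
#   best = {"length": float("inf"), "trees": []}
#   penny_search((names[0], names[1]), names[2:], score, min_future, best)

The program itself also chooses the order in which species and placements are tried according to their apparent promise, as described above, so that the bound tightens sooner.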

Controlling Run Times

Among the quantities available to be set from the menu of DNAPENNY, two (howoften and howmany) are of particular importance. As DNAPENNY goes along it will keep count of how many trees it has examined. Suppose that howoften is 100 and howmany is 1000, the default settings. Every time 100 trees have been examined, DNAPENNY will print out a line saying how many multiples of 100 trees have now been examined, how many steps the most parsimonious tree found so far has, how many trees with that number of steps have been found, and a very rough estimate of what fraction of all trees have been looked at so far. When the number of these multiples printed out reaches the number howmany (say 1000), the whole algorithm aborts and prints out that it has not found all most parsimonious trees, but prints out what it has found so far anyway. These trees need not be any of the most parsimonious trees: they are simply the most parsimonious ones found so far.

By setting the product (howoften times howmany) large you can make the algorithm less likely to abort, but then you risk getting bogged down in a gigantic computation. You should adjust these constants so that the program cannot go beyond examining the number of trees you are reasonably willing to pay for (or wait for). With their initial settings the program will abort after looking at 100,000 trees. Obviously you may want to adjust howoften in order to get more or fewer lines of intermediate notice of how many trees have been looked at so far. Of course, in small cases you may never even reach the first multiple of howoften, and nothing will be printed out except some headings and then the final trees.

The indication of the approximate percentage of trees searched so far will be helpful in judging how much farther you would have to go to get the full search. Actually, since that fraction is the fraction of the set of all possible trees searched or ruled out so far, and since the search becomes progressively more efficient, the approximate fraction printed out will usually be an underestimate of how far along the program is, sometimes a serious underestimate.

A constant at the beginning of the program that affects the result is "maxtrees", which controls the maximum number of trees that can be stored. Thus if maxtrees is 25, and 32 most parsimonious trees are found, only the first 25 of these are stored and printed out. If maxtrees is increased, the program does not run any slower but requires a little more intermediate storage space. I recommend that maxtrees be kept as large as you can, provided you are willing to look at an output with that many trees on it! Initially, maxtrees is set to 100 in the distribution copy.

Method and Options

The counting of the length of trees is done by an algorithm nearly identical to the corresponding algorithms in DNAPARS, and thus the remainder of this document will be nearly identical to the DNAPARS document.

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are exactly analogous to those of DNAPARS:

  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986). Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events.
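
For illustration only, here is a minimal Python sketch of the Fitch (1971) counting step for a single site on a rooted, bifurcating tree written as nested tuples of species names. It is not the program's own code, and a real implementation also handles ambiguity codes and gaps as described above.

def fitch_changes(tree, base_of):
    # Minimum number of base changes at one site (Fitch, 1971).
    # tree: nested 2-tuples of species names; base_of: species name -> base.
    def post(node):
        if not isinstance(node, tuple):
            return {base_of[node]}, 0
        left_states, left_cost = post(node[0])
        right_states, right_cost = post(node[1])
        shared = left_states & right_states
        if shared:                                   # children can agree: no change here
            return shared, left_cost + right_cost
        return left_states | right_states, left_cost + right_cost + 1
    return post(tree)[1]

# For the first site of the example data further down this page:
#   fitch_changes((("Alpha1", "Gamma1"), ("Delta", "Epsilon")),
#                 {"Alpha1": "A", "Gamma1": "A", "Delta": "G", "Epsilon": "G"})
# returns 1.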

Usage

Here is a sample session with fdnapenny


% fdnapenny 
Penny algorithm for DNA
Input (aligned) nucleotide sequence set(s): dnapenny.dat
Phylip dnapenny program output file [dnapenny.fdnapenny]: 

justweights: false
numwts: 0

How many
trees looked                                       Approximate
at so far      Length of        How many           percentage
(multiples     shortest tree    trees this short   searched
of  100):      found so far     found so far       so far
----------     ------------     ------------       ------------
      1             9.0                2                0.11
      2             8.0                3                6.67
      3             8.0                9               20.00
      4             8.0                9               86.67

Output written to file "dnapenny.fdnapenny"

Trees also written onto file "dnapenny.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Penny algorithm for DNA
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-outfile]           outfile    [*.fdnapenny] Phylip dnapenny program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties (no help text) properties value
   -howoften           integer    [100] How often to report, in trees (Any
                                  integer value)
   -howmany            integer    [1000] How many groups of trees (Any integer
                                  value)
   -[no]simple         boolean    [Y] Branch and bound is simple
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1.0] Threshold value (Number 1.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdnapenny] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -stepbox            boolean    [N] Print out steps in each site
   -ancseq             boolean    [N] Print sequences at all nodes of tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-outfile]
(Parameter 2)
outfile Phylip dnapenny program output file Output file <*>.fdnapenny
Additional (Optional) qualifiers
-weights properties (no help text) properties value Property value(s)  
-howoften integer How often to report, in trees Any integer value 100
-howmany integer How many groups of trees Any integer value 1000
-[no]simple boolean Branch and bound is simple Boolean value Yes/No Yes
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 1.000 or more 1.0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnapenny
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-stepbox boolean Print out steps in each site Boolean value Yes/No No
-ancseq boolean Print sequences at all nodes of tree Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnapenny reads any normal sequence USAs

Input files for usage example

File: dnapenny.dat

    8    6
Alpha1    AAGAAG
Alpha2    AAGAAG
Beta1     AAGGGG
Beta2     AAGGGG
Gamma1    AGGAAG
Gamma2    AGGAAG
Delta     GGAGGA
Epsilon   GGAAAG

Output file format

fdnapenny output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each character. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.
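
If you want to read those weights back in, for example before passing the trees to another program, a minimal Python sketch such as the following would work. It assumes one Newick tree per record terminated by a semicolon, with the weight in square brackets just before the semicolon, and uses the tree file name from the example below.

import re

with open("dnapenny.treefile") as fh:
    text = fh.read()

for record in (r.strip() for r in text.split(";")):
    if not record:
        continue
    match = re.search(r"\[([0-9.eE+-]+)\]$", record)
    weight = float(match.group(1)) if match else 1.0   # no bracket: weight 1.0
    newick = re.sub(r"\[[0-9.eE+-]+\]$", "", record) + ";"
    print(weight, newick)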

Output files for usage example

File: dnapenny.fdnapenny


Penny algorithm for DNA, version 3.69.650
 branch-and-bound to find all most parsimonious trees


requires a total of              8.000

     9 trees in all found




  +--------------------Alpha1    
  !  
  !        +-----------Alpha2    
  !        !  
  1  +-----4        +--Epsilon   
  !  !     !  +-----6  
  !  !     !  !     +--Delta     
  !  !     +--5  
  +--2        !     +--Gamma2    
     !        +-----7  
     !              +--Gamma1    
     !  
     !              +--Beta2     
     +--------------3  
                    +--Beta1     

  remember: this is an unrooted tree!





  +--------------------Alpha1    
  !  
  !        +-----------Alpha2    
  !        !  
  1  +-----4  +--------Gamma2    
  !  !     !  !  
  !  !     +--7     +--Epsilon   
  !  !        !  +--6  
  +--2        +--5  +--Delta     
     !           !  
     !           +-----Gamma1    
     !  
     !              +--Beta2     
     +--------------3  
                    +--Beta1     



  [Part of this file has been deleted for brevity]

              +--5  +--Delta     
                 !  
                 +-----Gamma1    

  remember: this is an unrooted tree!





  +--------------------Alpha1    
  !  
  !              +-----Alpha2    
  1  +-----------2  
  !  !           !  +--Beta2     
  !  !           +--3  
  !  !              +--Beta1     
  +--4  
     !           +-----Gamma2    
     !        +--7  
     !        !  !  +--Epsilon   
     +--------5  +--6  
              !     +--Delta     
              !  
              +--------Gamma1    

  remember: this is an unrooted tree!





  +--------------------Alpha1    
  !  
  !              +-----Alpha2    
  1  +-----------2  
  !  !           !  +--Beta2     
  !  !           +--3  
  !  !              +--Beta1     
  +--4  
     !              +--Epsilon   
     !        +-----6  
     !        !     +--Delta     
     +--------5  
              !     +--Gamma2    
              +-----7  
                    +--Gamma1    

  remember: this is an unrooted tree!


File: dnapenny.treefile

(Alpha1,((Alpha2,((Epsilon,Delta),(Gamma2,Gamma1))),(Beta2,Beta1)))[0.1111];
(Alpha1,((Alpha2,(Gamma2,((Epsilon,Delta),Gamma1))),(Beta2,Beta1)))[0.1111];
(Alpha1,((Alpha2,((Gamma2,(Epsilon,Delta)),Gamma1)),(Beta2,Beta1)))[0.1111];
(Alpha1,(Alpha2,((Gamma2,((Epsilon,Delta),Gamma1)),(Beta2,Beta1))))[0.1111];
(Alpha1,(Alpha2,(((Epsilon,Delta),(Gamma2,Gamma1)),(Beta2,Beta1))))[0.1111];
(Alpha1,(Alpha2,(((Gamma2,(Epsilon,Delta)),Gamma1),(Beta2,Beta1))))[0.1111];
(Alpha1,((Alpha2,(Beta2,Beta1)),(Gamma2,((Epsilon,Delta),Gamma1))))[0.1111];
(Alpha1,((Alpha2,(Beta2,Beta1)),((Gamma2,(Epsilon,Delta)),Gamma1)))[0.1111];
(Alpha1,((Alpha2,(Beta2,Beta1)),((Epsilon,Delta),(Gamma2,Gamma1))))[0.1111];

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: ftreedist
ftreedist

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Calculate distances between trees

Description

Computes the Branch Score distance between trees, which allows for differences in tree topology and which also makes use of branch lengths. Also computes the Robinson-Foulds symmetric difference distance between trees, which allows for differences in tree topology but does not use branch lengths.

Algorithm

This program computes distances between trees. Two distances are computed, the Branch Score Distance of Kuhner and Felsenstein (1994), and the more widely known Symmetric Difference of Robinson and Foulds (1981). The Branch Score Distance uses branch lengths, and can only be calculated when the trees have lengths on all branches. The Symmetric Difference does not use branch length information, only the tree topologies. It must also be borne in mind that neither distance has any immediate statistical interpretation -- we cannot say whether a larger distance is significantly larger than a smaller one.

These distances are computed by considering all possible branches that could exist on the two trees. Each branch divides the set of species into two groups -- the ones connected to one end of the branch and the ones connected to the other. This makes a partition of the full set of species. Thus the tree (in Newick notation)

  ((A,C),(D,(B,E))) 

has two internal branches. One induces the partition {A, C | B, D, E} and the other induces the partition {A, C, D | B, E}. A different tree with the same set of species,

  (((A,D),C),(B,E)) 

has internal branches that correspond to the two partitions {A, C, D | B, E} and {A, D | B, C, E}. Note that the other branches, all of which are external branches, induce partitions that separate one species from all the others. Thus there are 5 partitions like this: {C | A, B, D, E} on each of these trees. These are always present on all trees, provided that each tree has each species at the end of its own branch.

In the case of the Branch Score distance, each partition that does exist on a tree also has a branch length associated with it. Thus if the tree is

  (((A:0.1,D:0.25):0.05,C:0.01):0.2,(B:0.3,E:0.8):0.2) 

The list of partitions and their branch lengths is:

{A  |  B, C, D, E}     0.1 
{D  |  A, B, C, E}     0.25 
{A, D  |  B, C, E}     0.05 
{C  |  A, B, D, E}     0.01 
{A, D, C  |  B, E}     0.4 
{B  |  A, C, D, E}     0.3 
{E  |  A, B, C, D}     0.8 

Note that the tree is being treated as unrooted here, so that the branch lengths on either side of the rootmost node are summed up to get a branch length of 0.4.

The Branch Score Distance imagines us as having made a list of all possible partitions, the ones shown above and also all 7 other possible partitions, which correspond to branches that are not found in this tree. These are assigned branch lengths of 0. For two trees, we imagine constructing these lists, and then summing the squared differences between the branch lengths. Thus if both trees have branches {A, D | B, C, E}, the sum contains the square of the difference between the branch lengths. If one tree has the branch and the other doesn't, it contains the square of the difference between the branch length and zero (in other words, the square of that branch length). If both trees do not have a particular branch, nothing is added to the sum because the difference is then between 0 and 0.

The Branch Score Distance takes this sum of squared differences and computes its square root. Note that it has some desirable properties. When small branches differ in tree topology, it is not very big. When branches are both present but differ in length, it is affected.
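
As an illustration of this computation, here is a minimal Python sketch (not the treedist code itself) in which each tree is represented by a dictionary of branch lengths indexed by the partition the branch induces; a partition absent from a tree counts as a branch of length zero.

from math import sqrt

def branch_score(lengths1, lengths2):
    partitions = set(lengths1) | set(lengths2)
    total = sum((lengths1.get(p, 0.0) - lengths2.get(p, 0.0)) ** 2
                for p in partitions)
    return sqrt(total)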

The Symmetric Difference is simply a count of how many partitions there are, among the two trees, that are on one tree and not on the other. In the example above there are two partitions, {A, C | B, D, E} and {A, D | B, C, E}, each of which is present on only one of the two trees. The Symmetric Difference between the two trees is therefore 2. When the two trees are fully resolved bifurcating trees, their symmetric distance must be an even number; it can range from 0 to twice the number of internal branches, which for n species is 2n-6.

Note the relationship between the two distances. If all branches of all trees have length 1.0, the square of the Branch Score Distance equals the Symmetric Difference (that is, the Branch Score Distance is the square root of the Symmetric Difference), as each branch that is present in one tree but not in the other adds 1.0 to the sum of squared differences.
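
Continuing the illustrative sketch above, the Symmetric Difference is just the size of the set difference between the two collections of internal-branch partitions. Writing each partition as the set of species on the side containing A reproduces the worked example, and giving every branch length 1.0 shows the square-root relationship.

# Species names here are single letters, as in the example above.
tree1_parts = {frozenset("AC"), frozenset("ACD")}     # ((A,C),(D,(B,E)))
tree2_parts = {frozenset("ACD"), frozenset("AD")}     # (((A,D),C),(B,E))

print(len(tree1_parts ^ tree2_parts))                 # 2, the Symmetric Difference

# With every branch given length 1.0, the Branch Score Distance is the
# square root of the Symmetric Difference:
print(branch_score({p: 1.0 for p in tree1_parts},
                   {p: 1.0 for p in tree2_parts}))    # 1.414... = sqrt(2)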

We have assumed that nothing is lost if the trees are treated as unrooted trees. It is easy to define a counterpart to the Branch Score Distance and one to the Symmetric Difference for rooted trees. Each branch then defines a set of species, namely the clade defined by that branch. Thus if the first of the two trees above were considered as a rooted tree it would define the three clades {A, C}, {B, D, E}, and {B, E}. The Branch Score Distance is computed from the branch lengths for all possible sets of species, with 0 put for each set that does not occur on that tree. The table above will be nearly the same, but with two entries instead of one for the sets on either side of the root, {A, C, D} and {B, E}. The Symmetric Difference between two rooted trees is simply the count of the number of clades that are defined by one but not by the other. For the second tree the clades would be {A, D}, {A, C, D}, and {B, E}. The Symmetric Difference between these two rooted trees would then be 4.

Although the examples we have discussed have involved fully bifurcating trees, the input trees can have multifurcations. This does not cause any complication for the Branch Score Distance. For the Symmetric Difference, it can lead to distances that are odd numbers.

However, note one strong restriction. The trees should all have the same list of species. If you use one set of species in the first two trees, and another in the second two, and choose distances for adjacent pairs, the distances will be incorrect and will depend on the order of these pairs in the input tree file, in odd ways.

Usage

Here is a sample session with ftreedist


% ftreedist 
Calculate distances between trees
Phylip tree file: treedist.dat
Phylip treedist program output file [treedist.ftreedist]: 


Output written to file "treedist.ftreedist"

Done.


Go to the input files for this example
Go to the output files for this example

Example 2


% ftreedist -dtype s 
Calculate distances between trees
Phylip tree file: treedist2.dat
Phylip treedist program output file [treedist2.ftreedist]: 


Output written to file "treedist2.ftreedist"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Calculate distances between trees
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-intreefile]        tree       Phylip tree file
  [-outfile]           outfile    [*.ftreedist] Phylip treedist program output
                                  file

   Additional (Optional) qualifiers:
   -dtype              menu       [b] Distance type (Values: s (Symmetric
                                  difference); b (Branch score distance))
   -pairing            menu       [a] Tree pairing method (Values: a
                                  (Distances between adjacent pairs in tree
                                  file); p (Distances between all possible
                                  pairs in tree file))
   -style              menu       [v] Distances output option (Values: f (Full
                                  matrix); v (Verbose, one pair per line); s
                                  (Sparse, one pair per line))
   -noroot             boolean    [N] Trees to be treated as rooted
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-intreefile]
(Parameter 1)
tree Phylip tree file Phylogenetic tree  
[-outfile]
(Parameter 2)
outfile Phylip treedist program output file Output file <*>.ftreedist
Additional (Optional) qualifiers
-dtype list Distance type
s (Symmetric difference)
b (Branch score distance)
b
-pairing list Tree pairing method
a (Distances between adjacent pairs in tree file)
p (Distances between all possible pairs in tree file)
a
-style list Distances output option
f (Full matrix)
v (Verbose, one pair per line)
s (Sparse, one pair per line)
v
-noroot boolean Trees to be treated as rooted Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

ftreedist reads one input tree file. If the number of trees is given, it is actually ignored and all trees in the tree file are considered, even if there are more trees than indicated by the number. There is no maximum number of trees that can be processed but, if you feed in too many, there may be an error message about running out of memory. The problem is particularly acute if you choose the option to examine all possible pairs of trees in an input tree file. Thus if there are 1,000 trees in the input tree file, keep in mind that all possible pairs means 1,000,000 pairs to be examined!

Input files for usage example

File: treedist.dat

(A:0.1,(B:0.1,(H:0.1,(D:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,((J:0.1,H:0.1):0.1,(((G:0.1,E:0.1):0.1,
(F:0.1,I:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((F:0.1,I:0.1):0.1,(G:0.1,(((J:0.1,H:0.1):0.1,D:0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1,
D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,(((J:0.1,H:0.1):0.1,
D:0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,(G:0.1,((F:0.1,I:0.1):0.1,((J:0.1,(H:0.1,D:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(D:0.1,(H:0.1,(J:0.1,(((G:0.1,E:0.1):0.1,(F:0.1,I:0.1):0.1):0.1,
C:0.1):0.1):0.1):0.1):0.1):0.1);
(A:0.1,(B:0.1,(E:0.1,((G:0.1,(F:0.1,I:0.1):0.1):0.1,((J:0.1,(H:0.1,
D:0.1):0.1):0.1,C:0.1):0.1):0.1):0.1):0.1);

Input files for usage example 2

File: treedist2.dat

(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));
(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(E,(G,((F,I),(((J,H),D),C))))));
(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));
(A,(B,(E,((F,I),(G,(((J,H),D),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));
(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));

Output file format

If any of the four types of analysis are selected, the user must specify how they want the results presented.

The Full matrix (choice F) is a table showing all distances. It is written onto the output file. The table is presented as groups of 10 columns. Here is the Full matrix for the 12 trees in the input tree file which is given as an example at the end of this page.

Tree distance program, version 3.6

Symmetric differences between all pairs of trees in tree file:



          1     2     3     4     5     6     7     8     9    10 
      \------------------------------------------------------------
    1 |   0     4     2    10    10    10    10    10    10    10  
    2 |   4     0     2    10     8    10     8    10     8    10  
    3 |   2     2     0    10    10    10    10    10    10    10  
    4 |  10    10    10     0     2     2     4     2     4     0  
    5 |  10     8    10     2     0     4     2     4     2     2  
    6 |  10    10    10     2     4     0     2     2     4     2  
    7 |  10     8    10     4     2     2     0     4     2     4  
    8 |  10    10    10     2     4     2     4     0     2     2  
    9 |  10     8    10     4     2     4     2     2     0     4  
   10 |  10    10    10     0     2     2     4     2     4     0  
   11 |   2     2     0    10    10    10    10    10    10    10  
   12 |  10    10    10     2     4     2     4     0     2     2  

         11    12 
      \------------
    1 |   2    10  
    2 |   2    10  
    3 |   0    10  
    4 |  10     2  
    5 |  10     4  
    6 |  10     2  
    7 |  10     4  
    8 |  10     0  
    9 |  10     2  
   10 |  10     2  
   11 |   0    10  
   12 |  10     0  

The Full matrix is only available for analyses P and L (not for A or C).

Option V (Verbose) writes one distance per line. The Verbose output is the default. Here it is for the example data set given below:

Tree distance program, version 3.6

Symmetric differences between adjacent pairs of trees:

Trees 1 and 2:    4
Trees 3 and 4:    10
Trees 5 and 6:    4
Trees 7 and 8:    4
Trees 9 and 10:    4
Trees 11 and 12:    10

Option S (Sparse or terse) is similar except that all that is given on each line are the numbers of the two trees and the distance, separated by blanks. This may be a convenient format if you want to write a program to read these numbers in, and you want to spare yourself the effort of having the program wade through the words on each line in the Verbose output. The first four lines of the Sparse output are titles that your program would want to skip past. Here is the Sparse output for the example trees.

1 2 4
3 4 10
5 6 4
7 8 4
9 10 4
11 12 10

Output files for usage example

File: treedist.ftreedist


Tree distance program, version 3.69.650

Branch score distances between adjacent pairs of trees:

Trees 1 and 2:    2.000000e-01
Trees 3 and 4:    3.162278e-01
Trees 5 and 6:    2.000000e-01
Trees 7 and 8:    2.000000e-01
Trees 9 and 10:    2.000000e-01
Trees 11 and 12:    3.162278e-01

Output files for usage example 2

File: treedist2.ftreedist


Tree distance program, version 3.69.650

Symmetric differences between adjacent pairs of trees:

Trees 1 and 2:    4
Trees 3 and 4:    10
Trees 5 and 6:    4
Trees 7 and 8:    4
Trees 9 and 10:    4
Trees 11 and 12:    10

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
econsense Majority-rule and strict consensus tree
fconsense Majority-rule and strict consensus tree
ftreedistpair Calculate distance between two sets of trees

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fneighbor
fneighbor

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Phylogenies from distance matrix by N-J or UPGMA method

Description

An implementation by Mary Kuhner and Jon Yamato of Saitou and Nei's "Neighbor Joining Method," and of the UPGMA (Average Linkage clustering) method. Neighbor Joining is a distance matrix method producing an unrooted tree without the assumption of a clock. UPGMA does assume a clock. The branch lengths are not optimized by the least squares criterion, but the methods are very fast and thus can handle much larger data sets.

Algorithm

This program implements the Neighbor-Joining method of Saitou and Nei (1987) and the UPGMA method of clustering. The program was written by Mary Kuhner and Jon Yamato, using some code from program FITCH. An important part of the code was translated from FORTRAN code from the neighbor-joining program written by Naruya Saitou and by Li Jin, and is used with the kind permission of Drs. Saitou and Jin.

NEIGHBOR constructs a tree by successive clustering of lineages, setting branch lengths as the lineages join. The tree is not rearranged thereafter. The tree does not assume an evolutionary clock, so that it is in effect an unrooted tree. It should be somewhat similar to the tree obtained by FITCH. The program cannot evaluate a User tree, nor can it prevent branch lengths from becoming negative. However the algorithm is far faster than FITCH or KITSCH. This will make it particularly effective in their place for large studies or for bootstrap or jackknife resampling studies which require runs on multiple data sets.

The UPGMA option constructs a tree by successive (agglomerative) clustering using an average-linkage method of clustering. It has some relationship to KITSCH, in that when the tree topology turns out the same, the branch lengths with UPGMA will turn out to be the same as with the P = 0 option of KITSCH.
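
As a sketch of what average-linkage clustering does (illustrative Python only, not the fneighbor code), the following repeatedly joins the closest pair of clusters, places the new node at half their distance so that a clock holds, and computes distances to the new cluster as size-weighted averages of the old distances.

def upgma(names, d):
    # d maps an unordered pair of labels to a distance, e.g. d[("A", "B")] = 2.0
    dist = {tuple(sorted(k)): v for k, v in d.items()}
    # label -> (subtree as nested (child, branch length) pairs, height, size)
    clusters = {n: (n, 0.0, 1) for n in names}
    while len(clusters) > 1:
        a, b = min(((x, y) for x in clusters for y in clusters if x < y),
                   key=lambda pair: dist[pair])
        (ta, ha, na), (tb, hb, nb) = clusters[a], clusters[b]
        height = dist[(a, b)] / 2.0          # clock: both tips equidistant from the node
        new = a + "+" + b
        for c in clusters:
            if c not in (a, b):
                dac = dist[tuple(sorted((a, c)))]
                dbc = dist[tuple(sorted((b, c)))]
                # average linkage: size-weighted mean of the two old distances
                dist[tuple(sorted((new, c)))] = (na * dac + nb * dbc) / (na + nb)
        del clusters[a], clusters[b]
        clusters[new] = (((ta, height - ha), (tb, height - hb)), height, na + nb)
    return next(iter(clusters.values()))[0]

# For example:
#   upgma(["A", "B", "C"], {("A", "B"): 2.0, ("A", "C"): 4.0, ("B", "C"): 4.0})
# returns (((('A', 1.0), ('B', 1.0)), 1.0), ('C', 2.0))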

The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that:

  • Each distance is measured independently from the others: no item of data contributes to more than one distance.
  • The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation.

These assumptions can be traced in the least squares methods of programs FITCH and KITSCH, but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumption is less obvious.

THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b).

For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better.

Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIP by the program RESTDIST.

For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true).

FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time.

Usage

Here is a sample session with fneighbor


% fneighbor 
Phylogenies from distance matrix by N-J or UPGMA method
Phylip distance matrix file: neighbor.dat
Phylip neighbor program output file [neighbor.fneighbor]: 



Cycle   4: species 1 (   0.91769) joins species 2 (   0.76891)
Cycle   3: node 1 (   0.42027) joins species 3 (   0.35793)
Cycle   2: species 6 (   0.15168) joins species 7 (   0.11752)
Cycle   1: node 1 (   0.04648) joins species 4 (   0.28469)
last cycle:
 node 1  (   0.02696) joins species 5  (   0.15393) joins node 6  (   0.03982)

Output written on file "neighbor.fneighbor"

Tree written on file "neighbor.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Phylogenies from distance matrix by N-J or UPGMA method
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-datafile]          distances  Phylip distance matrix file
  [-outfile]           outfile    [*.fneighbor] Phylip neighbor program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -matrixtype         menu       [s] Type of data matrix (Values: s (Square);
                                  u (Upper triangular); l (Lower triangular))
   -treetype           menu       [n] Neighbor-joining or UPGMA tree (Values:
                                  n (Neighbor-joining); u (UPGMA))
*  -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -jumble             toggle     [N] Randomise input order of species
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -replicates         boolean    [N] Subreplicates
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fneighbor] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-datafile]
(Parameter 1)
distances Phylip distance matrix file Distance matrix  
[-outfile]
(Parameter 2)
outfile Phylip neighbor program output file Output file <*>.fneighbor
Additional (Optional) qualifiers
-matrixtype list Type of data matrix
s (Square)
u (Upper triangular)
l (Lower triangular)
s
-treetype list Neighbor-joining or UPGMA tree
n (Neighbor-joining)
u (UPGMA)
n
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-jumble toggle Randomise input order of species Toggle value Yes/No No
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-replicates boolean Subreplicates Boolean value Yes/No No
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fneighbor
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fneighbor reads a PHYLIP-format distance matrix file.

Input files for usage example

File: neighbor.dat

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000

Output file format

fneighbor output consists of a tree (rooted if UPGMA, unrooted if Neighbor-Joining) and the lengths of the interior segments. The Average Percent Standard Deviation is not computed or printed out. If the tree found by Neighbor is fed into FITCH as a User Tree, it will compute this quantity if one also selects the N option of FITCH to ensure that none of the branch lengths is re-estimated.

As NEIGHBOR runs it prints out an account of the successive clustering levels, if you allow it to. This is mostly for reassurance and can be suppressed with the -noprogress option. In this printout of cluster levels the word "OTU" refers to a tip species, and the word "NODE" to an interior node of the resulting tree.

Output files for usage example

File: neighbor.fneighbor


Neighbor-Joining/UPGMA method version 3.69.650


   7 Populations

 Neighbor-joining method

 Negative branch lengths allowed


  +---------------------------------------------Mouse     
  ! 
  !                        +---------------------Gibbon    
  1------------------------2 
  !                        !  +----------------Orang     
  !                        +--4 
  !                           ! +--------Gorilla   
  !                           +-5 
  !                             ! +--------Chimp     
  !                             +-3 
  !                               +------Human     
  ! 
  +------------------------------------------------------Bovine    


remember: this is an unrooted tree!

Between        And            Length
-------        ---            ------
   1          Mouse           0.76891
   1             2            0.42027
   2          Gibbon          0.35793
   2             4            0.04648
   4          Orang           0.28469
   4             5            0.02696
   5          Gorilla         0.15393
   5             3            0.03982
   3          Chimp           0.15168
   3          Human           0.11752
   1          Bovine          0.91769


File: neighbor.treefile

(Mouse:0.76891,(Gibbon:0.35793,(Orang:0.28469,(Gorilla:0.15393,
(Chimp:0.15168,Human:0.11752):0.03982):0.02696):0.04648):0.42027,Bovine:0.91769);
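
The tree file is written in standard Newick format, so it can be read by most tree-handling software. As a minimal sketch (assuming Biopython is installed; this is not part of EMBOSS itself):

  from Bio import Phylo

  tree = Phylo.read("neighbor.treefile", "newick")
  Phylo.draw_ascii(tree)                  # rough text drawing of the tree
  for tip in tree.get_terminals():
      print(tip.name, tip.branch_length)  # e.g. Mouse 0.76891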

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
efitch Fitch-Margoliash and least-squares distance methods
ekitsch Fitch-Margoliash method with contemporary tips
eneighbor Phylogenies from distance matrix by N-J or UPGMA method
ffitch Fitch-Margoliash and least-squares distance methods
fkitsch Fitch-Margoliash method with contemporary tips

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fprotdist
fprotdist

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Protein distance algorithm

Description

Computes a distance measure for protein sequences, using maximum likelihood estimates based on the Dayhoff PAM matrix, the JTT matrix model, the PMB model, Kimura's 1983 approximation to these, or a model based on the genetic code plus a constraint on changing to a different category of amino acid. The distances can also be corrected for gamma-distributed and gamma-plus-invariant-sites-distributed rates of change in different sites. Rates of evolution can vary among sites in a prespecified way, and also according to a Hidden Markov model. The program can also make a table of percentage similarity among sequences. The distances can be used in the distance matrix programs.

Algorithm

This program uses protein sequences to compute a distance matrix, under four different models of amino acid replacement. It can also compute a table of similarity between the amino acid sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the parsimony program PROTPARS.

The program reads in protein sequences and writes an output file containing the distance matrix or similarity table. The five models of amino acid substitution are one which is based on the Jones, Taylor and Thornton (1992) model of amino acid change, the PMB model (Veerassamy, Smith and Tillier, 2004) which is derived from the Blocks database of conserved protein motifs, one based on the PAM matrixes of Margaret Dayhoff, one due to Kimura (1983) which approximates it based simply on the fraction of similar amino acids, and one based on a model in which the amino acids are divided up into groups, with change occurring based on the genetic code but with greater difficulty of changing between groups. The program correctly takes into account a variety of sequence ambiguities.

The five methods are:

(1) The Dayhoff PAM matrix. This uses Dayhoff's PAM 001 matrix from Dayhoff (1979), page 348. The PAM model is an empirical one that scales probabilities of change from one amino acid to another in terms of a unit which is an expected 1% change between two amino acid sequences. The PAM 001 matrix is used to make a transition probability matrix which allows prediction of the probability of changing from any one amino acid to any other, and also predicts equilibrium amino acid composition. The program assumes that these probabilities are correct and bases its computations of distance on them. The distance that is computed is scaled in units of expected fraction of amino acids changed. This is a unit such that 1.0 is 100 PAM's.

(2) The Jones-Taylor-Thornton model. This is similar to the Dayhoff PAM model, except that it is based on a recounting of the number of observed changes in amino acids by Jones, Taylor, and Thornton (1992). They used a much larger sample of protein sequences than did Dayhoff. The distance is scaled in units of the expected fraction of amino acids changed (100 PAM's). Because its sample is so much larger this model is to be preferred over the original Dayhoff PAM model. It is the default model in this program.

(3) The PMB (Probability Matrix from Blocks) model. This is derived using the Blocks database of conserved protein motifs. It will be described in a paper by Veerassamy, Smith and Tillier (2004). Elisabeth Tillier kindly made the matrices available for this model.

(4) Kimura's distance. This is a rough-and-ready distance formula for approximating PAM distance by simply measuring the fraction of amino acids, p, that differs between two sequences and computing the distance as (Kimura, 1983) D = -log_e(1 - p - 0.2 p^2). This is very quick to do but has some obvious limitations. It does not take into account which amino acids differ or to what amino acids they change, so some information is lost. The units of the distance measure are fraction of amino acids differing, as also in the case of the PAM distance. If the fraction of amino acids differing gets larger than 0.8541 the distance becomes infinite.
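
A minimal sketch of this formula (plain Python; the helper name and the example value are made up for illustration):

  import math

  def kimura_protein_distance(p):
      """Kimura (1983) approximation: distance from the fraction p of
      differing amino acid positions.  Infinite once p reaches about 0.8541."""
      arg = 1.0 - p - 0.2 * p * p
      if arg <= 0.0:
          return float("inf")      # fprotdist reports such pairs as -1.0
      return -math.log(arg)

  # two sequences differing at 30% of their positions:
  # kimura_protein_distance(0.30) is about 0.383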

(5) The Categories distance. This is my own concoction. I imagined a nucleotide sequence changing according to Kimura's 2-parameter model, with the exception that some changes of amino acids are less likely than others. The amino acids are grouped into a series of categories. Any base change that does not change which category the amino acid is in is allowed, but if an amino acid changes category this is allowed only a certain fraction of the time. The fraction is called the "ease" and there is a parameter for it, which is 1.0 when all changes are allowed and near 0.0 when changes between categories are nearly impossible.

In this option I have allowed the user to select the Transition/Transversion ratio, which of several genetic codes to use, and which categorization of amino acids to use. There are three of them, a somewhat random sample:

  1. The George-Hunt-Barker (1988) classification of amino acids,
  2. A classification provided by my colleague Ben Hall when I asked him for one,
  3. One I found in an old "baby biochemistry" book (Conn and Stumpf, 1963), which contains most of the biochemistry I was ever taught, and all that I ever learned.

Interestingly enough, all of them are consistent with the same linear ordering of amino acids, which they divide up in different ways. For the Categories model I have set as default the George/Hunt/Barker classification with the "ease" parameter set to 0.457 which is approximately the value implied by the empirical rates in the Dayhoff PAM matrix.

The method uses, as I have noted, Kimura's (1980) 2-parameter model of DNA change. The Kimura "2-parameter" model allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:


              To:     A        G        C        T
                   ---------------------------------
               A  | 1-a-2b     a         b       b
       From:   G  |   a      1-a-2b      b       b
               C  |   b        b       1-a-2b    a
               T  |   b        b         a     1-a-2b

where a is u dt, the product of the rate of transitions per unit time and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length dt of the time interval.
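
As a small check of this matrix (a sketch in Python with NumPy, using arbitrary values of a and b), each row sums to 1 as required of a transition probability matrix:

  import numpy as np

  def kimura2p_step(a, b):
      """One-step matrix in the order A, G, C, T: a is the transition
      probability, b the probability of each specific transversion."""
      P = np.full((4, 4), b)
      P[0, 1] = P[1, 0] = a                 # A <-> G
      P[2, 3] = P[3, 2] = a                 # C <-> T
      np.fill_diagonal(P, 1.0 - a - 2.0 * b)
      return P

  P = kimura2p_step(0.01, 0.002)
  assert np.allclose(P.sum(axis=1), 1.0)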

Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. The Kimura distance is straightforward to compute. The others are considerably slower: they look at all positions, and find that distance which makes the likelihood highest. This distance is in effect the length of the internal branch in a two-species tree that connects these two species. Its likelihood is just the product, under the model, of the probabilities of each position having the (one or) two amino acids that are actually found. This is fairly slow to compute.

The computation proceeds from an eigenanalysis (spectral decomposition) of the transition probability matrix. In the case of the PAM 001 matrix the eigenvalues and eigenvectors are precomputed and are hard-coded into the program in over 400 statements. In the case of the Categories model the program computes the eigenvalues and eigenvectors itself, which will add a delay. But the delay is independent of the number of species as the calculation is done only once, at the outset.

The actual algorithm for estimating the distance is in both cases a bisection algorithm which tries to find the point at which the derivative of the likelihood is zero. Some of the kinds of ambiguous amino acids like "glx" are correctly taken into account. However, gaps are treated as if they are unknown amino acids, which means those positions get dropped from that particular comparison. They are not, however, dropped from the whole analysis. You need not eliminate regions containing gaps, as long as you are reasonably sure of the alignment there.

Note that there is an assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause the distances to misinterpret the meaning of those positions that had changed.

The program can now correct distances for unequal rates of change at different amino acid positions. This correction, which was introduced for DNA sequences by Jin and Nei (1990), assumes that the distribution of rates of change among amino acid positions follows a Gamma distribution. The user is asked for the value of a parameter that determines the amount of variation of rates among amino acid positions. Instead of the more widely-known coefficient alpha, PROTDIST uses the coefficient of variation (ratio of the standard deviation to the mean) of rates among amino acid positions. So if there is 20% variation in rates, the CV is 0.20. The square of the CV is also the reciprocal of the better-known "shape parameter", alpha, of the Gamma distribution, so in this case the shape parameter alpha = 1/(0.20*0.20) = 25. If you want to achieve a particular value of alpha, such as 100, you will want to use a CV of 1/sqrt(100) = 1/10 = 0.1.
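
The conversion between the coefficient of variation and alpha is simple; a sketch in plain Python (the numbers are the ones used above):

  import math

  def alpha_from_cv(cv):
      return 1.0 / (cv * cv)          # CV of 0.20  ->  alpha = 25

  def cv_from_alpha(alpha):
      return 1.0 / math.sqrt(alpha)   # alpha of 100  ->  CV = 0.1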

In addition to the five distance calculations, the program can also compute a table of similarities between amino acid sequences. These values are the fractions of amino acid positions identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical amino acids, so that no credit is given for having (for example) different hydrophobic amino acids at the corresponding positions in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree.

Usage

Here is a sample session with fprotdist


% fprotdist 
Protein distance algorithm
Input (aligned) protein sequence set(s): protdist.dat
Phylip distance matrix output file [protdist.fprotdist]: 


Computing distances:
  Alpha        
  Beta         .
  Gamma        ..
  Delta        ...
  Epsilon      ....

Output written to file "protdist.fprotdist"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Protein distance algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-outfile]           outfile    [*.fprotdist] Phylip distance matrix output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      Rate for each category
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
   -method             menu       [j] Choose the method to use (Values: j
                                  (Jones-Taylor-Thornton matrix); h
                                  (Henikoff/Tiller PMB matrix); d (Dayhoff PAM
                                  matrix); k (Kimura formula); s (Similarity
                                  table); c (Categories model))
*  -gammatype          menu       [c] Rate variation among sites (Values: g
                                  (Gamma distributed rates); i
                                  (Gamma+invariant sites); c (Constant rate))
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -invarcoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -aacateg            menu       [G] Choose the category to use (Values: G
                                  (George/Hunt/Barker (Cys), (Met Val Leu
                                  Ileu), (Gly Ala Ser Thr Pro)); C (Chemical
                                  (Cys Met), (Val Leu Ileu Gly Ala Ser Thr),
                                  (Pro)); H (Hall (Cys), (Met Val Leu Ileu),
                                  (Gly Ala Ser Thr),(Pro)))
*  -whichcode          menu       [u] Which genetic code (Values: u
                                  (Universal); c (Ciliate); m (Universal
                                  mitochondrial); v (Vertebrate
                                  mitochondrial); f (Fly mitochondrial); y
                                  (Yeast mitochondrial))
*  -ease               float      [0.457] Prob change category (1.0=easy)
                                  (Number from 0.000 to 1.000)
*  -ttratio            float      [2.0] Transition/transversion ratio (Number
                                  0.000 or more)
*  -basefreq           array      [0.25 0.25 0.25 0.25] Base frequencies for A
                                  C G T/U (use blanks to separate)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-outfile]
(Parameter 2)
outfile Phylip distance matrix output file Output file <*>.fprotdist
Additional (Optional) qualifiers
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Rate for each category List of floating point numbers  
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-method list Choose the method to use
j (Jones-Taylor-Thornton matrix)
h (Henikoff/Tiller PMB matrix)
d (Dayhoff PAM matrix)
k (Kimura formula)
s (Similarity table)
c (Categories model)
j
-gammatype list Rate variation among sites
g (Gamma distributed rates)
i (Gamma+invariant sites)
c (Constant rate)
c
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-invarcoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-aacateg list Choose the category to use
G (George/Hunt/Barker (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr Pro))
C (Chemical (Cys Met), (Val Leu Ileu Gly Ala Ser Thr), (Pro))
H (Hall (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr),(Pro))
G
-whichcode list Which genetic code
u (Universal)
c (Ciliate)
m (Universal mitochondrial)
v (Vertebrate mitochondrial)
f (Fly mitochondrial)
y (Yeast mitochondrial)
u
-ease float Prob change category (1.0=easy) Number from 0.000 to 1.000 0.457
-ttratio float Transition/transversion ratio Number 0.000 or more 2.0
-basefreq array Base frequencies for A C G T/U (use blanks to separate) List of floating point numbers 0.25 0.25 0.25 0.25
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fprotdist reads any normal sequence USAs.

Input files for usage example

File: protdist.dat

   5   13
Alpha     AACGTGGCCACAT
Beta      AAGGTCGCCACAC
Gamma     CAGTTCGCCACAA
Delta     GAGATTTCCGCCT
Epsilon   GAGATCTCCGCCC

Output file format

fprotdist output contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. The distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.

If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the other two characters omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences.

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used.

In the Categories model of substitution, the distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change is set to 1.0. For the Dayhoff PAM and Kimura models the distances are scaled in terms of the expected numbers of amino acid substitutions per site. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the protein (or nucleotide) sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases. For example, with the Kimura model, if the two sequences differ in 85.41% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues a warning message indicating which pair of species are causing the problem, and computes a distance of -1.0.

Output files for usage example

File: protdist.fprotdist

    5
Alpha       0.000000  0.331834  0.628142  1.036660  1.365098
Beta        0.331834  0.000000  0.377406  1.102689  0.682218
Gamma       0.628142  0.377406  0.000000  0.979550  0.866781
Delta       1.036660  1.102689  0.979550  0.000000  0.227515
Epsilon     1.365098  0.682218  0.866781  0.227515  0.000000
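
For use outside the PHYLIP distance matrix programs, a small square matrix like the one above can be read with a short script (a minimal sketch in Python; it assumes the plain square layout with no continuation lines, as in this small example):

  def read_square_phylip_matrix(path):
      """Read a small square PHYLIP distance matrix (no wrapped rows)."""
      with open(path) as fh:
          n = int(fh.readline().split()[0])
          names, rows = [], []
          for _ in range(n):
              fields = fh.readline().split()
              names.append(fields[0])
              rows.append([float(x) for x in fields[1:]])
      return names, rows

  names, dist = read_square_phylip_matrix("protdist.fprotdist")
  print(names[0], dist[0][1])          # Alpha 0.331834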

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdnainvar
fdnainvar

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Nucleic acid sequence invariants method

Description

For nucleic acid sequence data on four species, computes Lake's and Cavender's phylogenetic invariants, which test alternative tree topologies. The program also tabulates the frequencies of occurrence of the different nucleotide patterns. Lake's invariants are the method which he calls "evolutionary parsimony".

Algorithm

This program reads in nucleotide sequences for four species and computes the phylogenetic invariants discovered by James Cavender (Cavender and Felsenstein, 1987) and James Lake (1987). Lake's method is also called by him "evolutionary parsimony". I prefer Cavender's more mathematically precise term "invariants", as the method bears somewhat more relationship to likelihood methods than to parsimony. The invariants are mathematical formulas (in the present case linear or quadratic) in the EXPECTED frequencies of site patterns which are zero for all trees of a given tree topology, irrespective of branch lengths. We can consider at a given site that if there are no ambiguities, we could have for four species the nucleotide patterns (considering the same site across all four species) AAAA, AAAC, AAAG, ... through TTTT, 256 patterns in all.

The invariants are formulas in the expected pattern frequencies, not the observed pattern frequencies. When they are computed using the observed pattern frequencies, we will usually find that they are not precisely zero even when the model is correct and we have the correct tree topology. Only as the number of nucleotides scored becomes infinite will the observed pattern frequencies approach their expectations; otherwise, we must do a statistical test of the invariants.

Some explanation of invariants will be found in the above papers, and also in my recent review article on statistical aspects of inferring phylogenies (Felsenstein, 1988b). Although invariants have some important advantages, their validity also depends on symmetry assumptions that may not be satisfied. In the discussion below suppose that the possible unrooted phylogenies are I: ((A,B),(C,D)), II: ((A,C),(B,D)), and III: ((A,D),(B,C)).

Lake's Invariants, Their Testing and Assumptions

Lake's invariants are fairly simple to describe: the patterns involved are only those in which there are two purines and two pyrimidines at a site. Thus a site with AACT would affect the invariants, but a site with AAGG would not. Let us use (as Lake does) the symbols 1, 2, 3, and 4, with the proviso that 1 and 2 are either both of the purines or both of the pyrimidines; 3 and 4 are the other two nucleotides. Thus 1 and 2 always differ by a transition; so do 3 and 4. Lake's invariants, expressed in terms of expected frequencies, are the three quantities:

(1)      P(1133) + P(1234) - P(1134) - P(1233), 

(2)      P(1313) + P(1324) - P(1314) - P(1323), 

(3)      P(1331) + P(1342) - P(1341) - P(1332), 

He showed that invariants (2) and (3) are zero under Topology I, (1) and (3) are zero under topology II, and (1) and (2) are zero under Topology III. If, for example, we see a site with pattern ACGC, we can start by setting 1=A. Then 2 must be G. We can then set 3=C (so that 4 is T). Thus its pattern type, making those substitutions, is 1323. P(1323) is the expected probability of the type of pattern which includes ACGC, TGAG, GTAT, etc.

Lake's invariants are easily tested with observed frequencies. For example, the first of them is a test of whether there are as many sites of types 1133 and 1234 as there are of types 1134 and 1233; this is easily tested with a chi-square test or, as in this program, with an exact binomial test. Note that with several invariants to test, we risk overestimating the significance of results if we simply accept the nominal 95% levels of significance (Li and Gouy, 1990).
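
As an illustration of the kind of test described here (a sketch in plain Python; the counts and the function name are made up, and this is not the program's own code):

  from math import comb

  def lake_first_invariant_test(n_1133, n_1234, n_1134, n_1233):
      """One-tailed exact binomial test: are pattern types 1133 + 1234 as
      frequent as 1134 + 1233, as expected when the first invariant is zero?"""
      k = n_1133 + n_1234
      n = k + n_1134 + n_1233
      invariant = k - (n - k)
      # probability of a difference at least this positive when both sums
      # have expectation n/2
      pvalue = sum(comb(n, x) for x in range(k, n + 1)) * 0.5 ** n
      return invariant, pvalue

  # six sites of types 1133/1234 against two of types 1134/1233:
  # lake_first_invariant_test(4, 2, 1, 1) gives invariant 4, one-tailed P of about 0.145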

Lake's invariants assume that each site is evolving independently, and that starting from any base a transversion is equally likely to end up at each of the two possible bases (thus, an A undergoing a transversion is equally likely to end up as a C or a T, and similarly for the other three bases from which one could start). Interestingly, Lake's results do not assume that rates of evolution are the same at all sites. The result that the total of 1133 and 1234 is expected to be the same as the total of 1134 and 1233 is unaffected by the fact that we may have aggregated the counts over classes of sites evolving at different rates.

Cavender's Invariants, Their Testing and Assumptions

Cavender's invariants (Cavender and Felsenstein, 1987) are for the case of a character with two states. In the nucleic acid case we can classify nucleotides into two states, R and Y (Purine and Pyrimidine) and then use the two-state results. Cavender starts, as before, with the pattern frequencies. Coding purines as R and pyrimidines as Y, the pattern types are RRRR, RRRY, and so on until YYYY, a total of 16 types. Cavender found quadratic functions of the expected frequencies of these 16 types that were expected to be zero under a given phylogeny, irrespective of branch lengths. Two invariants (called K and L) were found for each tree topology. The L invariants are particularly easy to understand. If we have the tree topology ((A,B),(C,D)), then in the case of two symmetric states, the event that A and B have the same state should be independent of whether C and D have the same state, as the events determining these happen in different parts of the tree. We can set up a contingency table:

                                 C = D         C =/= D
                           ------------------------------
                          |
                   A = B  |   YYYY, YYRR,     YYYR, YYRY,
                          |   RRRR, RRYY      RRYR, RRRY
                          |
                 A =/= B  |   YRYY, YRRR,     YRYR, YRRY,
                          |   RYYY, RYRR      RYYR, RYRY

and we expect that the events C = D and A = B will be independent. Cavender's L invariant for this tree topology is simply the negative of the crossproduct difference,

      P(A=/=B and C=D) P(A=B and C=/=D) - P(A=B and C=D) P(A=/=B and C=/=D). 

One of these L invariants is defined for each of the three tree topologies. They can obviously be tested simply by doing a chi-square test on the contingency table. The one corresponding to the correct topology should be statistically indistinguishable from zero. Again, there is a possible multiple tests problem if all three are tested at a nominal value of 95%.
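
The quantities printed in the example output further below (the "Quadratic invariant" and chi-square lines) can be reproduced from such a contingency table of counts; a minimal sketch in plain Python, for illustration only:

  def cavender_L(table):
      """Crossproduct difference and 2x2 chi-square statistic for a table
      [[n(A=B, C=D), n(A=B, C!=D)], [n(A!=B, C=D), n(A!=B, C!=D)]]."""
      (a, b), (c, d) = table
      n = a + b + c + d
      invariant = c * b - a * d        # as printed by the program
      chi2 = n * (a * d - b * c) ** 2 / ((a + b) * (c + d) * (a + c) * (b + d))
      return invariant, chi2

  # Tree I table from the example output: rows A=B / A!=B, columns C=D / C!=D
  # cavender_L([[2, 8], [1, 2]]) gives (4, 0.2311...)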

The K invariants are differences between the L invariants. When one of the tables is expected to have crossproduct difference zero, the other two are expected to be nonzero, and also to be equal. So the difference of their crossproduct differences can be taken; this is the K invariant. It is not so easily tested.

The assumptions of Cavender's invariants are different from those of Lake's. One obviously need not assume anything about the frequencies of, or transitions among, the two different purines or the two different pyrimidines. However one does need to assume independent events at each site, and one needs to assume that the Y and R states are symmetric, that the probability per unit time that a Y changes into an R is the same as the probability that an R changes into a Y, so that we expect equal frequencies of the two states. There is also an assumption that all sites are changing between these two states at the same expected rate. This assumption is not needed for Lake's invariants, since expectations of sums are equal to sums of expectations, but for Cavender's it is, since products of expectations are not equal to expectations of products.

It is helpful to have both sorts of invariants available; with further work we may appreciate what other invariants there are for various models of nucleic acid change.

Usage

Here is a sample session with fdnainvar


% fdnainvar -printdata 
Nucleic acid sequence invariants method
Input (aligned) nucleotide sequence set(s): dnainvar.dat
Phylip weights file (optional): 
Phylip dnainvar program output file [dnainvar.fdnainvar]: 


Output written to output file "dnainvar.fdnainvar"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Nucleic acid sequence invariants method
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
   -weights            properties Phylip weights file (optional)
  [-outfile]           outfile    [*.fdnainvar] Phylip dnainvar program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -printdata          boolean    [N] Print data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing to display results
   -[no]printpattern   boolean    [Y] Print counts of patterns
   -[no]printinvariant boolean    [Y] Print invariants
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
-weights properties Phylip weights file (optional) Property value(s)  
[-outfile]
(Parameter 2)
outfile Phylip dnainvar program output file Output file <*>.fdnainvar
Additional (Optional) qualifiers
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing to display results Boolean value Yes/No Yes
-[no]printpattern boolean Print counts of patterns Boolean value Yes/No Yes
-[no]printinvariant boolean Print invariants Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnainvar reads any normal sequence USAs.

Input files for usage example

File: dnainvar.dat

   4   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT

Output file format

fdnainvar output consists first (if option 1 is selected) of a reprinting of the input data, then (if option 2 is on) tables of observed pattern frequencies and pattern type frequencies. A table will be printed out, in alphabetic order AAAA through TTTT of all the patterns that appear among the sites and the number of times each appears. This table will be invaluable for computation of any other invariants. There follows another table, of pattern types, using the 1234 notation, in numerical order 1111 through 1234, of the number of times each type of pattern appears. In this computation all sites at which there are any ambiguities or deletions are omitted. Cavender's invariants could actually be computed from sites that have only Y or R ambiguities; this will be done in the next release of this program.

If option 3 is on the invariants are then printed out, together with their statistical tests. For Lake's invariants the two sums which are expected to be equal are printed out, and then the result of a one-tailed exact binomial test which tests whether the difference is expected to be this positive or more. The P level is given (but remember the multiple-tests problem!).

For Cavender's L invariants the contingency tables are given. Each is tested with a one-tailed chi-square test. It is possible that the expected numbers in some categories could be too small for valid use of this test; the program does not check for this. It is also possible that the chi-square could be significant but in the wrong direction; this is not tested in the current version of the program. To check this, beware of a chi-square greater than 3.841 but with a positive invariant. The invariants themselves are computed as the difference of cross-products. Their absolute magnitudes are not important, but which one is closest to zero may be indicative. Significantly nonzero invariants should be negative if the model is valid. The K invariants, which are simply differences among the L invariants, are also printed out without any test on them being conducted. Note that it is possible to use the bootstrap utility SEQBOOT to create multiple data sets, and from the output from running all of these get the empirical variability of these quadratic invariants.

Output files for usage example

File: dnainvar.fdnainvar


Nucleic acid sequence Invariants method, version 3.69.650

 4 species,  13  sites

Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         ..G..C.... ..C
Gamma        C.TT.C.T.. C.A
Delta        GGTA.TT.GG CC.



   Pattern   Number of times

     AAAC         1
     AAAG         2
     AACC         1
     AACG         1
     CCCG         1
     CCTC         1
     CGTT         1
     GCCT         1
     GGGT         1
     GGTA         1
     TCAT         1
     TTTT         1


Symmetrized patterns (1, 2 = the two purines  and  3, 4 = the two pyrimidines
                  or  1, 2 = the two pyrimidines  and  3, 4 = the two purines)

     1111         1
     1112         2
     1113         3
     1121         1
     1132         2
     1133         1
     1231         1
     1322         1
     1334         1

Tree topologies (unrooted): 

    I:  ((Alpha,Beta),(Gamma,Delta))
   II:  ((Alpha,Gamma),(Beta,Delta))
  III:  ((Alpha,Delta),(Beta,Gamma))



  [Part of this file has been deleted for brevity]

different purine:pyrimidine ratios from 1:1.

  Tree I:

   Contingency Table

      2     8
      1     2

   Quadratic invariant =             4.0

   Chi-square =    0.23111 (not significant)


  Tree II:

   Contingency Table

      1     5
      1     6

   Quadratic invariant =            -1.0

   Chi-square =    0.01407 (not significant)


  Tree III:

   Contingency Table

      1     2
      6     4

   Quadratic invariant =             8.0

   Chi-square =    0.66032 (not significant)




Cavender's quadratic invariants (type K) using purines vs. pyrimidines
 (these are expected to be zero for the correct tree topology)
They will be misled if there are substantially
different evolutionary rate between sites, or
different purine:pyrimidine ratios from 1:1.
No statistical test is done on them here.

  Tree I:              -9.0
  Tree II:              4.0
  Tree III:             5.0

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fmove.html

EMBOSS: fmove
fmove

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Interactive mixed method parsimony

Description

Interactive construction of phylogenies from discrete character data with two states (0 and 1). Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand.

Algorithm

MOVE is an interactive parsimony program, inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. MOVE reads in a data set which is prepared in almost the same format as one for the mixed method parsimony program MIX. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user can then specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology.

This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to MSDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed.

MOVE uses as its numerical criterion the Wagner and Camin-Sokal parsimony methods in mixture, where each character can have its method specified separately. The program defaults to carrying out Wagner parsimony.

The Camin-Sokal parsimony method explains the data by assuming that changes 0 --> 1 are allowed but not changes 1 --> 0. Wagner parsimony allows both kinds of changes. (This is under the assumption that 0 is the ancestral state, though the program allows reassignment of the ancestral state, in which case we must reverse the state numbers 0 and 1 throughout this discussion). The criterion is to find the tree which requires the minimum number of changes. The Camin-Sokal method is due to Camin and Sokal (1965) and the Wagner method to Eck and Dayhoff (1966) and to Kluge and Farris (1969).

Here are the assumptions of these two methods:

  1. Ancestral states are known (Camin-Sokal) or unknown (Wagner).
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes 0 --> 1 are much more probable than changes 1 --> 0 (Camin-Sokal) or equally probable (Wagner).
  5. Both of these kinds of changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than 0 --> 1 changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

Usage

Here is a sample session with fmove


% fmove 
Interactive mixed method parsimony
Phylip character discrete states file: move.dat
Phylip tree file (optional): 
NEXT? (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q
Do you want to write out the tree to a file? (Y or N): Y

 5 species,   6 characters

Wagner parsimony method


Computing steps needed for compatibility in characters...


(unrooted)                    8.0 Steps             4 chars compatible
                            
  ,-----------5:Epsilon   
--9  
  !  ,--------4:Delta     
  `--8  
     !  ,-----3:Gamma     
     `--7  
        !  ,--2:Beta      
        `--6  
           `--1:Alpha     


Tree written to file "move.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Interactive mixed method parsimony
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing data set
  [-intreefile]        tree       Phylip tree file (optional)

   Additional (Optional) qualifiers:
   -weights            properties Weights file
   -ancfile            properties Ancestral states file
   -factorfile         properties Factors file
   -method             menu       [Wagner] Choose the method to use (Values: w
                                  (Wagner); c (Camin-Sokal); m (Mixed))
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -threshold          float      [$(infile.discretesize)] Threshold value
                                  (Number 0.000 or more)
   -initialtree        menu       [Arbitary] Initial tree (Values: a
                                  (Arbitary); u (User); s (Specify))
   -screenwidth        integer    [80] Width of terminal screen in characters
                                  (Any integer value)
   -screenlines        integer    [24] Number of lines on screen (Any integer
                                  value)
   -outtreefile        outfile    [*.fmove] Phylip tree output file (optional)

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing data set Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-ancfile properties Ancestral states file Property value(s)  
-factorfile properties Factors file Property value(s)  
-method list Choose the method to use (Values: w (Wagner); c (Camin-Sokal); m (Mixed)) Wagner
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-threshold float Threshold value Number 0.000 or more $(infile.discretesize)
-initialtree list Initial tree (Values: a (Arbitary); u (User); s (Specify)) Arbitary
-screenwidth integer Width of terminal screen in characters Any integer value 80
-screenlines integer Number of lines on screen Any integer value 24
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fmove
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

The fmove input data file is set up almost identically to the data files for MIX.

Input files for usage example

File: move.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Output file format

fmove is an interactive program: it displays the tree and reconstructed character states on the screen and, if the user asks for it on quitting, writes the final tree to the Phylip tree output file (as in the example below).

Output files for usage example

File: move.treefile

(Epsilon,(Delta,(Gamma,(Beta,Alpha))));

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fkitsch.html

EMBOSS: fkitsch
fkitsch

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Fitch-Margoliash method with contemporary tips

Description

Estimates phylogenies from distance matrix data under the "ultrametric" model which is the same as the additive tree model except that an evolutionary clock is assumed. The Fitch-Margoliash criterion and other least squares criteria, or the Minimum Evolution criterion are possible. This program will be useful with distances computed from molecular sequences, from restriction sites or fragments, with distances from DNA hybridization measurements, and with genetic distances computed from gene frequencies.

Algorithm

This program carries out the Fitch-Margoliash and Least Squares methods, plus a variety of others of the same family, with the assumption that all tip species are contemporaneous, and that there is an evolutionary clock (in effect, a molecular clock). This means that branches of the tree cannot be of arbitrary length, but are constrained so that the total length from the root of the tree to any species is the same. The quantity minimized is the same weighted sum of squares described in the Distance Matrix Methods documentation file.

The programs FITCH, KITSCH, and NEIGHBOR are for dealing with data which comes in the form of a matrix of pairwise distances between all pairs of taxa, such as distances based on molecular sequence data, gene frequency genetic distances, amounts of DNA hybridization, or immunological distances. In analyzing these data, distance matrix programs implicitly assume that:

  • Each distance is measured independently from the others: no item of data contributes to more than one distance.
  • The distance between each pair of taxa is drawn from a distribution with an expectation which is the sum of values (in effect amounts of evolution) along the tree from one tip to the other. The variance of the distribution is proportional to a power p of the expectation.

These assumptions can be traced in the least squares methods of programs FITCH and KITSCH but it is not quite so easy to see them in operation in the Neighbor-Joining method of NEIGHBOR, where the independence assumption is less obvious.

THESE TWO ASSUMPTIONS ARE DUBIOUS IN MOST CASES: independence will not be expected to be true in most kinds of data, such as genetic distances from gene frequency data. For genetic distance data in which pure genetic drift without mutation can be assumed to be the mechanism of change CONTML may be more appropriate. However, FITCH, KITSCH, and NEIGHBOR will not give positively misleading results (they will not make a statistically inconsistent estimate) provided that additivity holds, which it will if the distance is computed from the original data by a method which corrects for reversals and parallelisms in evolution. If additivity is not expected to hold, problems are more severe. A short discussion of these matters will be found in a review article of mine (1984a). For detailed, if sometimes irrelevant, controversy see the papers by Farris (1981, 1985, 1986) and myself (1986, 1988b).

For genetic distances from gene frequencies, FITCH, KITSCH, and NEIGHBOR may be appropriate if a neutral mutation model can be assumed and Nei's genetic distance is used, or if pure drift can be assumed and either Cavalli-Sforza's chord measure or Reynolds, Weir, and Cockerham's (1983) genetic distance is used. However, in the latter case (pure drift) CONTML should be better.

Restriction site and restriction fragment data can be treated by distance matrix methods if a distance such as that of Nei and Li (1979) is used. Distances of this sort can be computed in PHYLIP by the program RESTDIST.

For nucleic acid sequences, the distances computed in DNADIST allow correction for multiple hits (in different ways) and should allow one to analyse the data under the presumption of additivity. In all of these cases independence will not be expected to hold. DNA hybridization and immunological distances may be additive and independent if transformed properly and if (and only if) the standards against which each value is measured are independent. (This is rarely exactly true).

FITCH and the Neighbor-Joining option of NEIGHBOR fit a tree which has the branch lengths unconstrained. KITSCH and the UPGMA option of NEIGHBOR, by contrast, assume that an "evolutionary clock" is valid, according to which the true branch lengths from the root of the tree to each tip are the same: the expected amount of evolution in any lineage is proportional to elapsed time.

The method may be considered as providing an estimate of the phylogeny. Alternatively, it can be considered as a phenetic clustering of the tip species. This method minimizes an objective function, the sum of squares, not only setting the levels of the clusters so as to do so, but rearranging the hierarchy of clusters to try to find alternative clusterings that give a lower overall sum of squares. When the power option P is set to a value of P = 0.0, so that we are minimizing a simple sum of squares of the differences between the observed distance matrix and the expected one, the method is very close in spirit to Unweighted Pair Group Arithmetic Average Clustering (UPGMA), also called Average-Linkage Clustering. If the topology of the tree is fixed and there turn out to be no branches of negative length, its result should be the same as UPGMA in that case. But since it tries alternative topologies and (unless the N option is set) it combines nodes that otherwise could result in a reversal of levels, it is possible for it to give a different, and better, result than simple sequential clustering. Of course UPGMA itself is available as an option in program NEIGHBOR.

An important use of this method will be to do a formal statistical test of the evolutionary clock hypothesis. This can be done by comparing the sums of squares achieved by FITCH and by KITSCH, BUT SOME CAVEATS ARE NECESSARY. First, the assumption is that the observed distances are truly independent, that no original data item contributes to more than one of them (not counting the two reciprocal distances from i to j and from j to i). THIS WILL NOT HOLD IF THE DISTANCES ARE OBTAINED FROM GENE FREQUENCIES, FROM MORPHOLOGICAL CHARACTERS, OR FROM MOLECULAR SEQUENCES. It may be invalid even for immunological distances and levels of DNA hybridization, if the use of a common standard for all members of a row or column allows an error in the measurement of the standard to affect all these distances simultaneously. It will also be invalid if the numbers have been collected in experimental groups, each measured by taking differences from a common standard which itself is measured with error. Only if the numbers in different cells are measured from independent standards can we depend on the statistical model. The details of the test and the assumptions are discussed in my review paper on distance methods (Felsenstein, 1984a). For further and sometimes irrelevant controversy on these matters see the papers by Farris (1981, 1985, 1986) and myself (Felsenstein, 1986, 1988b).

A second caveat is that the distances must be expected to rise linearly with time, not according to any other curve. Thus it may be necessary to transform the distances to achieve an expected linearity. If the distances have an upper limit beyond which they could not go, this is a signal that linearity may not hold. It is also VERY important to choose the power P at a value that results in the standard deviation of the variation of the observed from the expected distances being the P/2-th power of the expected distance.

To carry out the test, fit the same data with both FITCH and KITSCH, and record the two sums of squares, S(F) and S(K). If the topology has turned out the same, we have N = n(n-1)/2 distances which have been fit with 2n-3 parameters in FITCH, and with n-1 parameters in KITSCH. Then the difference between S(K) and S(F) has d1 = n-2 degrees of freedom. It is statistically independent of the value of S(F), which has d2 = N-(2n-3) degrees of freedom. The ratio of mean squares

      [S(K)-S(F)]/d1
     ----------------
          S(F)/d2

should, under the evolutionary clock, have an F distribution with n-2 and N-(2n-3) degrees of freedom respectively. The test desired is that the F ratio is in the upper tail (say the upper 5%) of its distribution. If the S (subreplication) option is in effect, the above degrees of freedom must be modified by noting that N is not n(n-1)/2 but is the sum of the numbers of replicates of all cells in the distance matrix read in, which may be either square or triangular. A further explanation of the statistical test of the clock is given in a paper of mine (Felsenstein, 1986).
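
As a concrete illustration, once the two sums of squares are in hand the ratio is simple arithmetic. In the sketch below S(K) = 0.107 is taken from the fkitsch example output later on this page (7 species), while the FITCH sum of squares S(F) is a made-up value used only for illustration.

awk 'BEGIN {
  n  = 7                       # number of species (as in the kitsch.dat example below)
  SF = 0.095                   # hypothetical sum of squares from ffitch
  SK = 0.107                   # sum of squares from the fkitsch example output below
  N  = n*(n-1)/2               # number of distances
  d1 = n - 2                   # numerator degrees of freedom
  d2 = N - (2*n - 3)           # denominator degrees of freedom
  F  = ((SK - SF)/d1) / (SF/d2)
  printf "F = %.3f with %d and %d degrees of freedom\n", F, d1, d2
}'

For these made-up numbers F is about 0.25 with 5 and 10 degrees of freedom, well below the upper 5% point of that F distribution, so the clock would not be rejected.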

The program uses a similar tree construction method to the other programs in the package and, like them, is not guaranteed to give the best-fitting tree. The assignment of the branch lengths for a given topology is a least squares fit, subject to the constraints against negative branch lengths, and should not be able to be improved upon. KITSCH runs more quickly than FITCH.

Usage

Here is a sample session with fkitsch


% fkitsch 
Fitch-Margoliash method with contemporary tips
Phylip distance matrix file: kitsch.dat
Phylip tree file (optional): 
Phylip kitsch program output file [kitsch.fkitsch]: 

Adding species:
   1. Bovine    
   2. Mouse     
   3. Gibbon    
   4. Orang     
   5. Gorilla   
   6. Chimp     
   7. Human     

Doing global rearrangements
  !-------------!
   .............

Output written to file "kitsch.fkitsch"

Tree also written onto file "kitsch.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Fitch-Margoliash method with contemporary tips
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-datafile]          distances  File containing one or more distance
                                  matrices
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fkitsch] Phylip kitsch program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -matrixtype         menu       [s] Type of data matrix (Values: s (Square);
                                  u (Upper triangular); l (Lower triangular))
   -minev              boolean    [N] Minimum evolution
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -power              float      [2.0] Power (Any numeric value)
   -negallowed         boolean    [N] Negative branch lengths allowed
   -replicates         boolean    [N] Subreplicates
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fkitsch] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-datafile]
(Parameter 1)
distances File containing one or more distance matrices Distance matrix  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip kitsch program output file Output file <*>.fkitsch
Additional (Optional) qualifiers
-matrixtype list Type of data matrix (Values: s (Square); u (Upper triangular); l (Lower triangular)) s
-minev boolean Minimum evolution Boolean value Yes/No No
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-power float Power Any numeric value 2.0
-negallowed boolean Negative branch lengths allowed Boolean value Yes/No No
-replicates boolean Subreplicates Boolean value Yes/No No
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fkitsch
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3 (-odirectory_outfile) string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

When a user tree is supplied, fkitsch requires a bifurcating (rooted) tree, unlike FITCH, which requires an unrooted tree with a trifurcation at its base. Such a tree of five species would be written, for example:

     ((D,E),(C,(A,B))); 

If a tree with a trifurcation at the base is by mistake fed into the U option of KITSCH then some of its species (the entire rightmost furc, in fact) will be ignored and too small a tree read in. This should result in an error message and the program should stop. It is important to understand the difference between the User Tree formats for KITSCH and FITCH. You may want to use RETREE to convert a user tree that is suitable for FITCH into one suitable for KITSCH or vice versa.

Input files for usage example

File: kitsch.dat

    7
Bovine      0.0000  1.6866  1.7198  1.6606  1.5243  1.6043  1.5905
Mouse       1.6866  0.0000  1.5232  1.4841  1.4465  1.4389  1.4629
Gibbon      1.7198  1.5232  0.0000  0.7115  0.5958  0.6179  0.5583
Orang       1.6606  1.4841  0.7115  0.0000  0.4631  0.5061  0.4710
Gorilla     1.5243  1.4465  0.5958  0.4631  0.0000  0.3484  0.3083
Chimp       1.6043  1.4389  0.6179  0.5061  0.3484  0.0000  0.2692
Human       1.5905  1.4629  0.5583  0.4710  0.3083  0.2692  0.0000

Output file format

fkitsch output is a rooted tree, together with the sum of squares, the number of tree topologies searched, and, if the power P is at its default value of 2.0, the Average Percent Standard Deviation is also supplied. The lengths of the branches of the tree are given in a table, that also shows for each branch the time at the upper end of the branch. "Time" here really means cumulative branch length from the root, going upwards (on the printed diagram, rightwards). For each branch, the "time" given is for the node at the right (upper) end of the branch. It is important to realize that the branch lengths are not exactly proportional to the lengths drawn on the printed tree diagram! In particular, short branches are exaggerated in the length on that diagram so that they are more visible.

Output files for usage example

File: kitsch.fkitsch


   7 Populations

Fitch-Margoliash method with contemporary tips, version 3.69.650

                  __ __             2
                  \  \   (Obs - Exp)
Sum of squares =  /_ /_  ------------
                                2
                   i  j      Obs

negative branch lengths not allowed


                                           +-------Human     
                                         +-6 
                                    +----5 +-------Chimp     
                                    !    ! 
                                +---4    +---------Gorilla   
                                !   ! 
       +------------------------3   +--------------Orang     
       !                        ! 
  +----2                        +------------------Gibbon    
  !    ! 
--1    +-------------------------------------------Mouse     
  ! 
  +------------------------------------------------Bovine    


Sum of squares =      0.107

Average percent standard deviation =   5.16213

From     To            Length          Height
----     --            ------          ------

   6   Human           0.13460         0.81285
   5      6            0.02836         0.67825
   6   Chimp           0.13460         0.81285
   4      5            0.07638         0.64990
   5   Gorilla         0.16296         0.81285
   3      4            0.06639         0.57352
   4   Orang           0.23933         0.81285
   2      3            0.42923         0.50713
   3   Gibbon          0.30572         0.81285
   1      2            0.07790         0.07790
   2   Mouse           0.73495         0.81285
   1   Bovine          0.81285         0.81285

File: kitsch.treefile

((((((Human:0.13460,Chimp:0.13460):0.02836,Gorilla:0.16296):0.07638,
Orang:0.23933):0.06639,Gibbon:0.30572):0.42923,Mouse:0.73495):0.07790,
Bovine:0.81285);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
efitch Fitch-Margoliash and least-squares distance methods
ekitsch Fitch-Margoliash method with contemporary tips
eneighbor Phylogenies from distance matrix by N-J or UPGMA method
ffitch Fitch-Margoliash and least-squares distance methods
fneighbor Phylogenies from distance matrix by N-J or UPGMA method

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/frestdist.html

EMBOSS: frestdist
frestdist

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Calculate distance matrix from restriction sites or fragments

Description

Distances calculated from restriction sites data or restriction fragments data. The restriction sites option is the one to use to also make distances for RAPDs or AFLPs.

Algorithm

Restdist reads the same restriction sites format as RESTML and computes a restriction sites distance. It can also compute a restriction fragments distance. The original restriction fragments and restriction sites distance methods were introduced by Nei and Li (1979). Their original method for restriction fragments is also available in this program, although its default methods are my modifications of the original Nei and Li methods.

These two distances assume that the restriction sites are accidental byproducts of random change of nucleotide sequences. For my restriction sites distance the DNA sequences are assumed to be changing according to the Kimura 2-parameter model of DNA change (Kimura, 1980). The user can set the transition/transversion rate for the model. For my restriction fragments distance there is an implicit assumption of a Jukes-Cantor (1969) model of change. The user can also set the parameter of a correction for unequal rates of evolution between sites in the DNA sequences, using a Gamma distribution of rates among sites. The Jukes-Cantor model is also implicit in the restriction fragments distance of Nei and Li (1979). It does not allow us to correct for a Gamma distribution of rates among sites.

Restriction Sites Distance

The restriction sites distances use data coded for the presence or absence of individual restriction sites (usually as + and - or 0 and 1). My distance is based on the proportion, out of all sites observed in one species or the other, which are present in both species. This is done to correct for the ascertainment of sites, for the fact that we are not aware of many sites because they do not appear in any species.

My distance starts by computing from the particular pair of species the fraction

                 n++
   f =  ---------------------
         n++ + 1/2 (n+- + n-+)

where n++ is the number of sites contained in both species, n+- is the number of sites contained in the first of the two species but not in the second, and n-+ is the number of sites contained in the second of the two species but not in the first. This is the fraction of sites that are present in one species which are present in both. Since the number of sites present in the two species will often differ, the denominator is the average of the number of sites found in the two species.
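
As a small worked example with made-up counts, suppose two species share n++ = 10 sites, while n+- = 2 sites are seen only in the first species and n-+ = 4 only in the second:

awk 'BEGIN {
  npp = 10; npm = 2; nmp = 4                 # hypothetical site counts
  f = npp / (npp + 0.5*(npm + nmp))          # fraction of observed sites present in both
  printf "f = %.4f\n", f
}'

Here the denominator, 13, is the average of the number of sites observed in the two species (12 and 14), as described above, giving f = 10/13 = 0.7692.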

If each restriction site is s nucleotides long, the probability that a restriction site is present in the other species, given that it is present in a species, is

      Q^s,

where Q is the probability that a nucleotide has no net change as one goes from the one species to the other. It may have changed in between; we are interested in the probability that that nucleotide site is in the same base in both species, irrespective of what has happened in between. The distance is then computed by finding the branch length of a two-species tree (connecting these two species with a single branch) such that Q equals the s-th root of f. For this the program computes Q for various values of branch length, iterating them by a Newton-Raphson algorithm until the two quantities are equal.

The resulting distance should be numerically close to the original restriction sites distance of Nei and Li (1979) when divergence is small. Theirs computes the probability of retention of a site in a way that assumes that the site is present in the common ancestor of the two species. Ours does not make this assumption. It is inspired by theirs, but differs in this detail. Their distance also assumes a Jukes-Cantor (1969) model of base change, and does not allow for transitions being more frequent than transversions. In this sense mine generalizes theirs somewhat. Their distance does include, as mine does as well, a correction for Gamma distribution of rate of change among nucleotide sites.

I have made their original distance available here.

Restriction Fragments Distance

For restriction fragments data we use a different distance. If we average over all restriction fragment lengths, each at its own expected frequency, the probability that the fragment will still be in existence after a certain amount of branch length, we must take into account the probability that the two restriction sites at the ends of the fragment do not mutate, and the probability that no new restriction site occurs within the fragment in that amount of branch length. The result for a restriction site length of s is:

                Q^(2s)
          f = ---------
               2 - Q^s

(The details of the derivation will be given in my forthcoming book Inferring Phylogenies, to be published by Sinauer Associates in 2001.) Given the observed fraction of restriction sites retained, f, we can solve a quadratic equation from the above expression for Q^s. That makes it easy to obtain a value of Q, and the branch length can then be estimated by adjusting it so the probability of a base not changing is equal to that value. Alternatively, if we use the Nei and Li (1979) restriction fragments distance, this involves solving for g in the nonlinear equation

       g  =  [ f (3 - 2g) ]^(1/4)

and then the distance is given by

       d  =  - (2/r) log_e(g)

where r is the length of the restriction site.
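
As an illustration of the quadratic step for the modified fragments distance, the expression above can be solved directly for x = Q^s. The value of f below is made up, and s = 6 is the default site length used by this program:

awk 'BEGIN {
  f = 0.8; s = 6                             # hypothetical observed fraction f, site length 6
  x = (-f + sqrt(f*f + 8*f)) / 2             # x = Q^s, positive root of x^2 + f*x - 2*f = 0
  Q = x^(1/s)                                # per-nucleotide probability of no net change
  printf "Q^s = %.4f   Q = %.4f\n", x, Q
}'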

Comparing these two restriction fragments distances in a case where their underlying DNA model is the same (which is when the transition/transversion ratio of the modified model is set to 0.5), you will find that they are very close to each other, differing very little at small distances, with the modified distance becoming smaller than the Nei/Li distance at larger distances. It will therefore matter very little which one you use.

A Comment About RAPDs and AFLPs

Although these distances are designed for restriction sites and restriction fragments data, they can be applied to RAPD and AFLP data as well. RAPD (Randomly Amplified Polymorphic DNA) and AFLP (Amplified Fragment Length Polymorphism) data consist of presence or absence of individual bands on a gel. The bands are segments of DNA with PCR primers at each end. These primers are defined sequences of known length (often about 10 nucleotides each). For AFLPs the relevant length is the primer length, plus three nucleotides. Mutation in these sequences makes them no longer be primers, just as in the case of restriction sites. Thus a pair of 10-nucleotide primers will behave much the same as a 20-nucleotide restriction site, for RAPDs (26 for AFLPs). You can use the restriction sites distance as the distance between RAPD or AFLP patterns if you set the proper value for the total length of the site to the total length of the primers (plus 6 in the case of AFLPs). Of course there are many possible sources of noise in these data, including confusing fragments of similar length for each other and having primers near each other in the genome, and these are not taken into account in the statistical model used here.
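
For example, RAPD bands produced by pairs of 10-nucleotide primers could be analysed with a total site length of 20 (26 for AFLPs). The command-line sketch below uses only qualifiers documented on this page; the data file name is hypothetical, and the file would contain presence/absence data in the input format described later on this page:

% frestdist -data rapd.dat -outfile rapd.frestdist -sitelength 20 -auto   # use -sitelength 26 for AFLPs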

Usage

Here is a sample session with frestdist


% frestdist 
Calculate distance matrix from restriction sites or fragments
Input file: restdist.dat
Phylip restdist program output file [restdist.frestdist]: 


Restriction site or fragment distances, version 3.69.650

Distances calculated for species
    Alpha        ....
    Beta         ...
    Gamma        ..
    Delta        .
    Epsilon   

Distances written to file "restdist.frestdist"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Calculate distance matrix from restriction sites or fragments
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-data]              discretestates File containing one or more sets of
                                  restriction data
  [-outfile]           outfile    [*.frestdist] Phylip restdist program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -[no]restsites      boolean    [Y] Restriction sites (put N if you want
                                  restriction fragments)
   -neili              boolean    [N] Use original Nei/Li model (default uses
                                  modified Nei/Li model)
*  -gammatype          boolean    [N] Gamma distributed rates among sites
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
   -ttratio            float      [2.0] Transition/transversion ratio (Number
                                  0.001 or more)
   -sitelength         integer    [6] Site length (Integer 1 or more)
   -lower              boolean    [N] Lower triangular distance matrix
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-data]
(Parameter 1)
discretestates File containing one or more sets of restriction data Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip restdist program output file Output file <*>.frestdist
Additional (Optional) qualifiers
-[no]restsites boolean Restriction sites (put N if you want restriction fragments) Boolean value Yes/No Yes
-neili boolean Use original Nei/Li model (default uses modified Nei/Li model) Boolean value Yes/No No
-gammatype boolean Gamma distributed rates among sites Boolean value Yes/No No
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ttratio float Transition/transversion ratio Number 0.001 or more 2.0
-sitelength integer Site length Integer 1 or more 6
-lower boolean Lower triangular distance matrix Boolean value Yes/No No
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2 (-odirectory_outfile) string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

frestdist input is fairly standard, with one addition. As usual the first line of the file gives the number of species and the number of sites, but there is also a third number, which is the number of different restriction enzymes that were used to detect the restriction sites. Thus a data set with 10 species and 35 different sites, representing digestion with 4 different enzymes, would have the first line of the data file look like this:

   10   35    4

The site data are in standard form. Each species starts with a species name whose maximum length is given by the constant "nmlngth" (whose value in the program as distributed is 10 characters). The name should, as usual, be padded out to that length with blanks if necessary. The sites data then follows, one character per site (any blanks will be skipped and ignored). Like the DNA and protein sequence data, the restriction sites data may be either in the "interleaved" form or the "sequential" form. Note that if you are analyzing restriction sites data with the programs DOLLOP or MIX or other discrete character programs, at the moment those programs do not use the "aligned" or "interleaved" data format. Therefore you may want to avoid that format when you have restriction sites data that you will want to feed into those programs.

The presence of a site is indicated by a "+" and the absence by a "-". I have also allowed the use of "1" and "0" as synonyms for "+" and "-", for compatibility with MIX and DOLLOP which do not allow "+" and "-". If the presence of the site is unknown (for example, if the DNA containing it has been deleted so that one does not know whether it would have contained the site) then the state "?" can be used to indicate that the state of this site is unknown.

Input files for usage example

File: restdist.dat

   5   13   2
Alpha     ++-+-++--+++-
Beta      ++++--+--+++-
Gamma     -+--+-++-+-++
Delta     ++-+----++---
Epsilon   ++++----++---

Output file format

frestdist output contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix or distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters.

The distances printed out are scaled in terms of expected numbers of substitutions per DNA site, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0. Thus when the G option is used, the rate of change at one site may be higher than at another, but their mean is expected to be 1.

Output files for usage example

File: restdist.frestdist

    5
Alpha       0.000000  0.022368  0.107681  0.082639  0.095581
Beta        0.022368  0.000000  0.107681  0.082639  0.056895
Gamma       0.107681  0.107681  0.000000  0.192466  0.207319
Delta       0.082639  0.082639  0.192466  0.000000  0.015945
Epsilon     0.095581  0.056895  0.207319  0.015945  0.000000

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fdnamlk.html

EMBOSS: fdnamlk
fdnamlk

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Estimates nucleotide phylogeny by maximum likelihood

Description

Same as DNAML but assumes a molecular clock. The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made.

Estimates phylogenies from nucleotide sequences by maximum likelihood. The model employed allows for unequal expected frequencies of the four nucleotides, for unequal rates of transitions and transversions, and for different (prespecified) rates of change in different categories of sites, and also use of a Hidden Markov model of rates, with the program inferring which sites have which rates. This also allows gamma-distribution and gamma-plus-invariant sites distributions of rates across sites.
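
As a sketch of the likelihood ratio test mentioned above, twice the difference in log likelihood between the unconstrained tree (fdnaml) and the clock-constrained tree (fdnamlk) is compared with a chi-square distribution. The degrees of freedom below assume the usual parameter count (2n-3 branch lengths without a clock versus n-1 node times with one, a difference of n-2); the log likelihoods and the species number are hypothetical values for illustration only:

awk 'BEGIN {
  lnL_noclock = -1234.56                     # hypothetical log likelihood reported by fdnaml
  lnL_clock   = -1240.12                     # hypothetical log likelihood reported by fdnamlk
  n = 7                                      # hypothetical number of species
  lrt = 2 * (lnL_noclock - lnL_clock)
  printf "LRT = %.2f, compare with chi-square on %d degrees of freedom\n", lrt, n-2
}'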

Algorithm

This program implements the maximum likelihood method for DNA sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to DNAML. Details of the algorithm are not yet published, but many aspects of it are similar to DNAML, and these are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites.

The assumptions of the model are:

  1. Each site in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. There is a molecular clock.
  4. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  5. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  6. A substitution consists of one of two sorts of events:
    1. The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition.
    2. The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to no change, to a transition or to a transversion. The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines.

      The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21.

      • First kind of event:

        Determine whether the existing base is a purine or a pyrimidine. Draw from the proper pool:

              Purine pool:                Pyrimidine pool:
        
             |               |            |               |
             |   0.4706 A    |            |   0.5714 C    |
             |   0.5294 G    |            |   0.4286 T    |
             | (ratio is     |            | (ratio is     |
             |  0.24 : 0.27) |            |  0.28 : 0.21) |
             |_______________|            |_______________|
        
      • Second kind of event:

        Draw from the overall pool:

        
                      |                  |
                      |      0.24 A      |
                      |      0.28 C      |
                      |      0.27 G      |
                      |      0.21 T      |
                      |__________________|
        
        

Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier. A closely similar, but not precisely identical, model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996).
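As an illustration of the two pools above, the within-pool frequencies are simply the overall base frequencies renormalised within the purines and within the pyrimidines. The short Python sketch below (not part of fdnamlk) reproduces the numbers in the diagram from the example base frequencies.

# Illustration only (not fdnamlk code): within-pool frequencies are the
# overall base frequencies renormalised within purines and within pyrimidines.
freq = {"A": 0.24, "C": 0.28, "G": 0.27, "T": 0.21}

def normalise(pool):
    """Rescale the frequencies in a pool so that they sum to 1."""
    total = sum(pool.values())
    return {base: f / total for base, f in pool.items()}

purine_pool = normalise({b: freq[b] for b in ("A", "G")})
pyrimidine_pool = normalise({b: freq[b] for b in ("C", "T")})

print(purine_pool)      # {'A': 0.4706..., 'G': 0.5294...}
print(pyrimidine_pool)  # {'C': 0.5714..., 'T': 0.4286...}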

Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length of a patch of sites all having the same rate is. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate.
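The following Python sketch is one way to picture the patch structure just described. It is only an illustration, assuming a simple parameterisation (not taken from the program source) in which a new rate class is redrawn from the prior probabilities with probability 1/(average patch length) at each site, and kept otherwise.

# Sketch only, under the assumed parameterisation described above.
rates  = [1.0, 2.4, 0.0]     # example rates from the text
priors = [0.4, 0.3, 0.3]     # prior probability of each rate
patch_length = 2.0           # average run of sites sharing a rate

change = 1.0 / patch_length
transition = [
    [(1 - change) * (i == j) + change * priors[j] for j in range(len(rates))]
    for i in range(len(rates))
]
for row in transition:
    print([round(p, 2) for p in row])
# Each row sums to 1; the inflated diagonal reflects the tendency of
# neighbouring sites to share the same rate.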

The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites.

This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.
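For example (the rate values below are taken from the paragraphs above; the particular combination is hypothetical), the effective rate at a third codon position falling inside a fast HMM region would be the product of the two rates:

# Illustration of the product rule described above; the rate values come from
# the examples in this documentation, the combination is hypothetical.
category_rate = {1: 1.0, 2: 0.8, 3: 2.7}   # user-assigned codon-position rates
hmm_regional_rate = 2.4                    # one of the HMM rates in the example

site_category = 3                          # a third codon position
effective_rate = category_rate[site_category] * hmm_regional_rate
print(effective_rate)                      # 6.48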

Usage

Here is a sample session with fdnamlk


% fdnamlk -printdata -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" -gammatype h -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" -hmmprobabilities "0.522 0.399 0.076 0.0036 0.000023" -lambda 1.5 -weight "0111111111110" 
Estimates nucleotide phylogeny by maximum likelihood
Input (aligned) nucleotide sequence set(s): dnaml.dat
Phylip tree file (optional): 
Phylip dnamlk program output file [dnaml.fdnamlk]: 


Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Output written to file "dnaml.fdnamlk"

Tree also written onto file "dnaml.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Estimates nucleotide phylogeny by maximum likelihood
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fdnamlk] Phylip dnamlk program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      Rate for each category
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
   -ttratio            float      [2.0] Transition/transversion ratio (Number
                                  0.001 or more)
   -[no]freqsfrom      toggle     [Y] Use empirical base frequencies from
                                  sequence input
*  -basefreq           array      [0.25 0.25 0.25 0.25] Base frequencies for A
                                  C G T/U (use blanks to separate)
   -gammatype          menu       [Constant rate] Rate variation among sites
                                  (Values: g (Gamma distributed rates); i
                                  (Gamma+invariant sites); h (User defined HMM
                                  of rates); n (Constant rate))
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ngammacat          integer    [1] Number of categories (1-9) (Integer from
                                  1 to 9)
*  -invarcoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ninvarcat          integer    [1] Number of categories (1-9) including one
                                  for invariant sites (Integer from 1 to 9)
*  -invarfrac          float      [0.0] Fraction of invariant sites (Number
                                  from 0.000 to 1.000)
*  -nhmmcategories     integer    [1] Number of HMM rate categories (Integer
                                  from 1 to 9)
*  -hmmrates           array      [1.0] HMM category rates
*  -hmmprobabilities   array      [1.0] Probability for each HMM category
*  -adjsite            boolean    [N] Rates at adjacent sites correlated
*  -lambda             float      [1.0] Mean block length of sites having the
                                  same rate (Number 1.000 or more)
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
*  -global             boolean    [N] Global rearrangements
*  -lengths            boolean    [N] Use branch lengths from user trees
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdnamlk] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -hypstate           boolean    [N] Reconstruct hypothetical sequence

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip dnamlk program output file Output file <*>.fdnamlk
Additional (Optional) qualifiers
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Rate for each category List of floating point numbers  
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-ttratio float Transition/transversion ratio Number 0.001 or more 2.0
-[no]freqsfrom toggle Use empirical base frequencies from sequence input Toggle value Yes/No Yes
-basefreq array Base frequencies for A C G T/U (use blanks to separate) List of floating point numbers 0.25 0.25 0.25 0.25
-gammatype list Rate variation among sites
g (Gamma distributed rates)
i (Gamma+invariant sites)
h (User defined HMM of rates)
n (Constant rate)
Constant rate
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ngammacat integer Number of categories (1-9) Integer from 1 to 9 1
-invarcoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ninvarcat integer Number of categories (1-9) including one for invariant sites Integer from 1 to 9 1
-invarfrac float Fraction of invariant sites Number from 0.000 to 1.000 0.0
-nhmmcategories integer Number of HMM rate categories Integer from 1 to 9 1
-hmmrates array HMM category rates List of floating point numbers 1.0
-hmmprobabilities array Probability for each HMM category List of floating point numbers 1.0
-adjsite boolean Rates at adjacent sites correlated Boolean value Yes/No No
-lambda float Mean block length of sites having the same rate Number 1.000 or more 1.0
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-global boolean Global rearrangements Boolean value Yes/No No
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnamlk
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-hypstate boolean Reconstruct hypothetical sequence Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnamlk reads any normal sequence USAs.

Input files for usage example

File: dnaml.dat

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC

Output file format

fdnamlk output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen.

A table is printed showing the length of each tree segment, and the time (in units of expected nucleotide substitutions per site) of each fork in the tree, measured from the root of the tree. I have not attempted to include code for approximate confidence limits on branch points, as I have done for branch lengths in DNAML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated.

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run DNAML and DNAMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In DNAML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In DNAMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2. To perform the test take the difference in log likelihoods between the two runs (DNAML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected.
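A minimal sketch of this test, assuming hypothetical log likelihoods from paired fdnaml and fdnamlk runs on the same data (scipy is assumed to be available for the chi-square distribution):

# Sketch of the molecular-clock likelihood ratio test described above.
from scipy.stats import chi2

n_tips = 5                 # number of tips in the tree
lnL_unclocked = -57.10     # hypothetical fdnaml (DNAML) log likelihood
lnL_clocked = -57.98       # hypothetical fdnamlk (DNAMLK) log likelihood

statistic = 2.0 * (lnL_unclocked - lnL_clocked)   # double the difference
df = n_tips - 2                                   # (2n-3) - (n-1) parameters
p_value = chi2.sf(statistic, df)
print(statistic, df, p_value)
# A small p-value would mean the clock constraint significantly worsens the
# fit, i.e. the molecular clock may be rejected.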

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.
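The sketch below shows the flavour of the two-tree comparison just described, using made-up per-site log likelihoods; it is not the program's own implementation.

# Sketch of a two-tree Kishino-Hasegawa style comparison with hypothetical data.
import math

lnl_tree1 = [-4.1, -3.9, -5.2, -4.8, -4.0, -4.4]   # per-site log likelihoods
lnl_tree2 = [-4.3, -4.2, -5.0, -5.1, -4.6, -4.5]   # (hypothetical values)

d = [a - b for a, b in zip(lnl_tree1, lnl_tree2)]  # per-site differences
n = len(d)
total = sum(d)
mean = total / n
# Variance of the total difference, estimated from the site-to-site scatter.
var_total = n / (n - 1) * sum((x - mean) ** 2 for x in d)
z = total / math.sqrt(var_total)
print(z)   # |z| > 1.96 would flag the trees as significantly different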

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
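To see why a branch of length 0.26 does not translate into a 26% observed difference, one can use the simpler Jukes-Cantor correction purely as an illustration (fdnamlk's own model is more general, so the exact numbers would differ):

# Illustration only, using the Jukes-Cantor formula: expected observed
# difference between the two ends of a branch of a given length.
import math

def jc_observed_difference(branch_length):
    """Expected fraction of differing sites under Jukes-Cantor."""
    return 0.75 * (1.0 - math.exp(-4.0 * branch_length / 3.0))

print(jc_observed_difference(0.01))   # ~0.0099: short branches match the length
print(jc_observed_difference(0.26))   # ~0.22: noticeably less than 26%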

Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in this calculation of the prior probabilities of different rates, and of the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like.

A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more than half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.
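The sketch below mimics the display rule just described for a single site at a single node, with hypothetical likelihood fractions; the ambiguity codes are the standard IUPAC ones listed in the sequence programs documentation.

# Sketch of the ancestral-state display rule described above (hypothetical data).
IUPAC = {frozenset("AC"): "M", frozenset("AG"): "R", frozenset("AT"): "W",
         frozenset("CG"): "S", frozenset("CT"): "Y", frozenset("GT"): "K",
         frozenset("ACG"): "V", frozenset("ACT"): "H",
         frozenset("AGT"): "D", frozenset("CGT"): "B",
         frozenset("ACGT"): "N"}

def display_state(fractions):
    """fractions: dict base -> share of the likelihood at this site."""
    best_base, best = max(fractions.items(), key=lambda kv: kv[1])
    if best > 0.95:
        return best_base.upper()        # confident: capital letter
    if best >= 0.50:
        return best_base.lower()        # plausible but not confident (lower case)
    # Otherwise report the smallest set of bases covering >50% of the likelihood.
    chosen, total = [], 0.0
    for base, f in sorted(fractions.items(), key=lambda kv: -kv[1]):
        chosen.append(base)
        total += f
        if total > 0.5:
            break
    return IUPAC[frozenset(chosen)]

print(display_state({"A": 0.97, "C": 0.01, "G": 0.01, "T": 0.01}))  # 'A'
print(display_state({"A": 0.40, "C": 0.35, "G": 0.15, "T": 0.10}))  # 'M'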

Output files for usage example

File: dnaml.fdnamlk


Nucleic acid sequence Maximum Likelihood method, version 3.69.650

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             01111 11111 110


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



Empirical Base Frequencies:

   A       0.23636
   C       0.29091
   G       0.25455
  T(U)     0.21818


Transition/transversion ratio =   2.000000


State in HMM    Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023


Site category   Rate of change

        1           1.000
        2           2.000






                                                            +-Epsilon   
  +---------------------------------------------------------4  
  !                                                         +-Delta     
--3  
  !                                                   +-------Gamma     
  +---------------------------------------------------2  
                                                      !     +-Beta      
                                                      +-----1  
                                                            +-Alpha     


Ln Likelihood =   -57.98242

 Ancestor      Node      Node Height     Length
 --------      ----      ---- ------     ------
 root            3      
   3             4          4.01604      4.01604
   4          Epsilon       4.15060      0.13456
   4          Delta         4.15060      0.13456
   3             2          3.59089      3.59089
   2          Gamma         4.15060      0.55971
   2             1          3.99329      0.40240
   1          Beta          4.15060      0.15731
   1          Alpha         4.15060      0.15731

Combination of categories that contributes the most to the likelihood:

             1132121111 211

Most probable category at each site if > 0.95 probability ("." otherwise)

             .......... ...


File: dnaml.treefile

((Epsilon:0.13456,Delta:0.13456):4.01604,(Gamma:0.55971,
(Beta:0.15731,Alpha:0.15731):0.40240):3.59089);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fprotpars
fprotpars

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Protein parsimony algorithm

Description

Estimates phylogenies from protein sequences (input using the standard one-letter code for amino acids) using the parsimony method, in a variant which counts only those nucleotide changes that change the amino acid, on the assumption that silent changes are more easily accomplished.

Algorithm

This program infers an unrooted phylogeny from protein sequences, using a new method intermediate between the approaches of Eck and Dayhoff (1966) and Fitch (1971). Eck and Dayhoff (1966) allowed any amino acid to change to any other, and counted the number of such changes needed to evolve the protein sequences on each given phylogeny. This has the problem that it allows replacements which are not consistent with the genetic code, counting them equally with replacements that are consistent. Fitch, on the other hand, counted the minimum number of nucleotide substitutions that would be needed to achieve the given protein sequences. This counts silent changes equally with those that change the amino acid.

The present method insists that any changes of amino acid be consistent with the genetic code so that, for example, lysine is allowed to change to methionine but not to proline. However, changes between two amino acids via a third are allowed and counted as two changes if each of the two replacements is individually allowed. This sometimes allows changes that at first sight you would think should be outlawed. Thus we can change from phenylalanine to glutamine via leucine in two steps total. Consulting the genetic code, you will find that there is a leucine codon one step away from a phenylalanine codon, and a leucine codon one step away from a glutamine codon. But they are not the same leucine codon. It actually takes three base substitutions to get from either of the phenylalanine codons TTT and TTC to either of the glutamine codons CAA or CAG. Why then does this program count only two? The answer is that recent DNA sequence comparisons seem to show that synonymous changes are considerably faster and easier than ones that change the amino acid. We are assuming that, in effect, synonymous changes occur so much more readily that they need not be counted. Thus, in the chain of changes TTT (Phe) --> CTT (Leu) --> CTA (Leu) --> CAA (Gln), the middle one is not counted because it does not change the amino acid (leucine).
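The counting rule described above can be sketched as a shortest-path search over codons in which synonymous single-base changes are free and amino-acid-changing ones cost one step. The sketch below is only an illustration (it is not the program's own code) and assumes Biopython is available for the standard genetic code table.

# Sketch of the counting idea described above, using Dijkstra over codons.
from heapq import heappush, heappop
from Bio.Data.CodonTable import standard_dna_table

code = dict(standard_dna_table.forward_table)            # codon -> amino acid
code.update({c: "*" for c in standard_dna_table.stop_codons})

def neighbours(codon):
    """All codons one base substitution away."""
    for i in range(3):
        for base in "ACGT":
            if base != codon[i]:
                yield codon[:i] + base + codon[i + 1:]

def min_steps(aa_from, aa_to):
    """Fewest amino-acid-changing substitutions linking two amino acids."""
    starts = [c for c, aa in code.items() if aa == aa_from]
    best = {c: 0 for c in starts}
    queue = [(0, c) for c in starts]
    while queue:
        cost, codon = heappop(queue)
        if code[codon] == aa_to:
            return cost
        for nxt in neighbours(codon):
            step = 0 if code[nxt] == code[codon] else 1   # synonymous = free
            if cost + step < best.get(nxt, float("inf")):
                best[nxt] = cost + step
                heappush(queue, (cost + step, nxt))
    return None

print(min_steps("F", "Q"))   # 2: Phe -> Leu (free synonymous move) -> Gln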

To maintain consistency with the genetic code, it is necessary for the program internally to treat serine as two separate states (ser1 and ser2) since the two groups of serine codons are not adjacent in the code. Changes to the state "deletion" are counted as three steps to prevent the algorithm from assuming unnecessary deletions. The state "unknown" is simply taken to mean that the amino acid, which has not been determined, will in each part of a tree that is evaluated be assumed to be whichever one causes the fewest steps.

The assumptions of this method (which has not been described in the literature) are thus something like this:

  1. Change in different sites is independent.
  2. Change in different lineages is independent.
  3. The probability of a base substitution that changes the amino acid sequence is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.
  6. The probability of a base change that is synonymous is much higher than the probability of a change that is not synonymous.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the works by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

The input for the program is fairly standard. The first line contains the number of species and the number of amino acid positions (counting any stop codons that you want to include).

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the line. Note that a blank is not a valid symbol for a deletion.

The protein sequences are given by the one-letter code described in the Molecular Sequence Programs documentation file. Note that if two polypeptide chains are being used that are of different length owing to one terminating before the other, they should be coded as (say)

             HIINMA*????
             HIPNMGVWABT

since after the stop codon we do not definitely know that there has been a deletion, and do not know what amino acid would have been there. If DNA studies tell us that there is DNA sequence in that region, then we could use "X" rather than "?". Note that "X" means an unknown amino acid, but definitely an amino acid, while "?" could mean either that or a deletion. The distinction is often significant in regions where there are deletions: one may want to encode a six-base deletion as "-?????" since that way the program will only count one deletion, not six deletion events, when the deletion arises. However, if there are overlapping deletions it may not be so easy to know what coding is correct.

One will usually want to use "?" after a stop codon, if one does not know what amino acid is there. If the DNA sequence has been observed there, one probably ought to resist putting in the amino acids that this DNA would code for, and one should use "X" instead, because under the assumptions implicit in this parsimony method, changes to any noncoding sequence are much easier than changes in a coding region that change the amino acid, so that they shouldn't be counted anyway!

The form of this information is the standard one described in the main documentation file. For the U option the tree provided must be a rooted bifurcating tree, with the root placed anywhere you want, since that root placement does not affect anything.

Usage

Here is a sample session with fprotpars


% fprotpars 
Protein parsimony algorithm
Input (aligned) protein sequence set(s): protpars.dat
Phylip tree file (optional): 
Phylip protpars program output file [protpars.fprotpars]: 


Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........
   .........

Output written to file "protpars.fprotpars"

Trees also written onto file "protpars.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Example 2


% fprotpars -njumble 3 -seed 3 -printdata -ancseq -whichcode m -stepbox -outgrno 2  -dothreshold -threshold 3 
Protein parsimony algorithm
Input (aligned) protein sequence set(s): protpars.dat
Phylip tree file (optional): 
Phylip protpars program output file [protpars.fprotpars]: 


Adding species:
   1. Delta     
   2. Epsilon   
   3. Alpha     
   4. Beta      
   5. Gamma     

Doing global rearrangements
  !---------!
   .........
   .........


Adding species:
   1. Beta      
   2. Epsilon   
   3. Delta     
   4. Alpha     
   5. Gamma     

Doing global rearrangements
  !---------!
   .........


Adding species:
   1. Epsilon   
   2. Alpha     
   3. Gamma     
   4. Delta     
   5. Beta      

Doing global rearrangements
  !---------!
   .........

Output written to file "protpars.fprotpars"

Trees also written onto file "protpars.treefile"

Done.


Go to the output files for this example

Example 3


% fprotpars -njumble 3 -seed 3 
Protein parsimony algorithm
Input (aligned) protein sequence set(s): protpars2.dat
Phylip tree file (optional): 
Phylip protpars program output file [protpars2.fprotpars]: 

Data set # 1:


Adding species:
   1. Delta     
   2. Epsilon   
   3. Alpha     
   4. Beta      
   5. Gamma     

Doing global rearrangements
  !---------!
   .........
   .........


Adding species:
   1. Beta      
   2. Epsilon   
   3. Delta     
   4. Alpha     
   5. Gamma     

Doing global rearrangements
  !---------!
   .........


Adding species:
   1. Epsilon   
   2. Alpha     
   3. Gamma     
   4. Delta     
   5. Beta      

Doing global rearrangements
  !---------!
   .........

Output written to file "protpars2.fprotpars"

Trees also written onto file "protpars2.treefile"
Data set # 2:


Adding species:
   1. Gamma     
   2. Delta     
   3. Epsilon   
   4. Beta      
   5. Alpha     

Doing global rearrangements
  !---------!
   .........
   .........


Adding species:
   1. Alpha     
   2. Delta     
   3. Epsilon   
   4. Gamma     
   5. Beta      

Doing global rearrangements
  !---------!
   .........


Adding species:
   1. Epsilon   
   2. Beta      
   3. Gamma     
   4. Alpha     
   5. Delta     

Doing global rearrangements
  !---------!
   .........

Output written to file "protpars2.fprotpars"

Trees also written onto file "protpars2.treefile"
Data set # 3:


Adding species:
   1. Delta     
   2. Beta      
   3. Gamma     
   4. Alpha     
   5. Epsilon   

Doing global rearrangements
  !---------!
   .........
   .........


Adding species:
   1. Gamma     
   2. Delta     
   3. Beta      
   4. Epsilon   
   5. Alpha     

Doing global rearrangements
  !---------!
   .........


Adding species:
   1. Epsilon   
   2. Alpha     
   3. Gamma     
   4. Delta     
   5. Beta      

Doing global rearrangements
  !---------!
   .........

Output written to file "protpars2.fprotpars"

Trees also written onto file "protpars2.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Example 4


% fprotpars -option 
Protein parsimony algorithm
Input (aligned) protein sequence set(s): protpars.dat
Phylip tree file (optional): 
Phylip weights file (optional): protparswts.dat
Number of times to randomise [0]: 
Species number to use as outgroup [0]: 
Use threshold parsimony [N]: 
Genetic codes
         U : Universal
         M : Mitochondrial
         V : Vertebrate mitochondrial
         F : Fly mitochondrial
         Y : Yeast mitochondrial
Use which genetic code [Universal]: 
Phylip protpars program output file [protpars.fprotpars]: 
Write out trees to tree file [Y]: 
Phylip tree output file (optional) [protpars.treefile]: 
Print data at start of run [N]: 
Print indications of progress of run [Y]: 
Print out tree [Y]: 
Print steps at each site [N]: 
Print sequences at all nodes of tree [N]: 


Weights set # 1:


Adding species:
   1. Delta     
   2. Alpha     
   3. Gamma     
   4. Epsilon   
   5. Beta      

Doing global rearrangements
  !---------!
   .........
   .........

Output written to file "protpars.fprotpars"

Trees also written onto file "protpars.treefile"

Weights set # 2:


Adding species:
   1. Epsilon   
   2. Alpha     
   3. Delta     
   4. Gamma     
   5. Beta      

Doing global rearrangements
  !---------!
   .........
   .........

Output written to file "protpars.fprotpars"

Trees also written onto file "protpars.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Protein parsimony algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fprotpars] Phylip protpars program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Phylip weights file (optional)
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1] Threshold value (Number 1.000 or more)
   -whichcode          menu       [Universal] Use which genetic code (Values:
                                  U (Universal); M (Mitochondrial); V
                                  (Vertebrate mitochondrial); F (Fly
                                  mitochondrial); Y (Yeast mitochondrial))
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fprotpars] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -stepbox            boolean    [N] Print steps at each site
   -ancseq             boolean    [N] Print sequences at all nodes of tree
*  -[no]dotdiff        boolean    [Y] Use dot differencing to display results

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip protpars program output file Output file <*>.fprotpars
Additional (Optional) qualifiers
-weights properties Phylip weights file (optional) Property value(s)  
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 1.000 or more 1
-whichcode list Use which genetic code
U (Universal)
M (Mitochondrial)
V (Vertebrate mitochondrial)
F (Fly mitochondrial)
Y (Yeast mitochondrial)
Universal
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fprotpars
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-stepbox boolean Print steps at each site Boolean value Yes/No No
-ancseq boolean Print sequences at all nodes of tree Boolean value Yes/No No
-[no]dotdiff boolean Use dot differencing to display results Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fprotpars reads any normal sequence USAs.

Input files for usage example

File: protpars.dat

     5    10
Alpha     ABCDEFGHIK
Beta      AB--EFGHIK
Gamma     ?BCDSFG*??
Delta     CIKDEFGHIK
Epsilon   DIKDEFGHIK

Input files for usage example 3

File: protpars2.dat

    5    10
Alpha     AABBCCCFHK 
Beta      AABB---FHK 
Gamma     ??BBCCCF*? 
Delta     CCIIKKKFHK 
Epsilon   DDIIKKKFHK 
    5    10
Alpha     AADDEGGIIK 
Beta      AA--EGGIIK 
Gamma     ??DDSGG??? 
Delta     CCDDEGGIIK 
Epsilon   DDDDEGGIIK 
    5    10
Alpha     AACDDDEGHI 
Beta      AA----EGHI 
Gamma     ??CDDDSG*? 
Delta     CCKDDDEGHI 
Epsilon   DDKDDDEGHI 

Input files for usage example 4

File: protparswts.dat

1111100000
0000011111

Output file format

fprotpars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees, and (if option 2 is toggled on) a table of the number of changes of state required in each position.

If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item "." appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?" there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand.

If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across positions. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual positions, and a conclusion as to whether that tree is or is not significantly worse than the best one.

Output files for usage example

File: protpars.fprotpars


Protein parsimony algorithm, version 3.69.650



     3 trees in all found




     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
  1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000




           +--Epsilon   
     +-----4  
     !     +--Delta     
  +--3  
  !  !     +--Gamma     
  1  +-----2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     16.000

File: protpars.treefile

((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333];

Output files for usage example 2

File: protpars.fprotpars


Protein parsimony algorithm, version 3.69.650

 5 species,  10  sites


Name          Sequences
----          ---------

Alpha        ABCDEFGHIK 
Beta         ..--...... 
Gamma        ?...S..*?? 
Delta        CIK....... 
Epsilon      DIK....... 




     3 trees in all found




  +-----------Beta      
  !  
  1  +--------Gamma     
  !  !  
  +--2     +--Epsilon   
     !  +--4  
     +--3  +--Delta     
        !  
        +-----Alpha     

  remember: (although rooted by outgroup) this is an unrooted tree!


requires a total of     14.000

steps in each position:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       3   1   5   3   2   0   0   2   0
   10!   0                                    

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)


root     1                AN??EFGHIK 
  1   Beta        maybe   .B--...... 


  [Part of this file has been deleted for brevity]


root     1                AN??EFGHIK 
  1   Beta        maybe   .B--...... 
  1      2        maybe   ..CD...... 
  2      3        maybe   ?......... 
  3      4         yes    .IK....... 
  4   Epsilon     maybe   D......... 
  4   Delta        yes    C......... 
  3   Gamma        yes    ?B..S..*?? 
  2   Alpha       maybe   .B........ 





  +-----------Beta      
  !  
  1        +--Epsilon   
  !  +-----4  
  !  !     +--Delta     
  +--3  
     !     +--Gamma     
     +-----2  
           +--Alpha     

  remember: (although rooted by outgroup) this is an unrooted tree!


requires a total of     14.000

steps in each position:
         0   1   2   3   4   5   6   7   8   9
     *-----------------------------------------
    0!       3   1   5   3   2   0   0   2   0
   10!   0                                    

From    To     Any Steps?    State at upper node
                             ( . means same as in the node below it on tree)


root     1                AN??EFGHIK 
  1   Beta        maybe   .B--...... 
  1      3         yes    ..?D...... 
  3      4         yes    ?IK....... 
  4   Epsilon     maybe   D......... 
  4   Delta        yes    C......... 
  3      2         yes    ..C....... 
  2   Gamma        yes    ?B..S..*?? 
  2   Alpha       maybe   .B........ 


File: protpars.treefile

(Beta,(Gamma,((Epsilon,Delta),Alpha)))[0.3333];
(Beta,(((Epsilon,Delta),Gamma),Alpha))[0.3333];
(Beta,((Epsilon,Delta),(Gamma,Alpha)))[0.3333];

Output files for usage example 3

File: protpars2.fprotpars


Protein parsimony algorithm, version 3.69.650


Data set # 1:


     3 trees in all found




     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
  1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     25.000




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     25.000




           +--Epsilon   
     +-----4  


  [Part of this file has been deleted for brevity]

     +--------Gamma     
  +--2  
  !  !  +-----Epsilon   
  !  +--4  
  1     !  +--Delta     
  !     +--3  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     24.000




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     24.000




           +--Epsilon   
     +-----4  
     !     +--Delta     
  +--3  
  !  !     +--Gamma     
  1  +-----2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     24.000

File: protpars2.treefile

((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333];
((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.0667];
(((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.0667];
((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.0667];
((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.0667];
((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.0667];
(((Delta,Gamma),(Epsilon,Beta)),Alpha)[0.0667];
(((Delta,(Epsilon,Gamma)),Beta),Alpha)[0.0667];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.0667];
((Epsilon,((Delta,Gamma),Beta)),Alpha)[0.0667];
(((Epsilon,(Delta,Gamma)),Beta),Alpha)[0.0667];
((Delta,(Gamma,(Epsilon,Beta))),Alpha)[0.0667];
((Delta,((Epsilon,Gamma),Beta)),Alpha)[0.0667];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.0667];
((Delta,(Epsilon,(Gamma,Beta))),Alpha)[0.0667];
((Epsilon,(Delta,(Gamma,Beta))),Alpha)[0.0667];
((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.2000];
((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.2000];
((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.2000];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.2000];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.2000];

Output files for usage example 4

File: protpars.fprotpars


Protein parsimony algorithm, version 3.69.650




Weights set # 1:


     3 trees in all found




     +--------Gamma     
     !  
  +--2     +--Epsilon   
  !  !  +--4  
  !  +--3  +--Delta     
  1     !  
  !     +-----Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     14.000




           +--Epsilon   
        +--4  
     +--3  +--Delta     
     !  !  
  +--2  +-----Gamma     
  !  !  
  1  +--------Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of     14.000






  [Part of this file has been deleted for brevity]

           +--Epsilon   
     +-----4  
     !     +--Delta     
  +--3  
  !  !     +--Gamma     
  1  +-----2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      2.000




     +--------Delta     
  +--3  
  !  !  +-----Epsilon   
  !  +--4  
  1     !  +--Gamma     
  !     +--2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      2.000




     +--------Epsilon   
  +--4  
  !  !  +-----Delta     
  !  +--3  
  1     !  +--Gamma     
  !     +--2  
  !        +--Beta      
  !  
  +-----------Alpha     

  remember: this is an unrooted tree!


requires a total of      2.000

File: protpars.treefile

((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.3333];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.3333];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.3333];
((Gamma,(Delta,(Epsilon,Beta))),Alpha)[0.0667];
(((Epsilon,Gamma),(Delta,Beta)),Alpha)[0.0667];
((Gamma,((Epsilon,Delta),Beta)),Alpha)[0.0667];
((Epsilon,(Gamma,(Delta,Beta))),Alpha)[0.0667];
((Gamma,(Epsilon,(Delta,Beta))),Alpha)[0.0667];
(((Delta,Gamma),(Epsilon,Beta)),Alpha)[0.0667];
(((Delta,(Epsilon,Gamma)),Beta),Alpha)[0.0667];
((((Epsilon,Delta),Gamma),Beta),Alpha)[0.0667];
((Epsilon,((Delta,Gamma),Beta)),Alpha)[0.0667];
(((Epsilon,(Delta,Gamma)),Beta),Alpha)[0.0667];
((Delta,(Gamma,(Epsilon,Beta))),Alpha)[0.0667];
((Delta,((Epsilon,Gamma),Beta)),Alpha)[0.0667];
(((Epsilon,Delta),(Gamma,Beta)),Alpha)[0.0667];
((Delta,(Epsilon,(Gamma,Beta))),Alpha)[0.0667];
((Epsilon,(Delta,(Gamma,Beta))),Alpha)[0.0667];

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fdnapars
fdnapars

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

DNA parsimony algorithm

Description

Estimates phylogenies by the parsimony method using nucleic acid sequences. Allows use of the full IUB ambiguity codes, and estimates ancestral nucleotide states. Gaps are treated as a fifth nucleotide state. It can also do transversion parsimony. Can cope with multifurcations, reconstruct ancestral states, use 0/1 character weights, and infer branch lengths.

Algorithm

This program carries out unrooted parsimony (analogous to Wagner trees) (Eck and Dayhoff, 1966; Kluge and Farris, 1969) on DNA sequences. The method of Fitch (1971) is used to count the number of changes of base needed on a given tree. The assumptions of this method are analogous to those of MIX:
  1. Each site evolves independently.
  2. Different lineages evolve independently.
  3. The probability of a base substitution at a given site is small over the lengths of time involved in a branch of the phylogeny.
  4. The expected amounts of change in different branches of the phylogeny do not vary by so much that two changes in a high-rate branch are more probable than one change in a low-rate branch.
  5. The expected amounts of change do not vary enough among sites that two changes in one site are more probable than one change in another.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b, 1988), but also read the exchange between Felsenstein and Sober (1986).

Change from an occupied site to a deletion is counted as one change. Reversion from a deletion to an occupied site is allowed and is also counted as one change. Note that this in effect assumes that a deletion N bases long is N separate events.
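The Fitch counting described above can be sketched in a few lines. This is an illustration only (not the program's code), with a hypothetical tree and sequences, and with "-" simply handled as a fifth state.

def fitch_steps(tree, seqs, site):
    """Return (state set, steps) for one site on a rooted binary tree."""
    if isinstance(tree, str):                 # leaf: tree is a species name
        return {seqs[tree][site]}, 0
    left, right = tree
    sl, nl = fitch_steps(left, seqs, site)
    sr, nr = fitch_steps(right, seqs, site)
    inter = sl & sr
    if inter:
        return inter, nl + nr                 # no extra change needed here
    return sl | sr, nl + nr + 1               # one change on this branch

seqs = {"Alpha": "AC-GT", "Beta": "ACCGT", "Gamma": "AT-GA", "Delta": "ATCGA"}
tree = (("Alpha", "Beta"), ("Gamma", "Delta"))   # arbitrary rooting of an unrooted tree
total = sum(fitch_steps(tree, seqs, i)[1] for i in range(len(seqs["Alpha"])))
print("total steps:", total)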

Dnapars can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch. It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier versions of Dnapars.

The input data is standard. The first line of the input file contains the number of species and the number of sites.

Next come the species data. Each sequence starts on a new line, has a ten-character species name that must be blank-filled to be of that length, followed immediately by the species data in the one-letter code. The sequences must either be in the "interleaved" or "sequential" formats described in the Molecular Sequence Programs document. The I option selects between them. The sequences can have internal blanks in the sequence but there must be no extra blanks at the end of the terminated line. Note that a blank is not a valid symbol for a deletion.
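A small sketch of writing data in the "sequential" layout described above: a first line with the species and site counts, then one line per species with the name blank-filled to exactly ten characters. This is not part of fdnapars; the output file name and sequences are hypothetical.

seqs = {
    "Alpha": "AACGUGGCCAAAU",
    "Beta":  "AAGGUCGCCAAAC",
    "Gamma": "CAUUUCGUCACAA",
}

with open("example.phy", "w") as fh:
    nsites = len(next(iter(seqs.values())))
    fh.write("   %d   %d\n" % (len(seqs), nsites))
    for name, seq in seqs.items():
        fh.write("%-10s%s\n" % (name, seq))   # name padded to exactly 10 columns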

Usage

Here is a sample session with fdnapars


% fdnapars 
DNA parsimony algorithm
Input (aligned) nucleotide sequence set(s): dnapars.dat
Phylip tree file (optional): 
Phylip dnapars program output file [dnapars.fdnapars]: 

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements on the first of the trees tied for best
  !---------!
   .........
   .........

Collapsing best trees
   .

Output written to file "dnapars.fdnapars"

Tree also written onto file "dnapars.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

DNA parsimony algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fdnapars] Phylip dnapars program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -maxtrees           integer    [10000] Number of trees to save (Integer
                                  from 1 to 1000000)
*  -[no]thorough       toggle     [Y] More thorough search
*  -[no]rearrange      boolean    [Y] Rearrange on just one best tree
   -transversion       boolean    [N] Use transversion parsimony
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1.0] Threshold value (Number 1.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdnapars] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -stepbox            boolean    [N] Print out steps in each site
   -ancseq             boolean    [N] Print sequences at all nodes of tree
   -[no]treeprint      boolean    [Y] Print out tree
*  -[no]dotdiff        boolean    [Y] Use dot differencing to display results

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip dnapars program output file Output file <*>.fdnapars
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-maxtrees integer Number of trees to save Integer from 1 to 1000000 10000
-[no]thorough toggle More thorough search Toggle value Yes/No Yes
-[no]rearrange boolean Rearrange on just one best tree Boolean value Yes/No Yes
-transversion boolean Use transversion parsimony Boolean value Yes/No No
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 1.000 or more 1.0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnapars
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-stepbox boolean Print out steps in each site Boolean value Yes/No No
-ancseq boolean Print sequences at all nodes of tree Boolean value Yes/No No
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-[no]dotdiff boolean Use dot differencing to display results Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnapars reads any normal sequence USAs.

Input files for usage example

File: dnapars.dat

   5   13
Alpha     AACGUGGCCAAAU
Beta      AAGGUCGCCAAAC
Gamma     CAUUUCGUCACAA
Delta     GGUAUUUCGGCCU
Epsilon   GGGAUCUCGGCCC

Output file format

fdnapars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other.

If option 2 is toggled on, a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item "." appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?" or one of the IUB ambiguity symbols, there will be multiple equally parsimonious assignments of states; the user must work these out for themselves by hand. A "?" in the reconstructed states means that in addition to one or more bases, a deletion may or may not be present. If option 6 is left in its default state, the trees found will be written to a tree file, so that they are available to be used in other programs.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across sites. If the mean is more than 1.96 standard deviations different, the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, this is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989). It uses the mean and variance of the differences in the number of steps between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different, then the trees are declared significantly different. If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction.

In the version used here the variances and covariances of the sums of steps across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used. In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one.

Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.
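A rough sketch of the resampling idea behind the Shimodaira-Hasegawa correction described above. This is not the program's own code; it assumes numpy is available, and the per-site step counts for the three trees are hypothetical example data.

import numpy as np

steps = np.array([[3, 1, 5, 3, 2, 0, 0, 2, 0, 0],    # tree 1 (best)
                  [3, 2, 6, 3, 2, 1, 0, 2, 0, 0],    # tree 2
                  [4, 2, 5, 4, 2, 0, 1, 2, 0, 0]])   # tree 3
totals = steps.sum(axis=1)
nsites = steps.shape[1]
cov = np.cov(steps) * nsites              # covariance of the per-tree step totals
rng = np.random.default_rng(1)

# "least favorable hypothesis": all trees share the same expected total
samples = rng.multivariate_normal(np.zeros(len(totals)), cov, size=10000)
obs = totals - totals.min()
sim = samples - samples.min(axis=1, keepdims=True)
pvals = (sim >= obs).mean(axis=0)
for i, (t, d, p) in enumerate(zip(totals, obs, pvals), 1):
    print("tree %d: steps=%d  diff=%d  P=%.3f" % (i, t, d, p))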

Output files for usage example

File: dnapars.fdnapars


DNA parsimony algorithm, version 3.69.650


One most parsimonious tree found:


                                            +-----Epsilon   
               +----------------------------3  
  +------------2                            +-------Delta     
  |            |  
  |            +----------------Gamma     
  |  
  1----Beta      
  |  
  +---------Alpha     


requires a total of     19.000

  between      and       length
  -------      ---       ------
     1           2       0.217949
     2           3       0.487179
     3      Epsilon      0.096154
     3      Delta        0.134615
     2      Gamma        0.275641
     1      Beta         0.076923
     1      Alpha        0.173077

File: dnapars.treefile

(((Epsilon:0.09615,Delta:0.13462):0.48718,Gamma:0.27564):0.21795,
Beta:0.07692,Alpha:0.17308);
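The tree file shown above is plain Newick, so it can be read directly by other software. A minimal sketch, assuming Biopython is installed:

from Bio import Phylo

tree = Phylo.read("dnapars.treefile", "newick")
print(tree.count_terminals(), "tips")
Phylo.draw_ascii(tree)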

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fpromlk
fpromlk

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Protein phylogeny by maximum likelihood

Description

Same as PROML but assumes a molecular clock. The use of the two programs together permits a likelihood ratio test of the molecular clock hypothesis to be made.

Estimates phylogenies from protein amino acid sequences by maximum likelihood. The PAM, JTT, or PMB models can be employed, and a Hidden Markov model of rates can also be used, with the program inferring which sites have which rates. This also allows gamma-distribution and gamma-plus-invariant-sites distributions of rates across sites. It also allows different rates of change at known sites.

Algorithm

This program implements the maximum likelihood method for protein amino acid sequences under the constraint that the trees estimated must be consistent with a molecular clock. The molecular clock is the assumption that the tips of the tree are all equidistant, in branch length, from its root. This program is indirectly related to PROML. It uses the Dayhoff probability model of change between amino acids. Its algorithmic details are not yet published, but many of them are similar to DNAMLK.

The assumptions of the model are:

  1. Each position in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. The probabilities of change between amino acids are given by the model of Jones, Taylor, and Thornton (1992), the PMB model of Veerassamy, Smith and Tillier (2004), or the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et. al., 1979).

Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probabilities of occurrence of each are, and what the average length is of a patch of positions all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.
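The summation over assignments of rates to positions can be illustrated with a small forward-algorithm sketch. The transition scheme below (redraw a rate from the prior with probability 1/patch length) and the per-position likelihoods are assumptions made purely for illustration; this is not fpromlk's actual implementation.

rates  = [1.0, 2.4, 0.0]
priors = [0.4, 0.3, 0.3]
patch  = 2.0

# L[i][k] = likelihood of position i given rate category k (made-up values)
L = [[0.10, 0.05, 0.02],
     [0.08, 0.09, 0.01],
     [0.12, 0.03, 0.20]]

redraw = 1.0 / patch
f = [priors[k] * L[0][k] for k in range(3)]          # forward values at position 0
for i in range(1, len(L)):
    tot = sum(f)
    f = [((1 - redraw) * f[k] + redraw * priors[k] * tot) * L[i][k]
         for k in range(3)]
print("likelihood summed over rate assignments:", sum(f))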

The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions.

This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at relative rates of 0.2 compared to 1.0 at other positions. If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.

Usage

Here is a sample session with fpromlk


% fpromlk 
Protein phylogeny by maximum likelihood
Input (aligned) protein sequence set(s): promlk.dat
Phylip tree file (optional): 
Phylip promlk program output file [promlk.fpromlk]: 


Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Output written to file "promlk.fpromlk"

Tree also written onto file "promlk.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Protein phylogeny by maximum likelihood
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fpromlk] Phylip promlk program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      Rate for each category
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
*  -lengths            boolean    [N] Use branch lengths from user trees
   -model              menu       [Jones-Taylor-Thornton] Probability model
                                  for amino acid change (Values: j
                                  (Jones-Taylor-Thornton); h (Henikoff/Tillier
                                  PMBs); d (Dayhoff PAM))
   -gammatype          menu       [n] Rate variation among sites (Values: g
                                  (Gamma distributed rates); i
                                  (Gamma+invariant sites); h (User defined HMM
                                  of rates); n (Constant rate))
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ngammacat          integer    [1] Number of categories (1-9) (Integer from
                                  1 to 9)
*  -invarcoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ninvarcat          integer    [1] Number of categories (1-9) including one
                                  for invariant sites (Integer from 1 to 9)
*  -invarfrac          float      [0.0] Fraction of invariant sites (Number
                                  from 0.000 to 1.000)
*  -nhmmcategories     integer    [1] Number of HMM rate categories (Integer
                                  from 1 to 9)
*  -hmmrates           array      [1.0] HMM category rates
*  -hmmprobabilities   array      [1.0] Probability for each HMM category
*  -adjsite            boolean    [N] Rates at adjacent sites correlated
*  -lambda             float      [1.0] Mean block length of sites having the
                                  same rate (Number 1.000 or more)
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
*  -global             boolean    [N] Global rearrangements
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fpromlk] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -hypstate           boolean    [N] Reconstruct hypothetical sequence

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip promlk program output file Output file <*>.fpromlk
Additional (Optional) qualifiers
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Rate for each category List of floating point numbers  
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-model list Probability model for amino acid change
j (Jones-Taylor-Thornton)
h (Henikoff/Tillier PMBs)
d (Dayhoff PAM)
Jones-Taylor-Thornton
-gammatype list Rate variation among sites
g (Gamma distributed rates)
i (Gamma+invariant sites)
h (User defined HMM of rates)
n (Constant rate)
n
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ngammacat integer Number of categories (1-9) Integer from 1 to 9 1
-invarcoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ninvarcat integer Number of categories (1-9) including one for invariant sites Integer from 1 to 9 1
-invarfrac float Fraction of invariant sites Number from 0.000 to 1.000 0.0
-nhmmcategories integer Number of HMM rate categories Integer from 1 to 9 1
-hmmrates array HMM category rates List of floating point numbers 1.0
-hmmprobabilities array Probability for each HMM category List of floating point numbers 1.0
-adjsite boolean Rates at adjacent sites correlated Boolean value Yes/No No
-lambda float Mean block length of sites having the same rate Number 1.000 or more 1.0
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-global boolean Global rearrangements Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fpromlk
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-hypstate boolean Reconstruct hypothetical sequence Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fpromlk reads any normal sequence USAs.

Input files for usage example

File: promlk.dat

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC

Output file format

fpromlk output starts by giving the number of species and the number of amino acid positions.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the sequences printed in groups of ten amino acids. The trees found are printed as a rooted tree topology. The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. The unit of branch length is the expected fraction of amino acids changed (so that 1.0 is 100 PAMs).

A table is printed showing the length of each tree segment, and the time (in units of expected amino acid substitutions per position) of each fork in the tree, measured from the root of the tree. I have not attempted to include code for approximate confidence limits on branch points, as I have done for branch lengths in PROML, both because of the extreme crudeness of that test, and because the variation of times for different forks would be highly correlated.

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

This program makes possible a (reasonably) legitimate statistical test of the molecular clock. To do such a test, run PROML and PROMLK on the same data. If the trees obtained are of the same topology (when considered as unrooted), it is legitimate to compare their likelihoods by the likelihood ratio test. In PROML the likelihood has been computed by estimating 2n-3 branch lengths, if there are n tips on the tree. In PROMLK it has been computed by estimating n-1 branching times (in effect, n-1 branch lengths). The difference in the number of parameters is (2n-3)-(n-1) = n-2. To perform the test take the difference in log likelihoods between the two runs (PROML should be the higher of the two, barring numerical iteration difficulties) and double it. Look this up on a chi-square distribution with n-2 degrees of freedom. If the result is significant, the log likelihood has been significantly increased by allowing all 2n-3 branch lengths to be estimated instead of just n-1, and the molecular clock may be rejected.
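A short worked example of this likelihood ratio test. The log likelihoods and tip count below are hypothetical numbers, and scipy is assumed to be available; this is not program output.

from scipy.stats import chi2

n_tips     = 5
lnL_proml  = -132.1          # unconstrained run (fproml / PROML)
lnL_promlk = -134.7          # clock-constrained run (fpromlk / PROMLK)

stat = 2.0 * (lnL_proml - lnL_promlk)   # doubled difference in log likelihood
df   = n_tips - 2                       # (2n-3) - (n-1) = n-2 degrees of freedom
p    = chi2.sf(stat, df)
print("LRT statistic %.2f on %d df, P = %.3f" % (stat, df, p))
print("clock rejected" if p < 0.05 else "clock not rejected")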

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different, then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of 100 times the expected numbers of amino acid substitutions, scaled so that the average rate of change, averaged over all the positions analyzed, is set to 100.0, if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected percentage of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch lengths are in terms of the expected underlying numbers of changes. That means that a branch of length 26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch, but we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
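A back-of-the-envelope illustration of that last point. It uses a simple Poisson model with 20 equally exchangeable amino acids, which is an assumption made for illustration only, not the JTT model the program actually uses.

import math

d = 0.26                                   # branch length 26 = 0.26 substitutions per position
p_observed = (19.0 / 20.0) * (1.0 - math.exp(-20.0 * d / 19.0))
print("expected observed difference: %.1f%%" % (100.0 * p_observed))   # about 22.7%, not 26%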

Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol is printed in capital letters (W rather than w). One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.
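The rule for printing the ancestral symbols can be illustrated with a small sketch. The helper below is hypothetical, not taken from the promlk source; it assumes that the fraction of the likelihood accounted for by each amino acid at a given node and position has already been computed.

def ancestral_symbol(posteriors, threshold=0.95):
    # posteriors: mapping from one-letter amino acid code to the fraction
    # of the likelihood it accounts for at this position (values sum to 1).
    # The best amino acid is printed in upper case when it accounts for
    # more than `threshold` of the likelihood, otherwise in lower case.
    best = max(posteriors, key=posteriors.get)
    return best.upper() if posteriors[best] > threshold else best.lower()

# A position where tryptophan accounts for 97% of the likelihood:
print(ancestral_symbol({"W": 0.97, "Y": 0.02, "F": 0.01}))   # -> "W"
print(ancestral_symbol({"W": 0.60, "Y": 0.30, "F": 0.10}))   # -> "w"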

Output files for usage example

File: promlk.fpromlk


Amino acid sequence
   Maximum Likelihood method with molecular clock, version 3.69.650

Jones-Taylor-Thornton model of amino acid change





                                          +-----------Epsilon   
  +---------------------------------------4  
  !                                       +-----------Delta     
--3  
  !                      +----------------------------Gamma     
  +----------------------2  
                         !                   +--------Beta      
                         +-------------------1  
                                             +--------Alpha     


Ln Likelihood =  -134.70332

 Ancestor      Node      Node Height     Length
 --------      ----      ---- ------     ------
 root            3      
   3             4          0.66464      0.66464
   4          Epsilon       0.85971      0.19507
   4          Delta         0.85971      0.19507
   3             2          0.37420      0.37420
   2          Gamma         0.85971      0.48551
   2             1          0.70208      0.32788
   1          Beta          0.85971      0.15763
   1          Alpha         0.85971      0.15763




File: promlk.treefile

((Epsilon:0.19507,Delta:0.19507):0.66464,(Gamma:0.48551,
(Beta:0.15763,Alpha:0.15763):0.32788):0.37420);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/Makefile0000664000175000017500000003544212171071711015015 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # emboss_doc/html/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/PHYLIPNEW pkglibdir = $(libdir)/PHYLIPNEW pkglibexecdir = $(libexecdir)/PHYLIPNEW am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = x86_64-unknown-linux-gnu host_triplet = x86_64-unknown-linux-gnu subdir = emboss_doc/html DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/doc/html/embassy/phylipnew ACLOCAL = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run aclocal-1.12 AMTAR = $${TAR-tar} ANT = AR = ar AUTOCONF = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoconf AUTOHEADER = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoheader AUTOMAKE = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run automake-1.12 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -O2 CPP = gcc -E CPPFLAGS = -DAJ_LinuxLF -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 CXX = g++ CXXCPP = g++ -E CXXDEPMODE = depmode=gcc3 CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps DEVWARN_CFLAGS = DLLTOOL = false DSYMUTIL = DUMPBIN = ECHO_C = ECHO_N = -n ECHO_T = EGREP = /usr/bin/grep -E EXEEXT = FGREP = /usr/bin/grep -F GREP = /usr/bin/grep HAVE_MEMMOVE = HAVE_STRERROR = INSTALL = /usr/bin/install -c INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} INSTALL_SCRIPT = ${INSTALL} INSTALL_STRIP_PROGRAM = $(install_sh) -c -s JAR = JAVA = JAVAC = JAVA_CFLAGS = JAVA_CPPFLAGS = -DNO_AUTH JAVA_LDFLAGS = LD = /usr/bin/ld -m elf_x86_64 LDFLAGS = LIBOBJS = LIBS = -lm -lhpdf -lgd -lpng -lz -lm LIBTOOL = $(SHELL) $(top_builddir)/libtool LIPO = LN_S = ln -s LTLIBOBJS = MAKEINFO = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run makeinfo MANIFEST_TOOL = : MKDIR_P = /usr/bin/mkdir -p MYSQL_CFLAGS = -I/usr/include/mysql -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-strict-aliasing -fwrapv -fPIC -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 MYSQL_CONFIG = /usr/bin/mysql_config MYSQL_CPPFLAGS = -I/usr/include/mysql MYSQL_LDFLAGS = -L/usr/lib64/mysql -lmysqlclient -lpthread -lz -lm -lrt -lssl -lcrypto -ldl MYSQL_VERSION = 5.5.32 NM = /usr/bin/nm -B NMEDIT = OBJDUMP = objdump OBJEXT = o OTOOL = OTOOL64 = PACKAGE = PHYLIPNEW PACKAGE_BUGREPORT = emboss-bug@emboss.open-bio.org PACKAGE_NAME = PHYLIPNEW PACKAGE_STRING = PHYLIPNEW 3.69.650 PACKAGE_TARNAME = PHYLIPNEW PACKAGE_URL = http://emboss.open-bio.org/ PACKAGE_VERSION = 3.69.650 PATH_SEPARATOR = : PCRE_DATE = 11-Apr-2009 PCRE_LIB_VERSION = 0:1:0 PCRE_MAJOR = 7 PCRE_MINOR = 9 PCRE_POSIXLIB_VERSION = 0:0:0 PCRE_VERSION = 7.9 POSIX_MALLOC_THRESHOLD = -DPOSIX_MALLOC_THRESHOLD=10 POSTGRESQL_CFLAGS = -I/usr/include POSTGRESQL_CONFIG = /usr/bin/pg_config POSTGRESQL_CPPFLAGS = -I/usr/include POSTGRESQL_LDFLAGS = -L/usr/lib64 -lpq POSTGRESQL_VERSION = 9.2.4 RANLIB = ranlib SED = /usr/bin/sed SET_MAKE = SHELL = /bin/sh STRIP = strip VERSION = 3.69.650 WARN_CFLAGS = XLIB = -lX11 -lXaw -lXt XMKMF = X_CFLAGS = X_EXTRA_LIBS = X_LIBS = X_PRE_LIBS = -lSM -lICE abs_builddir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc/html abs_srcdir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc/html abs_top_builddir = /data/scratch/embossdist/embassy/phylipnew abs_top_srcdir = /data/scratch/embossdist/embassy/phylipnew ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ ac_ct_DUMPBIN = am__include = include am__leading_dot = . 
am__quote = am__tar = $${TAR-tar} chof - "$$tardir" am__untar = $${TAR-tar} xf - bindir = ${exec_prefix}/bin build = x86_64-unknown-linux-gnu build_alias = build_cpu = x86_64 build_os = linux-gnu build_vendor = unknown builddir = . datadir = ${datarootdir} datarootdir = ${prefix}/share docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} dvidir = ${docdir} embprefix = /usr/local exec_prefix = ${prefix} host = x86_64-unknown-linux-gnu host_alias = host_cpu = x86_64 host_os = linux-gnu host_vendor = unknown htmldir = ${docdir} includedir = ${prefix}/include infodir = ${datarootdir}/info install_sh = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localedir = ${datarootdir}/locale localstatedir = ${prefix}/var mandir = ${datarootdir}/man mkdir_p = $(MKDIR_P) oldincludedir = /usr/include pdfdir = ${docdir} prefix = /usr/local program_transform_name = s,x,x, psdir = ${docdir} sbindir = ${exec_prefix}/sbin sharedstatedir = ${prefix}/com srcdir = . sysconfdir = ${prefix}/etc target_alias = top_build_prefix = ../../ top_builddir = ../.. top_srcdir = ../.. pkgdata_DATA = index.html \ fclique.html \ fconsense.html fcontml.html fcontrast.html \ fdiscboot.html fdnacomp.html fdnadist.html fdnainvar.html \ fdnaml.html fdnamlk.html fdnamove.html fdnapars.html fdnapenny.html \ fdollop.html fdolmove.html fdolpenny.html \ fdrawgram.html fdrawtree.html \ ffactor.html ffitch.html ffreqboot.html \ fgendist.html fkitsch.html \ fmix.html fmove.html fneighbor.html \ fpars.html fpenny.html fproml.html fpromlk.html \ fprotdist.html fprotpars.html \ frestboot.html frestdist.html frestml.html fretree.html \ fseqboot.html fseqbootall.html \ ftreedist.html ftreedistpair.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/html/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/html/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/html/fdrawgram.html0000664000175000017500000006001612171064331016210 00000000000000 EMBOSS: fdrawgram
fdrawgram

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Plots a cladogram- or phenogram-like rooted tree diagram

Description

Plots rooted phylogenies, cladograms, circular trees and phenograms in a wide variety of user-controllable formats. The program is interactive and allows previewing of the tree on PC, Macintosh, or X Windows screens, or on Tektronix or Digital graphics terminals. Final output can be to a file formatted for one of the drawing programs, for a ray-tracing or VRML browser, or one that can be sent to a laser printer (such as Postscript or PCL-compatible printers), to graphics screens or terminals, to pen plotters or to dot matrix printers capable of graphics.

Similar to DRAWTREE but plots rooted phylogenies.

Algorithm

DRAWGRAM interactively plots a cladogram- or phenogram-like rooted tree diagram, with many options including orientation of tree and branches, style of tree, label sizes and angles, tree depth, margin sizes, stem lengths, and placement of nodes in the tree. Particularly if you can use your computer to preview the plot, you can very effectively adjust the details of the plotting to get just the kind of plot you want.

To understand the working of DRAWGRAM and DRAWTREE, you should first read the Tree Drawing Programs web page in this documentation.

As with DRAWTREE, to run DRAWGRAM you need a compiled copy of the program, a font file, and a tree file. The tree file has a default name of intree. The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default. Note that the program will get confused if the input tree file has the number of trees on the first line of the file, so that number may have to be removed.
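If your tree file does have such a leading count, one way to strip it before plotting is a few lines of Python like the following (a sketch only; the file names are placeholders):

# Drop a leading "number of trees" line from a tree file before giving
# it to fdrawgram.
with open("intree") as infile:
    lines = infile.readlines()

if lines and lines[0].strip().isdigit():
    lines = lines[1:]                       # remove the tree-count line

with open("intree.stripped", "w") as outfile:
    outfile.writelines(lines)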

Usage

Here is a sample session with fdrawgram


% fdrawgram -previewer n 
Plots a cladogram- or phenogram-like rooted tree diagram
Phylip tree file: drawgram.tree
Phylip drawgram output file [drawgram.fdrawgram]: 

DRAWGRAM from PHYLIP version 3.69.650
Reading tree ... 
Tree has been read.
Loading the font .... 
Font loaded.

Writing plot file ...

Plot written to file "drawgram.fdrawgram"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Plots a cladogram- or phenogram-like rooted tree diagram
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-intreefile]        tree       Phylip tree file
  [-plotfile]          outfile    [*.fdrawgram] Phylip drawgram output file

   Additional (Optional) qualifiers (* if not always prompted):
   -[no]grows          boolean    [Y] Tree grows horizontally
   -style              menu       [c] Tree style output (Values: c (cladogram
                                  (v-shaped)); p (phenogram (branches are
                                  square)); v (curvogram (branches are 1/4 out
                                  of an ellipse)); e (eurogram (branches
                                   angle outward, then up)); s (swoopogram
                                  (branches curve outward then reverse)); o
                                  (circular tree))
   -plotter            menu       [l] Plotter or printer the tree will be
                                  drawn on (Values: l (Postscript printer file
                                  format); m (PICT format (for drawing
                                  programs)); j (HP 75 DPI Laserjet PCL file
                                  format); s (HP 150 DPI Laserjet PCL file
                                  format); y (HP 300 DPI Laserjet PCL file
                                  format); w (MS-Windows Bitmap); f (FIG 2.0
                                  drawing program format); a (Idraw drawing
                                  program format); z (VRML Virtual Reality
                                  Markup Language file); n (PCX 640x350 file
                                  format (for drawing programs)); p (PCX
                                  800x600 file format (for drawing programs));
                                  q (PCX 1024x768 file format (for drawing
                                  programs)); k (TeKtronix 4010 graphics
                                  terminal); x (X Bitmap format); v (POVRAY 3D
                                  rendering program file); r (Rayshade 3D
                                  rendering program file); h (Hewlett-Packard
                                  pen plotter (HPGL file format)); d (DEC
                                  ReGIS graphics (VT240 terminal)); e (Epson
                                  MX-80 dot-matrix printer); c
                                  (Prowriter/Imagewriter dot-matrix printer);
                                  t (Toshiba 24-pin dot-matrix printer); o
                                  (Okidata dot-matrix printer); b (Houston
                                  Instruments plotter); u (other (one you have
                                  inserted code for)))
   -previewer          menu       [x] Previewing device (Values: n (Will not
                                  be previewed); I i (MSDOS graphics screen
                                  m:Macintosh screens); x (X Windows display);
                                  w (MS Windows display); k (TeKtronix 4010
                                  graphics terminal); d (DEC ReGIS graphics
                                  (VT240 terminal)); o (Other (one you have
                                  inserted code for)))
   -lengths            boolean    [N] Use branch lengths from user trees
*  -labelrotation      float      [90.0] Angle of labels (0 degrees is
                                  horizontal for a tree growing vertically)
                                  (Number from 0.000 to 360.000)
   -[no]rescaled       toggle     [Y] Automatically rescale branch lengths
*  -bscale             float      [1.0] Centimeters per unit branch length
                                  (Any numeric value)
   -treedepth          float      [0.53] Depth of tree as fraction of its
                                  breadth (Number from 0.100 to 100.000)
   -stemlength         float      [0.05] Stem length as fraction of tree depth
                                  (Number from 0.010 to 100.000)
   -nodespace          float      [0.3333] Character height as fraction of tip
                                  spacing (Number from 0.100 to 100.000)
   -nodeposition       menu       [c] Position of interior nodes (Values: i
                                  (Intermediate between their immediate
                                  descendants); w (Weighted average of tip
                                  positions); c (Centered among their ultimate
                                  descendants); n (Innermost of immediate
                                  descendants); v (So tree is v shaped))
*  -xmargin            float      [1.65] Horizontal margin (cm) (Number 0.100
                                  or more)
*  -ymargin            float      [2.16] Vertical margin (cm) (Number 0.100 or
                                  more)
*  -xrayshade          float      [1.65] Horizontal margin (pixels) for
                                  Rayshade output (Number 0.100 or more)
*  -yrayshade          float      [2.16] Vertical margin (pixels) for Rayshade
                                  output (Number 0.100 or more)
   -paperx             float      [20.63750] Paper width (Any numeric value)
   -papery             float      [26.98750] Paper height (Number 0.100 or
                                  more)
   -pagesheight        float      [1] Number of trees across height of page
                                  (Number 1.000 or more)
   -pageswidth         float      [1] Number of trees across width of page
                                  (Number 1.000 or more)
   -hpmargin           float      [0.41275] Horizontal overlap (cm) (Number
                                  0.001 or more)
   -vpmargin           float      [0.53975] Vertical overlap (cm) (Number
                                  0.001 or more)

   Advanced (Unprompted) qualifiers:
   -fontfile           string     [font1] Fontfile name (Any string)

   Associated qualifiers:

   "-plotfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-intreefile]
(Parameter 1)
tree Phylip tree file Phylogenetic tree  
[-plotfile]
(Parameter 2)
outfile Phylip drawgram output file Output file <*>.fdrawgram
Additional (Optional) qualifiers
-[no]grows boolean Tree grows horizontally Boolean value Yes/No Yes
-style list Tree style output
c (cladogram (v-shaped))
p (phenogram (branches are square))
v (curvogram (branches are 1/4 out of an ellipse))
e (eurogram (branches angle outward, then up))
s (swoopogram (branches curve outward then reverse))
o (circular tree)
c
-plotter list Plotter or printer the tree will be drawn on
l (Postscript printer file format)
m (PICT format (for drawing programs))
j (HP 75 DPI Laserjet PCL file format)
s (HP 150 DPI Laserjet PCL file format)
y (HP 300 DPI Laserjet PCL file format)
w (MS-Windows Bitmap)
f (FIG 2.0 drawing program format)
a (Idraw drawing program format)
z (VRML Virtual Reality Markup Language file)
n (PCX 640x350 file format (for drawing programs))
p (PCX 800x600 file format (for drawing programs))
q (PCX 1024x768 file format (for drawing programs))
k (TeKtronix 4010 graphics terminal)
x (X Bitmap format)
v (POVRAY 3D rendering program file)
r (Rayshade 3D rendering program file)
h (Hewlett-Packard pen plotter (HPGL file format))
d (DEC ReGIS graphics (VT240 terminal))
e (Epson MX-80 dot-matrix printer)
c (Prowriter/Imagewriter dot-matrix printer)
t (Toshiba 24-pin dot-matrix printer)
o (Okidata dot-matrix printer)
b (Houston Instruments plotter)
u (other (one you have inserted code for))
l
-previewer list Previewing device
n (Will not be previewed)
I i (MSDOS graphics screen m:Macintosh screens)
x (X Windows display)
w (MS Windows display)
k (TeKtronix 4010 graphics terminal)
d (DEC ReGIS graphics (VT240 terminal))
o (Other (one you have inserted code for))
x
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-labelrotation float Angle of labels (0 degrees is horizontal for a tree growing vertically) Number from 0.000 to 360.000 90.0
-[no]rescaled toggle Automatically rescale branch lengths Toggle value Yes/No Yes
-bscale float Centimeters per unit branch length Any numeric value 1.0
-treedepth float Depth of tree as fraction of its breadth Number from 0.100 to 100.000 0.53
-stemlength float Stem length as fraction of tree depth Number from 0.010 to 100.000 0.05
-nodespace float Character height as fraction of tip spacing Number from 0.100 to 100.000 0.3333
-nodeposition list Position of interior nodes
i (Intermediate between their immediate descendants)
w (Weighted average of tip positions)
c (Centered among their ultimate descendants)
n (Innermost of immediate descendants)
v (So tree is v shaped)
c
-xmargin float Horizontal margin (cm) Number 0.100 or more 1.65
-ymargin float Vertical margin (cm) Number 0.100 or more 2.16
-xrayshade float Horizontal margin (pixels) for Rayshade output Number 0.100 or more 1.65
-yrayshade float Vertical margin (pixels) for Rayshade output Number 0.100 or more 2.16
-paperx float Paper width Any numeric value 20.63750
-papery float Paper height Number 0.100 or more 26.98750
-pagesheight float Number of trees across height of page Number 1.000 or more 1
-pageswidth float Number of trees across width of page Number 1.000 or more 1
-hpmargin float Horizontal overlap (cm) Number 0.001 or more 0.41275
-vpmargin float Vertical overlap (cm) Number 0.001 or more 0.53975
Advanced (Unprompted) qualifiers
-fontfile string Fontfile name Any string font1
Associated qualifiers
"-plotfile" associated outfile qualifiers
-odirectory2
-odirectory_plotfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdrawgram reads ...

Input files for usage example

File: drawgram.tree

(Delta,(Epsilon,(Gamma,(Beta,Alpha))));

Output file format

fdrawgram output ...

Output files for usage example

Graphics File: drawgram.fdrawgram

[fdrawgram results]

Data files

The font file has a default name of "fontfile". If there is no file of that name, the program will ask you for the name of a font file (we provide ones that have the names font1 through font6). Once you decide on a favorite one of these, you could make a copy of it and call it fontfile, and it will then be used by default.

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
fdrawtree Plots an unrooted tree diagram
fretree Interactive tree rearrangement

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fdnaml.html0000664000175000017500000014424112171064331015502 00000000000000 EMBOSS: fdnaml
fdnaml

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Estimate nucleotide phylogeny by maximum likelihood

Description

Estimates phylogenies from nucleotide sequences by maximum likelihood. The model employed allows for unequal expected frequencies of the four nucleotides, for unequal rates of transitions and transversions, and for different (prespecified) rates of change in different categories of sites, and also use of a Hidden Markov model of rates, with the program inferring which sites have which rates. This also allows gamma-distribution and gamma-plus-invariant sites distributions of rates across sites.

Algorithm

This program implements the maximum likelihood method for DNA sequences. The present version is faster than earlier versions of DNAML. Details of the algorithm are published in the paper by Felsenstein and Churchill (1996). The model of base substitution allows the expected frequencies of the four bases to be unequal, allows the expected frequencies of transitions and transversions to be unequal, and has several ways of allowing different rates of evolution at different sites.

The assumptions of the present model are:

  1. Each site in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each site undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant sites are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. A substitution consists of one of two sorts of events:
    1. The first kind of event consists of the replacement of the existing base by a base drawn from a pool of purines or a pool of pyrimidines (depending on whether the base being replaced was a purine or a pyrimidine). It can lead either to no change or to a transition.
    2. The second kind of event consists of the replacement of the existing base by a base drawn at random from a pool of bases at known frequencies, independently of the identity of the base which is being replaced. This could lead either to no change, to a transition or to a transversion.

      The ratio of the two purines in the purine replacement pool is the same as their ratio in the overall pool, and similarly for the pyrimidines.

      The ratios of transitions to transversions can be set by the user. The substitution process can be diagrammed as follows: Suppose that you specified A, C, G, and T base frequencies of 0.24, 0.28, 0.27, and 0.21.

      • First kind of event:

        Determine whether the existing base is a purine or a pyrimidine. Draw from the proper pool:

        
              Purine pool:                Pyrimidine pool:
        
             |               |            |               |
             |   0.4706 A    |            |   0.5714 C    |
             |   0.5294 G    |            |   0.4286 T    |
             | (ratio is     |            | (ratio is     |
             |  0.24 : 0.27) |            |  0.28 : 0.21) |
             |_______________|            |_______________|
        
        
      • Second kind of event:

        Draw from the overall pool:

        
                      |                  |
                      |      0.24 A      |
                      |      0.28 C      |
                      |      0.27 G      |
                      |      0.21 T      |
                      |__________________|
        
        

Note that if the existing base is, say, an A, the first kind of event has a 0.4706 probability of "replacing" it by another A. The second kind of event has a 0.24 chance of replacing it by another A. This rather disconcerting model is used because it has nice mathematical properties that make likelihood calculations far easier. A closely similar, but not precisely identical, model having different rates of transitions and transversions has been used by Hasegawa et al. (1985b). The transition probability formulas for the current model were given (with my permission) by Kishino and Hasegawa (1989). Another explanation is available in the paper by Felsenstein and Churchill (1996).
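The pool frequencies shown in the diagrams above follow directly from the specified base frequencies. The short sketch below (an illustration only, not code from the program) reproduces them:

# How the two replacement pools follow from base frequencies
# 0.24 A, 0.28 C, 0.27 G, 0.21 T.
freqs = {"A": 0.24, "C": 0.28, "G": 0.27, "T": 0.21}

purines = {b: freqs[b] for b in ("A", "G")}
pyrimidines = {b: freqs[b] for b in ("C", "T")}

# First kind of event: draw from the pool matching the existing base's type.
purine_pool = {b: p / sum(purines.values()) for b, p in purines.items()}
pyrimidine_pool = {b: p / sum(pyrimidines.values()) for b, p in pyrimidines.items()}
print(purine_pool)       # {'A': 0.4706..., 'G': 0.5294...}
print(pyrimidine_pool)   # {'C': 0.5714..., 'T': 0.4286...}

# Second kind of event: draw from the overall pool, i.e. freqs itself.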

Note the assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those sites that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different sites. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each is, and what the average length is of a patch of sites all having the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant sites. The program computes the likelihood by summing it over all possible assignments of rates to sites, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a site having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive sites with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all sites to rate 2.4, or that fail to have consecutive sites that have the same rate.
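The summation over rate assignments is a standard hidden Markov model forward calculation. The sketch below is a simplified illustration, not the program's implementation: it assumes that site_like, the likelihood of each site computed separately under each rate category, is already available, and it uses a simple "stay with probability 1 - 1/patch_length, otherwise redraw a rate from the priors" transition rule as a stand-in for the published parameterization. No underflow protection is included, so it is only suitable for short toy examples.

import numpy as np

def hmm_log_likelihood(site_like, priors, patch_length):
    priors = np.asarray(priors, dtype=float)
    site_like = np.asarray(site_like, dtype=float)
    stay = 1.0 - 1.0 / patch_length
    # Transition matrix: keep the current rate with probability `stay`,
    # otherwise draw a fresh rate from the prior probabilities.
    trans = stay * np.eye(len(priors)) + (1.0 - stay) * priors

    forward = priors * site_like[0]            # data at site 0, jointly with each rate
    for likes in site_like[1:]:
        forward = (forward @ trans) * likes    # advance one site, fold in its likelihood
    return np.log(forward.sum())               # sum over the rate at the last site

For the example above one would call it with priors [0.4, 0.3, 0.3] and patch_length 2.0, with the three columns of site_like computed at rates 1.0, 2.4 and 0.0.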

The Hidden Markov Model framework for rate variation among sites was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant sites.

This feature effectively removes the artificial assumption that all sites have the same rate, and also means that we need not know in advance the identities of the sites that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each site (for example, we might want first, second, and third codon positions in a protein coding sequence to be three different categories). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of sites in the different categories. For example, we might specify that first, second, and third positions evolve at relative rates of 1.0, 0.8, and 2.7.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a site is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.
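A tiny numerical illustration of this product rule (with invented values):

# Effective rate at a site when both kinds of rate variation are in use.
category_rates = [1.0, 0.8, 2.7]   # user-assigned category rates (1st, 2nd, 3rd positions)
site_category = 2                  # this site is a third codon position
hmm_rate = 2.4                     # HMM regional rate inferred for this site

effective_rate = category_rates[site_category] * hmm_rate
print(effective_rate)              # 2.7 * 2.4 = 6.48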

Usage

Here is a sample session with fdnaml


% fdnaml -printdata -ncategories 2 -categories "1111112222222" -rate "1.0 2.0" -gammatype h -nhmmcategories 5 -hmmrates "0.264 1.413 3.596 7.086 12.641" -hmmprobabilities "0.522 0.399 0.076 0.0036 0.000023" -lambda 1.5 -weight "0111111111110" 
Estimate nucleotide phylogeny by maximum likelihood
Input (aligned) nucleotide sequence set(s): dnaml.dat
Phylip tree file (optional): 
Phylip dnaml program output file [dnaml.fdnaml]: 


 mulsets: false
 datasets : 1
 rctgry : true
 gama : false
 invar : false
 numwts : 1
 numseqs : 1

 ctgry: true
 categs : 2
 rcategs : 5
 auto_: false
 freqsfrom : true
 global : false
 hypstate : false
 improve : false
 invar : false
 jumble : false
 njumble : 1
 lngths : false
 lambda : 1.000000
 cv : 1.000000
 freqa : 0.000000
 freqc : 0.000000
 freqg : 0.000000
 freqt : 0.000000
 outgrno : 1
 outgropt: false
 trout : true
 ttratio : 2.000000
 ttr : false
 usertree : false
 weights: true
 printdata : true
 progress : true
 treeprint: true
 interleaved : false 


Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Output written to file "dnaml.fdnaml"

Tree also written onto file "dnaml.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Example 2


% fdnaml -printdata -njumble 3 -seed 3  
Estimate nucleotide phylogeny by maximum likelihood
Input (aligned) nucleotide sequence set(s): dnaml.dat
Phylip tree file (optional): 
Phylip dnaml program output file [dnaml.fdnaml]: 


 mulsets: false
 datasets : 1
 rctgry : false
 gama : false
 invar : false
 numwts : 0
 numseqs : 1

 ctgry: false
 categs : 1
 rcategs : 1
 auto_: false
 freqsfrom : true
 global : false
 hypstate : false
 improve : false
 invar : false
 jumble : true
 njumble : 3
 lngths : false
 lambda : 1.000000
 cv : 1.000000
 freqa : 0.000000
 freqc : 0.000000
 freqg : 0.000000
 freqt : 0.000000
 outgrno : 1
 outgropt: false
 trout : true
 ttratio : 2.000000
 ttr : false
 usertree : false
 weights: false
 printdata : true
 progress : true
 treeprint: true
 interleaved : false 


Adding species:
   1. Delta     
   2. Epsilon   
   3. Alpha     
   4. Beta      
   5. Gamma     

Adding species:
   1. Beta      
   2. Epsilon   
   3. Delta     
   4. Alpha     
   5. Gamma     

Adding species:
   1. Epsilon   
   2. Alpha     
   3. Gamma     
   4. Delta     
   5. Beta      

Output written to file "dnaml.fdnaml"

Tree also written onto file "dnaml.treefile"

Done.


Go to the output files for this example

Command line arguments

Estimate nucleotide phylogeny by maximum likelihood
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fdnaml] Phylip dnaml program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      Rate for each category
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
*  -lengths            boolean    [N] Use branch lengths from user trees
   -ttratio            float      [2.0] Transition/transversion ratio (Number
                                  0.001 or more)
   -[no]freqsfrom      toggle     [Y] Use empirical base frequencies from
                                   sequence input
*  -basefreq           array      [0.25 0.25 0.25 0.25] Base frequencies for A
                                  C G T/U (use blanks to separate)
   -gammatype          menu       [Constant rate] Rate variation among sites
                                  (Values: g (Gamma distributed rates); i
                                  (Gamma+invariant sites); h (User defined HMM
                                  of rates); n (Constant rate))
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ngammacat          integer    [1] Number of categories (1-9) (Integer from
                                  1 to 9)
*  -invarcoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ninvarcat          integer    [1] Number of categories (1-9) including one
                                  for invariant sites (Integer from 1 to 9)
*  -invarfrac          float      [0.0] Fraction of invariant sites (Number
                                  from 0.000 to 1.000)
*  -nhmmcategories     integer    [1] Number of HMM rate categories (Integer
                                  from 1 to 9)
*  -hmmrates           array      [1.0] HMM category rates
*  -hmmprobabilities   array      [1.0] Probability for each HMM category
*  -adjsite            boolean    [N] Rates at adjacent sites correlated
*  -lambda             float      [1.0] Mean block length of sites having the
                                  same rate (Number 1.000 or more)
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
*  -global             boolean    [N] Global rearrangements
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]rough          boolean    [Y] Speedier but rougher analysis
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fdnaml] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -hypstate           boolean    [N] Reconstruct hypothetical sequence

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip dnaml program output file Output file <*>.fdnaml
Additional (Optional) qualifiers
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Rate for each category List of floating point numbers  
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-ttratio float Transition/transversion ratio Number 0.001 or more 2.0
-[no]freqsfrom toggle Use empirical base frequencies from sequence input Toggle value Yes/No Yes
-basefreq array Base frequencies for A C G T/U (use blanks to separate) List of floating point numbers 0.25 0.25 0.25 0.25
-gammatype list Rate variation among sites
g (Gamma distributed rates)
i (Gamma+invariant sites)
h (User defined HMM of rates)
n (Constant rate)
Constant rate
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ngammacat integer Number of categories (1-9) Integer from 1 to 9 1
-invarcoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ninvarcat integer Number of categories (1-9) including one for invariant sites Integer from 1 to 9 1
-invarfrac float Fraction of invariant sites Number from 0.000 to 1.000 0.0
-nhmmcategories integer Number of HMM rate categories Integer from 1 to 9 1
-hmmrates array HMM category rates List of floating point numbers 1.0
-hmmprobabilities array Probability for each HMM category List of floating point numbers 1.0
-adjsite boolean Rates at adjacent sites correlated Boolean value Yes/No No
-lambda float Mean block length of sites having the same rate Number 1.000 or more 1.0
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-global boolean Global rearrangements Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]rough boolean Speedier but rougher analysis Boolean value Yes/No Yes
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdnaml
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-hypstate boolean Reconstruct hypothetical sequence Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnaml reads any normal sequence USAs.

Input files for usage example

File: dnaml.dat

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC

Output file format

fdnaml output starts by giving the number of species, the number of sites, and the base frequencies for A, C, G, and T that have been specified. It then prints out the transition/transversion ratio that was specified or used by default. It also uses the base frequencies to compute the actual transition/transversion ratio implied by the parameter.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of sites is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them out, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen.

A table is printed showing the length of each tree segment (in units of expected nucleotide substitutions per site), as well as (very) rough confidence limits on their lengths. If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow.

In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This is done by comparing the likelihoods, changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that. The result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above for the L option.
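
To see the arithmetic involved, here is a minimal sketch (not part of fdnaml, which reports the result itself) of turning two such log likelihoods into a likelihood ratio statistic and a one-degree-of-freedom chi-square P value, together with the Bonferroni-style threshold discussed in the next paragraph. The log-likelihood values and the branch count are invented for the example.

  from scipy.stats import chi2

  lnL_free = -57.879   # invented: log likelihood with the branch length free
  lnL_zero = -61.402   # invented: log likelihood with that branch forced to zero

  lrt = 2.0 * (lnL_free - lnL_zero)   # likelihood ratio statistic
  p_value = chi2.sf(lrt, df=1)        # upper-tail chi-square probability, 1 d.f.

  n_branches = 7                      # number of branches being inspected at once
  print(lrt, p_value, p_value < 0.05 / n_branches)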

One should also realize that if you are looking not at a previously chosen branch but at all branches, you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that).

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the expected transition/transversion ratio to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different sites, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across sites. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.
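
The core of that comparison can be sketched in a few lines. This is illustrative only: fdnaml performs the test internally and does not write out per-site log-likelihoods, so the arrays below are synthetic placeholders.

  import numpy as np

  rng = np.random.default_rng(1)
  n_sites = 13
  lnL_best = rng.normal(-4.5, 0.5, n_sites)              # synthetic per-site values, best tree
  lnL_other = lnL_best - rng.normal(0.1, 0.3, n_sites)   # synthetic per-site values, other tree

  d = lnL_best - lnL_other                      # per-site log-likelihood differences
  total_diff = d.sum()                          # difference in total log likelihood
  sd_total = np.sqrt(n_sites * d.var(ddof=1))   # spread implied by site-to-site scatter
  print(total_diff, sd_total, abs(total_diff) > 1.96 * sd_total)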

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across sites are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.
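
The resampling idea can likewise be sketched, here with a multivariate normal draw in place of the program's own resampling of sites, and with synthetic per-site log-likelihoods; it is meant only to make the "least favorable hypothesis" step concrete, not to reproduce the program's exact procedure.

  import numpy as np

  rng = np.random.default_rng(3)
  n_trees, n_sites, n_reps = 4, 13, 1000
  site_lnL = rng.normal(-4.5, 0.5, (n_trees, n_sites))   # synthetic per-site values

  totals = site_lnL.sum(axis=1)
  observed_diff = totals.max() - totals          # shortfall of each tree from the best

  cov = np.cov(site_lnL) * n_sites               # covariances of the per-tree totals
  # Equal (zero) means with these covariances: the "least favorable hypothesis".
  sims = rng.multivariate_normal(np.zeros(n_trees), cov, size=n_reps)
  sim_diff = sims.max(axis=1, keepdims=True) - sims
  p_values = (sim_diff >= observed_diff).mean(axis=0)
  print(p_values)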

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring sites (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
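
As a quick numerical check of that statement, the Jukes-Cantor relationship between branch length and the expected fraction of visibly different sites gives roughly 22% for a branch of length 0.26 (fdnaml's own model is F84, so this is only an approximation):

  import math

  branch_length = 0.26   # expected substitutions per site
  p_observed = 0.75 * (1.0 - math.exp(-4.0 * branch_length / 3.0))
  print(round(p_observed, 3))   # about 0.22: some changes overlay or reverse each other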

Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length. Note that versions 2.7 and earlier of this program printed out the branch lengths in terms of expected probability of change, so that they were scaled differently.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what site categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in these calculations of the prior probabilities of different rates, and of the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each site which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species, which are the input sequences). In that table, if a site has a base which accounts for more than 95% of the likelihood, it is printed in capital letters (A rather than a). If the best nucleotide accounts for less than 50% of the likelihood, the program prints out an ambiguity code (such as M for "A or C") for the set of nucleotides which, taken together, account for more than half of the likelihood. The ambiguity codes are listed in the sequence programs documentation file. One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed nucleotides are based on only the single assignment of rates to sites which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.
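
As an illustration of the reporting rule described above, here is a small sketch using the standard IUPAC nucleotide ambiguity codes; the likelihood fractions are invented, and the exact tie-breaking and capitalisation conventions of the program may differ in detail.

  # Standard IUPAC nucleotide ambiguity codes, keyed by the set of bases.
  IUPAC = {
      frozenset("A"): "A", frozenset("C"): "C", frozenset("G"): "G", frozenset("T"): "T",
      frozenset("AC"): "M", frozenset("AG"): "R", frozenset("AT"): "W",
      frozenset("CG"): "S", frozenset("CT"): "Y", frozenset("GT"): "K",
      frozenset("ACG"): "V", frozenset("ACT"): "H", frozenset("AGT"): "D",
      frozenset("CGT"): "B", frozenset("ACGT"): "N",
  }

  def reconstructed_symbol(fractions):
      """fractions: base -> fraction of the likelihood accounted for (invented input)."""
      ordered = sorted(fractions, key=fractions.get, reverse=True)
      best = ordered[0]
      if fractions[best] > 0.95:        # > 95% of the likelihood: capital letter
          return best.upper()
      if fractions[best] >= 0.50:       # a single base still accounts for a majority
          return best.lower()
      chosen, total = [], 0.0           # otherwise, smallest set covering > 1/2
      for base in ordered:
          chosen.append(base)
          total += fractions[base]
          if total > 0.5:
              break
      return IUPAC[frozenset(chosen)].lower()

  print(reconstructed_symbol({"A": 0.97, "C": 0.02, "G": 0.005, "T": 0.005}))  # A
  print(reconstructed_symbol({"A": 0.40, "C": 0.35, "G": 0.15, "T": 0.10}))    # m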

Output files for usage example

File: dnaml.fdnaml


Nucleic acid sequence Maximum Likelihood method, version 3.69.650

 5 species,  13  sites

    Site categories are:

             1111112222 222


    Sites are weighted as follows:

             01111 11111 110


Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



Empirical Base Frequencies:

   A       0.23636
   C       0.29091
   G       0.25455
  T(U)     0.21818


Transition/transversion ratio =   2.000000


State in HMM    Rate of change    Probability

        1           0.264            0.522
        2           1.413            0.399
        3           3.596            0.076
        4           7.086            0.0036
        5          12.641            0.000023



Site category   Rate of change

        1           1.000
        2           2.000



                                                              +Epsilon   
     +--------------------------------------------------------3  
  +--2                                                        +-Delta     
  |  |  
  |  +Beta      
  |  
  1------Gamma     
  |  
  +-Alpha     


remember: this is an unrooted tree!

Ln Likelihood =   -57.87892

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     1          Alpha             0.26766     (     zero,     0.80513) *
     1             2              0.04687     (     zero,     0.48388)
     2             3              7.59821     (     zero,    22.01485) **
     3          Epsilon           0.00006     (     zero,     0.46205)
     3          Delta             0.27319     (     zero,     0.73380) **
     2          Beta              0.00006     (     zero,     0.44052)
     1          Gamma             0.95677     (     zero,     2.46186) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01

Combination of categories that contributes the most to the likelihood:

             1132121111 211

Most probable category at each site if > 0.95 probability ("." otherwise)

             .......... ...

File: dnaml.treefile

(((Epsilon:0.00006,Delta:0.27319):7.59821,Beta:0.00006):0.04687,
Gamma:0.95677,Alpha:0.26766);

Output files for usage example 2

File: dnaml.fdnaml


Nucleic acid sequence Maximum Likelihood method, version 3.69.650

 5 species,  13  sites

Name            Sequences
----            ---------

Alpha        AACGTGGCCA AAT
Beta         AAGGTCGCCA AAC
Gamma        CATTTCGTCA CAA
Delta        GGTATTTCGG CCT
Epsilon      GGGATCTCGG CCC



Empirical Base Frequencies:

   A       0.24615
   C       0.29231
   G       0.24615
  T(U)     0.21538


Transition/transversion ratio =   2.000000


                                                  +Epsilon   
     +--------------------------------------------1  
  +--2                                            +--------Delta     
  |  |  
  |  +Beta      
  |  
  3------------------------------Gamma     
  |  
  +-----Alpha     


remember: this is an unrooted tree!

Ln Likelihood =   -72.25088

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     3          Alpha             0.20745     (     zero,     0.56578)
     3             2              0.09408     (     zero,     0.40912)
     2             1              1.51296     (     zero,     3.31130) **
     1          Epsilon           0.00006     (     zero,     0.34299)
     1          Delta             0.28137     (     zero,     0.62654) **
     2          Beta              0.00006     (     zero,     0.32900)
     3          Gamma             1.01651     (     zero,     2.33178) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01


File: dnaml.treefile

(((Epsilon:0.00006,Delta:0.28137):1.51296,Beta:0.00006):0.09408,
Gamma:1.01651,Alpha:0.20745);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug © emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/ffreqboot.html    EMBOSS: ffreqboot
ffreqboot

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Bootstrapped genetic frequencies algorithm

Description

Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development.

Algorithm

FFREQBOOT is a gene frequency specific version of SEQBOOT.

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in other ways than the majority rule consensus method does.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.

The resampling methods available are:

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.
  • The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.
  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3. (A small sketch of ordinary and block resampling follows this list.)
  • Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters.
  • Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters.
  • Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but with deletion of a fraction 1/e (1/2.71828) of the characters. This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).
  • Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).
  • Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.
  • Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats:
    Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there.
    MSAML M http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects.
    BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format
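
To make the first two resampling schemes above concrete, here is a small illustrative sketch of ordinary and block bootstrapping of character columns (with wrap-around), applied to a toy matrix rather than to ffreqboot's actual file formats; the handling of a block count that does not divide the number of characters evenly is a guess, not the program's documented behaviour.

  import numpy as np

  rng = np.random.default_rng(3)
  data = np.arange(50).reshape(5, 10)    # toy matrix: 5 species x 10 characters
  n_char = data.shape[1]

  # Ordinary bootstrap: draw N columns with replacement.
  boot = data[:, rng.integers(0, n_char, size=n_char)]

  # Block bootstrap, block size B, wrapping around past the last character.
  B = 3
  starts = rng.integers(0, n_char, size=-(-n_char // B))      # about N/B block starts
  cols = np.concatenate([(s + np.arange(B)) % n_char for s in starts])[:n_char]
  block_boot = data[:, cols]

  print(boot.shape, block_boot.shape)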

Usage

Here is a sample session with ffreqboot


% ffreqboot -seed 3 
Bootstrapped genetic frequencies algorithm
Input file: freqboot.dat
Phylip seqboot_freq program output file [freqboot.ffreqboot]: 


completed replicate number   10
completed replicate number   20
completed replicate number   30
completed replicate number   40
completed replicate number   50
completed replicate number   60
completed replicate number   70
completed replicate number   80
completed replicate number   90
completed replicate number  100

Output written to file "freqboot.ffreqboot"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Bootstrapped genetic frequencies algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            frequencies (no help text) frequencies value
  [-outfile]           outfile    [*.ffreqboot] Phylip seqboot_freq program
                                  output file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -test               menu       [b] Choose test (Values: b (Bootstrap); j
                                  (Jackknife); c (Permute species for each
                                  character); o (Permute character order); s
                                  (Permute within species); r (Rewrite data))
*  -regular            toggle     [N] Altered sampling fraction
*  -fracsample         float      [100.0] Samples as percentage of sites
                                  (Number from 0.100 to 100.000)
*  -blocksize          integer    [1] Block size for bootstraping (Integer 1
                                  or more)
*  -reps               integer    [100] How many replicates (Integer 1 or
                                  more)
*  -justweights        menu       [d] Write out datasets or just weights
                                  (Values: d (Datasets); w (Weights))
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -printdata          boolean    [N] Print out the data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
frequencies (no help text) frequencies value Frequency value(s)  
[-outfile]
(Parameter 2)
outfile Phylip seqboot_freq program output file Output file <*>.ffreqboot
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-test list Choose test
b (Bootstrap)
j (Jackknife)
c (Permute species for each character)
o (Permute character order)
s (Permute within species)
r (Rewrite data)
b
-regular toggle Altered sampling fraction Toggle value Yes/No No
-fracsample float Samples as percentage of sites Number from 0.100 to 100.000 100.0
-blocksize integer Block size for bootstraping Integer 1 or more 1
-reps integer How many replicates Integer 1 or more 100
-justweights list Write out datasets or just weights
d (Datasets)
w (Weights)
d
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-printdata boolean Print out the data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

ffreqboot data files, as read by SEQBOOT, are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either have or not have the third argument, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, and then follow that by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs.

Input files for usage example

File: freqboot.dat

    5    10
2 2 2 2 2 2 2 2 2 2
European   0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205
0.8055 0.5043
African    0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600
0.7582 0.6207
Chinese    0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726
0.7482 0.7334
American   0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000
0.8086 0.8636
Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396
0.9097 0.2976

Output file format

ffreqboot output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters.

The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factors characters, so that they all are understood to be recoding one multistate character, they will be resampled together as a group.

The order of species in the data sets in the output file will vary randomly. This is a precaution to help the programs that analyze these data avoid any result which is sensitive to the input order of species from showing up repeatedly and thus appearing to have evidence in its favor.

The numerical options 1 and 2 in the menu also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out up to 20 times during the resampling process a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, every 5 data sets a line is printed saying which data set has just been completed. This option should be turned off if the program is running in background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.

Output files for usage example

File: freqboot.ffreqboot

    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.28680 0.71320 0.56840 0.43160 0.56840 0.43160 0.44220 0.55780
               0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.72850 0.27150
               0.72850 0.27150 0.02050 0.97950
African    0.13560 0.86440 0.48400 0.51600 0.48400 0.51600 0.06020 0.93980
               0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.96750 0.03250
               0.96750 0.03250 0.06000 0.94000
Chinese    0.16280 0.83720 0.59580 0.40420 0.59580 0.40420 0.72980 0.27020
               1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.79860 0.20140
               0.79860 0.20140 0.07260 0.92740
American   0.01440 0.98560 0.69900 0.30100 0.69900 0.30100 0.32800 0.67200
               0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.86030 0.13970
               0.86030 0.13970 0.00000 1.00000
Australian 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260 0.58210 0.41790
               1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90000 0.10000
               0.90000 0.10000 0.03960 0.96040
    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.28680 0.71320 0.56840 0.43160 0.44220 0.55780 0.42860 0.57140
               0.38280 0.61720 0.38280 0.61720 0.38280 0.61720 0.02050 0.97950
               0.02050 0.97950 0.80550 0.19450
African    0.13560 0.86440 0.48400 0.51600 0.06020 0.93980 0.03970 0.96030
               0.59770 0.40230 0.59770 0.40230 0.59770 0.40230 0.06000 0.94000
               0.06000 0.94000 0.75820 0.24180
Chinese    0.16280 0.83720 0.59580 0.40420 0.72980 0.27020 1.00000 0.00000
               0.38110 0.61890 0.38110 0.61890 0.38110 0.61890 0.07260 0.92740
               0.07260 0.92740 0.74820 0.25180
American   0.01440 0.98560 0.69900 0.30100 0.32800 0.67200 0.74210 0.25790
               0.66060 0.33940 0.66060 0.33940 0.66060 0.33940 0.00000 1.00000
               0.00000 1.00000 0.80860 0.19140
Australian 0.12110 0.87890 0.22740 0.77260 0.58210 0.41790 1.00000 0.00000
               0.20180 0.79820 0.20180 0.79820 0.20180 0.79820 0.03960 0.96040
               0.03960 0.96040 0.90970 0.09030
    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.28680 0.71320 0.28680 0.71320 0.44220 0.55780 0.42860 0.57140
               0.42860 0.57140 0.38280 0.61720 0.72850 0.27150 0.80550 0.19450
               0.80550 0.19450 0.50430 0.49570
African    0.13560 0.86440 0.13560 0.86440 0.06020 0.93980 0.03970 0.96030
               0.03970 0.96030 0.59770 0.40230 0.96750 0.03250 0.75820 0.24180
               0.75820 0.24180 0.62070 0.37930
Chinese    0.16280 0.83720 0.16280 0.83720 0.72980 0.27020 1.00000 0.00000
               1.00000 0.00000 0.38110 0.61890 0.79860 0.20140 0.74820 0.25180
               0.74820 0.25180 0.73340 0.26660
American   0.01440 0.98560 0.01440 0.98560 0.32800 0.67200 0.74210 0.25790
               0.74210 0.25790 0.66060 0.33940 0.86030 0.13970 0.80860 0.19140
               0.80860 0.19140 0.86360 0.13640
Australian 0.12110 0.87890 0.12110 0.87890 0.58210 0.41790 1.00000 0.00000
               1.00000 0.00000 0.20180 0.79820 0.90000 0.10000 0.90970 0.09030


  [Part of this file has been deleted for brevity]

    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.28680 0.71320 0.56840 0.43160 0.56840 0.43160 0.56840 0.43160
               0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.80550 0.19450
               0.50430 0.49570 0.50430 0.49570
African    0.13560 0.86440 0.48400 0.51600 0.48400 0.51600 0.48400 0.51600
               0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.75820 0.24180
               0.62070 0.37930 0.62070 0.37930
Chinese    0.16280 0.83720 0.59580 0.40420 0.59580 0.40420 0.59580 0.40420
               1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.74820 0.25180
               0.73340 0.26660 0.73340 0.26660
American   0.01440 0.98560 0.69900 0.30100 0.69900 0.30100 0.69900 0.30100
               0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.80860 0.19140
               0.86360 0.13640 0.86360 0.13640
Australian 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260 0.22740 0.77260
               1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90970 0.09030
               0.29760 0.70240 0.29760 0.70240
    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.28680 0.71320 0.28680 0.71320 0.56840 0.43160 0.56840 0.43160
               0.44220 0.55780 0.42860 0.57140 0.42860 0.57140 0.72850 0.27150
               0.63860 0.36140 0.02050 0.97950
African    0.13560 0.86440 0.13560 0.86440 0.48400 0.51600 0.48400 0.51600
               0.06020 0.93980 0.03970 0.96030 0.03970 0.96030 0.96750 0.03250
               0.95110 0.04890 0.06000 0.94000
Chinese    0.16280 0.83720 0.16280 0.83720 0.59580 0.40420 0.59580 0.40420
               0.72980 0.27020 1.00000 0.00000 1.00000 0.00000 0.79860 0.20140
               0.77820 0.22180 0.07260 0.92740
American   0.01440 0.98560 0.01440 0.98560 0.69900 0.30100 0.69900 0.30100
               0.32800 0.67200 0.74210 0.25790 0.74210 0.25790 0.86030 0.13970
               0.79240 0.20760 0.00000 1.00000
Australian 0.12110 0.87890 0.12110 0.87890 0.22740 0.77260 0.22740 0.77260
               0.58210 0.41790 1.00000 0.00000 1.00000 0.00000 0.90000 0.10000
               0.98370 0.01630 0.03960 0.96040
    5    10
   2   2   2   2   2   2   2   2   2   2
European   0.56840 0.43160 0.56840 0.43160 0.44220 0.55780 0.44220 0.55780
               0.42860 0.57140 0.38280 0.61720 0.38280 0.61720 0.72850 0.27150
               0.63860 0.36140 0.80550 0.19450
African    0.48400 0.51600 0.48400 0.51600 0.06020 0.93980 0.06020 0.93980
               0.03970 0.96030 0.59770 0.40230 0.59770 0.40230 0.96750 0.03250
               0.95110 0.04890 0.75820 0.24180
Chinese    0.59580 0.40420 0.59580 0.40420 0.72980 0.27020 0.72980 0.27020
               1.00000 0.00000 0.38110 0.61890 0.38110 0.61890 0.79860 0.20140
               0.77820 0.22180 0.74820 0.25180
American   0.69900 0.30100 0.69900 0.30100 0.32800 0.67200 0.32800 0.67200
               0.74210 0.25790 0.66060 0.33940 0.66060 0.33940 0.86030 0.13970
               0.79240 0.20760 0.80860 0.19140
Australian 0.22740 0.77260 0.22740 0.77260 0.58210 0.41790 0.58210 0.41790
               1.00000 0.00000 0.20180 0.79820 0.20180 0.79820 0.90000 0.10000
               0.98370 0.01630 0.90970 0.09030

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug © emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fdnadist.html    EMBOSS: fdnadist
fdnadist

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Nucleic acid sequence distance matrix program

Description

Computes four different distances between species from nucleic acid sequences. The distances can then be used in the distance matrix programs. The distances are the Jukes-Cantor formula, one based on Kimura's 2-parameter method, the F84 model used in DNAML, and the LogDet distance. The distances can also be corrected for gamma-distributed and gamma-plus-invariant-sites-distributed rates of change in different sites. Rates of evolution can vary among sites in a prespecified way, and also according to a Hidden Markov model. The program can also make a table of percentage similarity among sequences.

Algorithm

This program uses nucleotide sequences to compute a distance matrix, under four different models of nucleotide substitution. It can also compute a table of similarity between the nucleotide sequences. The distance for each pair of species estimates the total branch length between the two species, and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR. This is an alternative to use of the sequence data itself in the maximum likelihood program DNAML or the parsimony program DNAPARS.

The program reads in nucleotide sequences and writes an output file containing the distance matrix, or else a table of similarity between sequences. The four models of nucleotide substitution are those of Jukes and Cantor (1969), Kimura (1980), the F84 model (Kishino and Hasegawa, 1989; Felsenstein and Churchill, 1996), and the model underlying the LogDet distance (Barry and Hartigan, 1987; Lake, 1994; Steel, 1994; Lockhart et al., 1994). All except the LogDet distance can be made to allow for unequal rates of substitution at different sites, as Jin and Nei (1990) did for the Jukes-Cantor model. The program correctly takes into account a variety of sequence ambiguities, although in cases where they exist it can be slow.

Jukes and Cantor's (1969) model assumes that there is independent change at all sites, with equal probability. Whether a base changes is independent of its identity, and when it changes there is an equal probability of ending up with each of the other three bases. Thus the transition probability matrix (this is a technical term from probability theory and has nothing to do with transitions as opposed to transversions) for a short period of time dt is:

              To:    A        G        C        T
                   ---------------------------------
               A  | 1-3a      a         a       a
       From:   G  |  a       1-3a       a       a
               C  |  a        a        1-3a     a
               T  |  a        a         a      1-3a

where a is u dt, the product of the rate of substitution per unit time (u) and the length dt of the time interval. For longer periods of time this implies that the probability that two sequences will differ at a given site is:

      p = 3/4 ( 1 - e^(-4/3 u t) )

and hence that if we observe p, we can compute an estimate of the branch length ut by inverting this to get

     ut = - 3/4 log_e ( 1 - 4/3 p )
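
A one-line function makes this inversion concrete (illustrative only; for sequences containing no ambiguity codes fdnadist's Jukes-Cantor estimate follows the same formula):

  import math

  def jukes_cantor_distance(p):
      """Estimate ut from the observed fraction p of differing sites.
      Undefined once p reaches 3/4."""
      return -0.75 * math.log(1.0 - 4.0 * p / 3.0)

  print(round(jukes_cantor_distance(0.10), 4))   # about 0.107
  print(round(jukes_cantor_distance(0.30), 4))   # about 0.383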

The Kimura "2-parameter" model is almost as symmetric as this, but allows for a difference between transition and transversion rates. Its transition probability matrix for a short interval of time is:


              To:     A        G        C        T
                   ---------------------------------
               A  | 1-a-2b     a         b       b
       From:   G  |   a      1-a-2b      b       b
               C  |   b        b       1-a-2b    a
               T  |   b        b         a     1-a-2b

where a is u dt, the product of the rate of transitions per unit time (u) and the length dt of the time interval, and b is v dt, the product of half the rate of transversions (i.e., the rate of a specific transversion) and the length dt of the time interval.
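
For orientation, Kimura's (1980) closed-form estimator for this model can be written in terms of the observed fractions of sites showing transition (P) and transversion (Q) differences. As noted further below, fdnadist instead computes a maximum likelihood estimate with one preset transition/transversion ratio, so its values will not in general match this formula exactly.

  import math

  def kimura_2p_distance(P, Q):
      """Kimura's closed-form distance from the observed transition (P) and
      transversion (Q) difference fractions."""
      return -0.5 * math.log(1.0 - 2.0 * P - Q) - 0.25 * math.log(1.0 - 2.0 * Q)

  print(round(kimura_2p_distance(0.10, 0.05), 4))   # about 0.17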

The F84 model incorporates different rates of transition and transversion, but also allows for different frequencies of the four nucleotides. It is the model which is used in DNAML, the maximum likelihood nucleotide sequence phylogenies program in this package. You will find the model described in the document for that program. The transition probabilities for this model are given by Kishino and Hasegawa (1989), and further explained in a paper by me and Gary Churchill (1996).

The LogDet distance allows a fairly general model of substitution. It computes the distance from the determinant of the empirically observed matrix of joint probabilities of nucleotides in the two species. An explanation of it is available in the chapter by Swofford et al. (1996).

The first three models are closely related. The DNAML model reduces to Kimura's two-parameter model if we assume that the equilibrium frequencies of the four bases are equal. The Jukes-Cantor model in turn is a special case of the Kimura 2-parameter model where a = b. Thus each model is a special case of the ones that follow it, Jukes-Cantor being a special case of both of the others.

The Jin and Nei (1990) correction for variation in rate of evolution from site to site can be adapted to all of the first three models. It assumes that the rate of substitution varies from site to site according to a gamma distribution, with a coefficient of variation that is specified by the user. The user is asked for it when choosing this option in the menu.
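
As an illustration of the kind of correction involved, here is the gamma-corrected form of the simplest of the three distances (Jukes-Cantor), written in terms of the coefficient of variation CV that the menu asks for, with gamma shape parameter a = 1/CV^2. This is a sketch of the idea only; the program applies the corresponding corrections inside its own distance calculations.

  def gamma_jc_distance(p, cv):
      """Jukes-Cantor distance with gamma-distributed rates among sites.
      p  : observed fraction of differing sites
      cv : coefficient of variation of the substitution rate among sites"""
      a = 1.0 / (cv * cv)                  # gamma shape parameter
      return 0.75 * a * ((1.0 - 4.0 * p / 3.0) ** (-1.0 / a) - 1.0)

  print(round(gamma_jc_distance(0.30, 1.0), 4))   # about 0.50
  print(round(gamma_jc_distance(0.30, 0.1), 4))   # about 0.38, near the uncorrected value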

Each distance that is calculated is an estimate, from that particular pair of species, of the divergence time between those two species. For the Jukes-Cantor model, the estimate is computed using the formula for ut given above, as long as the nucleotide symbols in the two sequences are all either A, C, G, T, U, N, X, ?, or - (the latter four indicate a deletion or an unknown nucleotide). This estimate is a maximum likelihood estimate for that model. For the Kimura 2-parameter model, with only these nucleotide symbols, formulas special to that model are also used. These are also, in effect, computing the maximum likelihood estimate for that model. In the Kimura case it depends on the observed sequences only through the sequence length and the observed number of transition and transversion differences between those two sequences. The calculation in that case is a maximum likelihood estimate and will differ somewhat from the estimate obtained from the formulas in Kimura's original paper. That formula was also a maximum likelihood estimate, but with the transition/transversion ratio estimated empirically, separately for each pair of sequences. In the present case, one overall preset transition/transversion ratio is used, which makes the computations harder but achieves greater consistency between different comparisons.

For the F84 model, or for any of the models where one or both sequences contain at least one of the other ambiguity codes such as Y, R, etc., a maximum likelihood calculation is also done using code which was originally written for DNAML. Its disadvantage is that it is slow. The resulting distance is in effect a maximum likelihood estimate of the divergence time (total branch length) between the two sequences. However the present program will be much faster than versions earlier than 3.5, because I have speeded up the iterations.

The LogDet model computes the distance from the determinant of the matrix of co-occurrence of nucleotides in the two species, according to the formula

   D = - 1/4 ( log_e(|F|) - 1/2 log_e( f_A1 f_C1 f_G1 f_T1 f_A2 f_C2 f_G2 f_T2 ) )

where F is a matrix whose (i,j) element is the fraction of sites at which base i occurs in one species and base j occurs in the other, and f_ji is the fraction of sites at which species i has base j. The LogDet distance cannot cope with ambiguity codes. It must have completely defined sequences. One limitation of the LogDet distance is that it may be infinite sometimes, if there are too many changes between certain pairs of nucleotides. This can be particularly noticeable with distances computed from bootstrapped sequences. Note that there is an assumption that we are looking at all sites, including those that have not changed at all. It is important not to restrict attention to some sites based on whether or not they have changed; doing that would bias the distances by making them too large, and that in turn would cause the distances to misinterpret the meaning of those sites that had changed.
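
A small sketch of the computation, assuming two completely defined, aligned sequences; the short strings below are just the toy alignment used elsewhere in this documentation, so the resulting value is not meaningful in itself.

  import numpy as np

  def logdet_distance(seq1, seq2):
      """LogDet distance between two fully resolved, aligned DNA sequences."""
      index = {"A": 0, "C": 1, "G": 2, "T": 3}
      F = np.zeros((4, 4))
      for x, y in zip(seq1, seq2):
          F[index[x], index[y]] += 1.0
      F /= F.sum()                            # joint nucleotide frequencies
      f1, f2 = F.sum(axis=1), F.sum(axis=0)   # base frequencies in each species
      sign, logdetF = np.linalg.slogdet(F)
      if sign <= 0:
          raise ValueError("LogDet distance undefined: non-positive determinant")
      return -0.25 * (logdetF - 0.5 * np.log(np.prod(f1) * np.prod(f2)))

  print(round(logdet_distance("AACGTGGCCAAAT", "AAGGTCGCCAAAC"), 4))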

For all of these distance methods, the program allows us to specify that "third position" bases have a different rate of substitution than first and second positions, that introns have a different rate than exons, and so on. The Categories option which does this allows us to make up to 9 categories of sites and specify different rates of change for them.

In addition to the four distance calculations, the program can also compute a table of similarities between nucleotide sequences. These values are the fractions of sites identical between the sequences. The diagonal values are 1.0000. No attempt is made to count similarity of nonidentical nucleotides, so that no credit is given for having (for example) different purines at corresponding sites in the two sequences. This option has been requested by many users, who need it for descriptive purposes. It is not intended that the table be used for inferring the tree.

Usage

Here is a sample session with fdnadist


% fdnadist 
Nucleic acid sequence distance matrix program
Input (aligned) nucleotide sequence set(s): dnadist.dat
Distance methods
         f : F84 distance model
         k : Kimura 2-parameter distance
         j : Jukes-Cantor distance
         l : LogDet distance
         s : Similarity table
Choose the method to use [F84 distance model]: 
Phylip distance matrix output file [dnadist.fdnadist]: 

Distances calculated for species
    Alpha        ....
    Beta         ...
    Gamma        ..
    Delta        .
    Epsilon   

Distances written to file "dnadist.fdnadist"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Nucleic acid sequence distance matrix program
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
   -method             menu       [F84 distance model] Choose the method to
                                  use (Values: f (F84 distance model); k
                                  (Kimura 2-parameter distance); j
                                  (Jukes-Cantor distance); l (LogDet
                                  distance); s (Similarity table))
  [-outfile]           outfile    [*.fdnadist] Phylip distance matrix output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
*  -gammatype          menu       [No distribution parameters used] Gamma
                                  distribution (Values: g (Gamma distributed
                                  rates); i (Gamma+invariant sites); n (No
                                  distribution parameters used))
*  -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      [1.0] Category rates
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -invarfrac          float      [0.0] Fraction of invariant sites (Number
                                  from 0.000 to 1.000)
*  -ttratio            float      [2.0] Transition/transversion ratio (Number
                                  0.001 or more)
*  -[no]freqsfrom      toggle     [Y] Use empirical base frequencies from
                                   sequence input
*  -basefreq           array      [0.25 0.25 0.25 0.25] Base frequencies for A
                                  C G T/U (use blanks to separate)
   -lower              boolean    [N] Output as a lower triangular distance
                                  matrix
   -humanreadable      boolean    [@($(method)==s?Y:N)] Output as a
                                  human-readable distance matrix
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
-method list Choose the method to use
f (F84 distance model)
k (Kimura 2-parameter distance)
j (Jukes-Cantor distance)
l (LogDet distance)
s (Similarity table)
F84 distance model
[-outfile]
(Parameter 2)
outfile Phylip distance matrix output file Output file <*>.fdnadist
Additional (Optional) qualifiers
-gammatype list Gamma distribution
g (Gamma distributed rates)
i (Gamma+invariant sites)
n (No distribution parameters used)
No distribution parameters used
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Category rates List of floating point numbers 1.0
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-invarfrac float Fraction of invariant sites Number from 0.000 to 1.000 0.0
-ttratio float Transition/transversion ratio Number 0.001 or more 2.0
-[no]freqsfrom toggle Use empirical base frequencies from sequence input Toggle value Yes/No Yes
-basefreq array Base frequencies for A C G T/U (use blanks to separate) List of floating point numbers 0.25 0.25 0.25 0.25
-lower boolean Output as a lower triangular distance matrix Boolean value Yes/No No
-humanreadable boolean Output as a human-readable distance matrix Boolean value Yes/No @($(method)==s?Y:N)
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdnadist reads any normal sequence USAs.

Input files for usage example

File: dnadist.dat

   5   13
Alpha     AACGTGGCCACAT
Beta      AAGGTCGCCACAC
Gamma     CAGTTCGCCACAA
Delta     GAGATTTCCGCCT
Epsilon   GAGATCTCCGCCC

Output file format

fdnadist output contains on its first line the number of species. The distance matrix is then printed in standard form, with each species starting on a new line with the species name, followed by the distances to the species in order. These continue onto a new line after every nine distances. If the L option is used, the matrix of distances is in lower triangular form, so that only the distances to the other species that precede each species are printed. Otherwise the distance matrix is square with zero distances on the diagonal. In general the format of the distance matrix is such that it can serve as input to any of the distance matrix programs.

If the option to print out the data is selected, the output file will precede the data by more complete information on the input and the menu selections. The output file begins by giving the number of species and the number of characters, and the identity of the distance measure that is being used.

If the C (Categories) option is used, a table of the relative rates of expected substitution at each category of sites is printed, together with a listing of the category each site is in.

There will then follow the equilibrium frequencies of the four bases. If the Jukes-Cantor or Kimura distances are used, these will necessarily be 0.25 : 0.25 : 0.25 : 0.25. The output then shows the transition/transversion ratio that was specified or used by default. In the case of the Jukes-Cantor distance this will always be 0.5. The transition-transversion parameter (as opposed to the ratio) is also printed out: this is used within the program and can be ignored. There then follow the data sequences, with the base sequences printed in groups of ten bases along the lines of the Genbank and EMBL formats.

The distances printed out are scaled in terms of expected numbers of substitutions, counting both transitions and transversions but not replacements of a base by itself, and scaled so that the average rate of change, averaged over all sites analyzed, is set to 1.0 if there are multiple categories of sites. This means that whether or not there are multiple categories of sites, the expected fraction of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes may occur in the same site and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 0.26 is 26 times as long as one which would show a 1% difference between the nucleotide sequences at the beginning and end of the branch. But we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.
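
As an illustrative check under the Jukes-Cantor model (an assumption made only for this example), the expected observed fraction of difference p for a branch of length d is

   p  =  3/4 (1 - exp(-4d/3))

so a branch of length 0.26 would be expected to show only about 22% observed difference, not 26%, because of the overlaying of changes.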

One problem that can arise is that two or more of the species can be so dissimilar that the distance between them would have to be infinite, as the likelihood rises indefinitely as the estimated divergence time increases. For example, with the Jukes-Cantor model, if the two sequences differ in 75% or more of their positions then the estimate of divergence time would be infinite. Since there is no way to represent an infinite distance in the output file, the program regards this as an error, issues an error message indicating which pair of species is causing the problem, and stops. It might be that, had it continued running, it would have also run into the same problem with other pairs of species. If the Kimura distance is being used there may be no error message; the program may simply give a large distance value (it is iterating towards infinity and the value is just where the iteration stopped). Likewise some maximum likelihood estimates may also become large for the same reason (the sequences showing more divergence than is expected even with infinite branch length). I hope in the future to add more warning messages that would alert the user to this.

If the similarity table is selected, the table that is produced is not in a format that can be used as input to the distance matrix programs. It has a heading, and the species names are also put at the tops of the columns of the table (or rather, the first 8 characters of each species name are there, the remaining characters being omitted to save space). There is not an option to put the table into a format that can be read by the distance matrix programs, nor is there one to make it into a table of fractions of difference by subtracting the similarity values from 1. This is done deliberately to make it more difficult for the user to use these values to construct trees. The similarity values are not corrected for multiple changes, and their use to construct trees (even after converting them to fractions of difference) would be wrong, as it would lead to severe conflict between the distant pairs of sequences and the close pairs of sequences.

Output files for usage example

File: dnadist.fdnadist

    5
Alpha      0.000000 0.303900 0.857544 1.158927 1.542899
Beta       0.303900 0.000000 0.339727 0.913522 0.619671
Gamma      0.857544 0.339727 0.000000 1.631729 1.293713
Delta      1.158927 0.913522 1.631729 0.000000 0.165882
Epsilon    1.542899 0.619671 1.293713 0.165882 0.000000

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/index.html

PHYLIPNEW Applications


The PHYLIPNEW programs are EMBOSS conversions of the programs in Joe Felsenstein's PHYLIP package, version 3.69.

The PHYLIPNEW versions of these programs all have the prefix "f" to distinguish them from the original programs.

Applications in the current phylipnew release

Program name Description
fclique Largest clique program
fconsense Majority-rule and strict consensus tree
fcontml Gene frequency and continuous character maximum likelihood
fcontrast Continuous character contrasts
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdollop Dollo and polymorphism parsimony algorithm
fdolmove Interactive Dollo or polymorphism parsimony
fdolpenny Penny algorithm Dollo or polymorphism
fdrawgram Plots a cladogram- or phenogram-like rooted tree diagram
fdrawtree Plots an unrooted tree diagram
ffactor Multistate to binary recoding program
ffitch Fitch-Margoliash and least-squares distance methods
ffreqboot Bootstrapped genetic frequencies algorithm
fgendist Compute genetic distances from gene frequencies
fkitsch Fitch-Margoliash method with contemporary tips
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fneighbor Phylogenies from distance matrix by N-J or UPGMA method
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fretree Interactive tree rearrangement
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm
ftreedist Calculate distances between trees
ftreedistpair Calculate distance between two sets of trees

PHYLIPNEW-3.69.650/emboss_doc/html/fproml.html
fproml

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Protein phylogeny by maximum likelihood

Description

Estimates phylogenies from protein amino acid sequences by maximum likelihood. The PAM, JTT, or PMB models of amino acid change can be employed. A Hidden Markov Model of rate variation can also be used, with the program inferring which sites have which rates; this allows gamma-distributed and gamma-plus-invariant-sites distributions of rates across sites. Different rates of change at known sites are also allowed.

Algorithm

This program implements the maximum likelihood method for protein amino acid sequences. It uses either the Jones-Taylor-Thornton or the Dayhoff probability model of change between amino acids. The assumptions of these models are:
  1. Each position in the sequence evolves independently.
  2. Different lineages evolve independently.
  3. Each position undergoes substitution at an expected rate which is chosen from a series of rates (each with a probability of occurrence) which we specify.
  4. All relevant positions are included in the sequence, not just those that have changed or those that are "phylogenetically informative".
  5. The probabilities of change between amino acids are given by the model of Jones, Taylor, and Thornton (1992), the PMB model of Veerassamy, Smith and Tillier (2004), or the PAM model of Dayhoff (Dayhoff and Eck, 1968; Dayhoff et. al., 1979).

Note the assumption that we are looking at all positions, including those that have not changed at all. It is important not to restrict attention to some positions based on whether or not they have changed; doing that would bias branch lengths by making them too long, and that in turn would cause the method to misinterpret the meaning of those positions that had changed.

This program uses a Hidden Markov Model (HMM) method of inferring different rates of evolution at different amino acid positions. This was described in a paper by me and Gary Churchill (1996). It allows us to specify to the program that there will be a number of different possible evolutionary rates, what the prior probability of occurrence of each one is, and what the average length is of a patch of positions that all have the same rate. The rates can also be chosen by the program to approximate a Gamma distribution of rates, or a Gamma distribution plus a class of invariant positions. The program computes the likelihood by summing it over all possible assignments of rates to positions, weighting each by its prior probability of occurrence.

For example, if we have used the C and A options (described below) to specify that there are three possible rates of evolution, 1.0, 2.4, and 0.0, that the prior probabilities of a position having these rates are 0.4, 0.3, and 0.3, and that the average patch length (number of consecutive positions with the same rate) is 2.0, the program will sum the likelihood over all possibilities, but giving less weight to those that (say) assign all positions to rate 2.4, or that fail to have consecutive positions that have the same rate.
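
Expressed with the command-line qualifiers listed below, such a run might (hypothetically, and assuming no user tree is supplied) look like:

    % fproml -sequence proml.dat -gammatype h -nhmmcategories 3 -hmmrates "1.0 2.4 0.0" -hmmprobabilities "0.4 0.3 0.3" -adjsite -lambda 2.0 -outfile proml.fproml -auto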

The Hidden Markov Model framework for rate variation among positions was independently developed by Yang (1993, 1994, 1995). We have implemented a general scheme for a Hidden Markov Model of rates; we allow the rates and their prior probabilities to be specified arbitrarily by the user, or by a discrete approximation to a Gamma distribution of rates (Yang, 1995), or by a mixture of a Gamma distribution and a class of invariant positions.

This feature effectively removes the artificial assumption that all positions have the same rate, and also means that we need not know in advance the identities of the positions that have a particular rate of evolution.

Another layer of rate variation also is available. The user can assign categories of rates to each position (for example, we might want amino acid positions in the active site of a protein to change more slowly than other positions). This is done with the categories input file and the C option. We then specify (using the menu) the relative rates of evolution of amino acid positions in the different categories. For example, we might specify that positions in the active site evolve at relative rates of 0.2 compared to 1.0 at other positions. If we are assuming that a particular position maintains a cysteine bridge to another, we may want to put it in a category of positions (including perhaps the initial position of the protein sequence which maintains methionine) which changes at a rate of 0.0.

If both user-assigned rate categories and Hidden Markov Model rates are allowed, the program assumes that the actual rate at a position is the product of the user-assigned category rate and the Hidden Markov Model regional rate. (This may not always make perfect biological sense: it would be more natural to assume some upper bound to the rate, as we have discussed in the Felsenstein and Churchill paper). Nevertheless you may want to use both types of rate variation.
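
For example, a position assigned to a user category with relative rate 0.2 that falls in an HMM region with rate 2.4 would be treated as evolving at 0.2 x 2.4 = 0.48 times the overall average rate.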

Usage

Here is a sample session with fproml


% fproml 
Protein phylogeny by maximum likelihood
Input (aligned) protein sequence set(s): proml.dat
Phylip tree file (optional): 
Phylip proml program output file [proml.fproml]: 


Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Output written to file "proml.fproml"

Tree also written onto file "proml.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Protein phylogeny by maximum likelihood
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-sequence]          seqsetall  File containing one or more sequence
                                  alignments
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fproml] Phylip proml program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -ncategories        integer    [1] Number of substitution rate categories
                                  (Integer from 1 to 9)
*  -rate               array      Rate for each category
*  -categories         properties File of substitution rate categories
   -weights            properties Weights file
*  -lengths            boolean    [N] Use branch lengths from user trees
   -model              menu       [Jones-Taylor-Thornton] Probability model
                                  for amino acid change (Values: j
                                  (Jones-Taylor-Thornton); h (Henikoff/Tillier
                                  PMBs); d (Dayhoff PAM))
   -gammatype          menu       [Constant rate] Rate variation among sites
                                  (Values: g (Gamma distributed rates); i
                                  (Gamma+invariant sites); h (User defined HMM
                                  of rates); n (Constant rate))
*  -gammacoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ngammacat          integer    [1] Number of categories (1-9) (Integer from
                                  1 to 9)
*  -invarcoefficient   float      [1] Coefficient of variation of substitution
                                  rate among sites (Number 0.001 or more)
*  -ninvarcat          integer    [1] Number of categories (1-9) including one
                                  for invariant sites (Integer from 1 to 9)
*  -invarfrac          float      [0.0] Fraction of invariant sites (Number
                                  from 0.000 to 1.000)
*  -nhmmcategories     integer    [1] Number of HMM rate categories (Integer
                                  from 1 to 9)
*  -hmmrates           array      [1.0] HMM category rates
*  -hmmprobabilities   array      [1.0] Probability for each HMM category
*  -adjsite            boolean    [N] Rates at adjacent sites correlated
*  -lambda             float      [1.0] Mean block length of sites having the
                                  same rate (Number 1.000 or more)
*  -njumble            integer    [0] Number of times to randomise, choose 0
                                  if you don't want to randomise (Integer 0 or
                                  more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
*  -global             boolean    [N] Global rearrangements
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]rough          boolean    [Y] Speedier but rougher analysis
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fproml] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -hypstate           boolean    [N] Reconstruct hypothetical sequence

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-sequence" associated qualifiers
   -sbegin1            integer    Start of each sequence to be used
   -send1              integer    End of each sequence to be used
   -sreverse1          boolean    Reverse (if DNA)
   -sask1              boolean    Ask for begin/end/reverse
   -snucleotide1       boolean    Sequence is nucleotide
   -sprotein1          boolean    Sequence is protein
   -slower1            boolean    Make lower case
   -supper1            boolean    Make upper case
   -scircular1         boolean    Sequence is circular
   -squick1            boolean    Read id and sequence only
   -sformat1           string     Input sequence format
   -iquery1            string     Input query fields or ID list
   -ioffset1           integer    Input start position offset
   -sdbname1           string     Database name
   -sid1               string     Entryname
   -ufo1               string     UFO features
   -fformat1           string     Features format
   -fopenfile1         string     Features file name

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-sequence]
(Parameter 1)
seqsetall File containing one or more sequence alignments Readable sets of sequences Required
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip proml program output file Output file <*>.fproml
Additional (Optional) qualifiers
-ncategories integer Number of substitution rate categories Integer from 1 to 9 1
-rate array Rate for each category List of floating point numbers  
-categories properties File of substitution rate categories Property value(s)  
-weights properties Weights file Property value(s)  
-lengths boolean Use branch lengths from user trees Boolean value Yes/No No
-model list Probability model for amino acid change
j (Jones-Taylor-Thornton)
h (Henikoff/Tillier PMBs)
d (Dayhoff PAM)
Jones-Taylor-Thornton
-gammatype list Rate variation among sites
g (Gamma distributed rates)
i (Gamma+invariant sites)
h (User defined HMM of rates)
n (Constant rate)
Constant rate
-gammacoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ngammacat integer Number of categories (1-9) Integer from 1 to 9 1
-invarcoefficient float Coefficient of variation of substitution rate among sites Number 0.001 or more 1
-ninvarcat integer Number of categories (1-9) including one for invariant sites Integer from 1 to 9 1
-invarfrac float Fraction of invariant sites Number from 0.000 to 1.000 0.0
-nhmmcategories integer Number of HMM rate categories Integer from 1 to 9 1
-hmmrates array HMM category rates List of floating point numbers 1.0
-hmmprobabilities array Probability for each HMM category List of floating point numbers 1.0
-adjsite boolean Rates at adjacent sites correlated Boolean value Yes/No No
-lambda float Mean block length of sites having the same rate Number 1.000 or more 1.0
-njumble integer Number of times to randomise, choose 0 if you don't want to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-global boolean Global rearrangements Boolean value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]rough boolean Speedier but rougher analysis Boolean value Yes/No Yes
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fproml
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-hypstate boolean Reconstruct hypothetical sequence Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-sequence" associated seqsetall qualifiers
-sbegin1
-sbegin_sequence
integer Start of each sequence to be used Any integer value 0
-send1
-send_sequence
integer End of each sequence to be used Any integer value 0
-sreverse1
-sreverse_sequence
boolean Reverse (if DNA) Boolean value Yes/No N
-sask1
-sask_sequence
boolean Ask for begin/end/reverse Boolean value Yes/No N
-snucleotide1
-snucleotide_sequence
boolean Sequence is nucleotide Boolean value Yes/No N
-sprotein1
-sprotein_sequence
boolean Sequence is protein Boolean value Yes/No N
-slower1
-slower_sequence
boolean Make lower case Boolean value Yes/No N
-supper1
-supper_sequence
boolean Make upper case Boolean value Yes/No N
-scircular1
-scircular_sequence
boolean Sequence is circular Boolean value Yes/No N
-squick1
-squick_sequence
boolean Read id and sequence only Boolean value Yes/No N
-sformat1
-sformat_sequence
string Input sequence format Any string  
-iquery1
-iquery_sequence
string Input query fields or ID list Any string  
-ioffset1
-ioffset_sequence
integer Input start position offset Any integer value 0
-sdbname1
-sdbname_sequence
string Database name Any string  
-sid1
-sid_sequence
string Entryname Any string  
-ufo1
-ufo_sequence
string UFO features Any string  
-fformat1
-fformat_sequence
string Features format Any string  
-fopenfile1
-fopenfile_sequence
string Features file name Any string  
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fproml reads any normal sequence USAs.

Input files for usage example

File: proml.dat

   5   13
Alpha     AACGTGGCCAAAT
Beta      AAGGTCGCCAAAC
Gamma     CATTTCGTCACAA
Delta     GGTATTTCGGCCT
Epsilon   GGGATCTCGGCCC

Output file format

fproml output starts by giving the number of species and the number of amino acid positions.

If the R (HMM rates) option is used a table of the relative rates of expected substitution at each category of positions is printed, as well as the probabilities of each of those rates.

There then follow the data sequences, if the user has selected the menu option to print them, with the sequences printed in groups of ten amino acids. The trees found are printed as an unrooted tree topology (possibly rooted by outgroup if so requested). The internal nodes are numbered arbitrarily for the sake of identification. The number of trees evaluated so far and the log likelihood of the tree are also given. Note that the trees printed out have a trifurcation at the base. The branch lengths in the diagram are roughly proportional to the estimated branch lengths, except that very short branches are printed out at least three characters in length so that the connections can be seen. The unit of branch length is the expected fraction of amino acids changed (so that 1.0 is 100 PAMs).

A table is printed showing the length of each tree segment (in units of expected amino acid substitutions per position), as well as (very) rough confidence limits on their lengths. If a confidence limit is negative, this indicates that rearrangement of the tree in that region is not excluded, while if both limits are positive, rearrangement is still not necessarily excluded because the variance calculation on which the confidence limits are based results in an underestimate, which makes the confidence limits too narrow.

In addition to the confidence limits, the program performs a crude Likelihood Ratio Test (LRT) for each branch of the tree. The program computes the ratio of likelihoods with and without this branch length forced to zero length. This is done by comparing the likelihoods while changing only that branch length. A truly correct LRT would force that branch length to zero and also allow the other branch lengths to adjust to that; the result would be a likelihood ratio closer to 1. Therefore the present LRT will err on the side of being too significant. YOU ARE WARNED AGAINST TAKING IT TOO SERIOUSLY. If you want to get a better likelihood curve for a branch length you can do multiple runs with different prespecified lengths for that branch, as discussed above for the L option.
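
In conventional terms (a sketch only, using the usual chi-square calibration with one degree of freedom; the program's exact criterion for its asterisks is not reproduced here), such a one-branch comparison amounts to:

    def branch_lrt(lnl_free, lnl_zero):
        """Crude likelihood-ratio statistic for one branch: twice the drop in
        log-likelihood when that branch length is forced to zero."""
        stat = 2.0 * (lnl_free - lnl_zero)
        if stat > 6.635:      # chi-square(1 df) critical value for P = 0.01
            return "**"
        if stat > 3.841:      # chi-square(1 df) critical value for P = 0.05
            return "*"
        return ""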

One should also realize that if you are looking not at a previously-chosen branch but at all branches, you are seeing the results of multiple tests. With 20 tests, one is expected to reach significance at the P = .05 level purely by chance. You should therefore use a much more conservative significance level, such as .05 divided by the number of tests. The significance of these tests is shown by printing asterisks next to the confidence interval on each branch length. It is important to keep in mind that both the confidence limits and the tests are very rough and approximate, and probably indicate more significance than they should. Nevertheless, maximum likelihood is one of the few methods that can give you any indication of its own error; most other methods simply fail to warn the user that there is any error! (In fact, whole philosophical schools of taxonomists exist whose main point seems to be that there isn't any error, that the "most parsimonious" tree is the best tree by definition and that's that).

The log likelihood printed out with the final tree can be used to perform various likelihood ratio tests. One can, for example, compare runs with different values of the relative rate of change in the active site and in the rest of the protein to determine which value is the maximum likelihood estimate, and what is the allowable range of values (using a likelihood ratio test, which you will find described in mathematical statistics books). One could also estimate the base frequencies in the same way. Both of these, particularly the latter, require multiple runs of the program to evaluate different possible values, and this might get expensive.

If the U (User Tree) option is used and more than one tree is supplied, and the program is not told to assume autocorrelation between the rates at different amino acid positions, the program also performs a statistical test of each of these trees against the one with highest likelihood. If there are two user trees, the test done is one which is due to Kishino and Hasegawa (1989), a version of a test originally introduced by Templeton (1983). In this implementation it uses the mean and variance of log-likelihood differences between trees, taken across amino acid positions. If the two trees' means are more than 1.96 standard deviations different then the trees are declared significantly different. This use of the empirical variance of log-likelihood differences is more robust and nonparametric than the classical likelihood ratio test, and may to some extent compensate for any lack of realism in the model underlying this program.
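
A back-of-the-envelope sketch of that two-tree comparison (assuming per-position log-likelihoods for each tree are available; this is not the program's own code):

    import math

    def kh_z(site_lnl_tree1, site_lnl_tree2):
        """Kishino-Hasegawa style z statistic from per-position log-likelihoods;
        |z| > 1.96 is taken as significant at roughly P = 0.05."""
        d = [a - b for a, b in zip(site_lnl_tree1, site_lnl_tree2)]
        n = len(d)
        mean = sum(d) / n
        var = sum((x - mean) ** 2 for x in d) / (n - 1)   # per-position sample variance
        return (n * mean) / math.sqrt(n * var)            # total difference / its s.d.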

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sum of log likelihoods across amino acid positions are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected log-likelihood, log-likelihoods for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the highest log-likelihood exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the log-likelihoods of each tree, the differences of each from the highest one, the variance of that quantity as determined by the log-likelihood differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. However the test is not available if we assume that there is autocorrelation of rates at neighboring positions (option A) and is not done in those cases.

The branch lengths printed out are scaled in terms of 100 times the expected numbers of amino acid substitutions, scaled so that the average rate of change, averaged over all the positions analyzed, is set to 100.0 if there are multiple categories of positions. This means that whether or not there are multiple categories of positions, the expected percentage of change for very small branches is equal to the branch length. Of course, when a branch is twice as long this does not mean that there will be twice as much net change expected along it, since some of the changes occur in the same position and overlie or even reverse each other. The branch length estimates here are in terms of the expected underlying numbers of changes. That means that a branch of length 26 is 26 times as long as one which would show a 1% difference between the amino acid sequences at the beginning and end of the branch, but we would not expect the sequences at the beginning and end of the branch to be 26% different, as there would be some overlaying of changes.

Confidence limits on the branch lengths are also given. Of course a negative value of the branch length is meaningless, and a confidence limit overlapping zero simply means that the branch length is not necessarily significantly different from zero. Because of limitations of the numerical algorithm, branch length estimates of zero will often print out as small numbers such as 0.00001. If you see a branch length that small, it is really estimated to be of zero length.

Another possible source of confusion is the existence of negative values for the log likelihood. This is not really a problem; the log likelihood is not a probability but the logarithm of a probability. When it is negative it simply means that the corresponding probability is less than one (since we are seeing its logarithm). The log likelihood is maximized by being made more positive: -30.23 is worse than -29.14.

At the end of the output, if the R option is in effect with multiple HMM rates, the program will print a list of what amino acid position categories contributed the most to the final likelihood. This combination of HMM rate categories need not have contributed a majority of the likelihood, just a plurality. Still, it will be helpful as a view of where the program infers that the higher and lower rates are. Note that the use in this calculation of the prior probabilities of different rates, and of the average patch length, gives this inference a "smoothed" appearance: some other combination of rates might make a greater contribution to the likelihood, but be discounted because it conflicts with this prior information. See the example output below to see what this printout of rate categories looks like. A second list will also be printed out, showing for each position which rate accounted for the highest fraction of the likelihood. If the fraction of the likelihood accounted for is less than 95%, a dot is printed instead.

Option 3 in the menu controls whether the tree is printed out into the output file. This is on by default, and usually you will want to leave it this way. However for runs with multiple data sets such as bootstrapping runs, you will primarily be interested in the trees which are written onto the output tree file, rather than the trees printed on the output file. To keep the output file from becoming too large, it may be wisest to use option 3 to prevent trees being printed onto the output file.

Option 4 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Option 5 in the menu controls whether ancestral states are estimated at each node in the tree. If it is in effect, a table of ancestral sequences is printed out (including the sequences in the tip species which are the input sequences). The symbol printed out is for the amino acid which accounts for the largest fraction of the likelihood at that position. In that table, if a position has an amino acid which accounts for more than 95% of the likelihood, its symbol is printed in capital letters (W rather than w). One limitation of the current version of the program is that when there are multiple HMM rates (option R) the reconstructed amino acids are based on only the single assignment of rates to positions which accounts for the largest amount of the likelihood. Thus the assessment of 95% of the likelihood, in tabulating the ancestral states, refers to 95% of the likelihood that is accounted for by that particular combination of rates.

Output files for usage example

File: proml.fproml


Amino acid sequence Maximum Likelihood method, version 3.69.650

Jones-Taylor-Thornton model of amino acid change


  +Beta      
  |  
  |                                     +Epsilon   
  |      +------------------------------3  
  1------2                              +------------Delta     
  |      |  
  |      +--------------------Gamma     
  |  
  +---------Alpha     


remember: this is an unrooted tree!

Ln Likelihood =  -131.55052

 Between        And            Length      Approx. Confidence Limits
 -------        ---            ------      ------- ---------- ------

     1          Alpha             0.31006     (     zero,     0.66806) **
     1          Beta              0.00010     (     zero,    infinity)
     1             2              0.22206     (     zero,     0.62979) *
     2             3              1.00907     (  0.13965,     1.87849) **
     3          Epsilon           0.00010     (     zero,    infinity)
     3          Delta             0.41176     (     zero,     0.86685) **
     2          Gamma             0.68569     (  0.01628,     1.35510) **

     *  = significantly positive, P < 0.05
     ** = significantly positive, P < 0.01


File: proml.treefile

(Beta:0.00010,((Epsilon:0.00010,Delta:0.41176):1.00907,
Gamma:0.68569):0.22206,Alpha:0.31006);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fclique.html
fclique

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Largest clique program

Description

Finds the largest clique of mutually compatible characters, and the phylogeny which they recommend, for discrete character data with two states. The largest clique (or all cliques within a given size range of the largest one) is found by a very fast branch and bound search method. The method does not allow for missing data. For such cases the T (Threshold) option of PARS or MIX may be a useful alternative. Compatibility methods are particularly useful when some characters are of poor quality and the rest are of good quality, but it is not known in advance which ones are which.

Algorithm

This program uses the compatibility method for unrooted two-state characters to obtain the largest cliques of characters and the trees which they suggest. This approach originated in the work of Le Quesne (1969), though the algorithms were not precisely specified until the later work of Estabrook, Johnson, and McMorris (1976a, 1976b). These authors proved the theorem that a group of two-state characters which were pairwise compatible would be jointly compatible. This program uses an algorithm inspired by the Kent Fiala - George Estabrook program CLINCH, though closer in detail to the algorithm of Bron and Kerbosch (1973). I am indebted to Kent Fiala for pointing out that paper to me, and to David Penny for describing to me his branch-and-bound approach to finding largest cliques, from which I have also borrowed. I am particularly grateful to Kent Fiala for catching a bug in versions 2.0 and 2.1 which resulted in those versions failing to find all of the cliques which they should. The program computes a compatibility matrix for the characters, then uses a recursive procedure to examine all possible cliques of characters.

After one pass through all possible cliques, the program knows the size of the largest clique, and during a second pass it prints out the cliques of the right size. It also, along with each clique, prints out the tree suggested by that clique.
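
The core of such a search can be sketched (illustratively, in Python; the program's own implementation differs in detail) as a Bron-Kerbosch style recursion over a pairwise character-compatibility matrix:

    def maximal_cliques(compat):
        """Enumerate maximal cliques of characters, given a symmetric boolean
        matrix compat where compat[i][j] is True when characters i and j are
        pairwise compatible (classic Bron-Kerbosch recursion, no pivoting)."""
        n = len(compat)
        nbrs = [{j for j in range(n) if j != i and compat[i][j]} for i in range(n)]
        found = []

        def extend(r, p, x):
            if not p and not x:
                found.append(sorted(r))   # r cannot be extended: maximal clique
                return
            for v in list(p):
                extend(r | {v}, p & nbrs[v], x & nbrs[v])
                p.remove(v)
                x.add(v)

        extend(set(), set(range(n)), set())
        return found

    # the largest cliques are then those of maximal size, e.g.
    # biggest = max(maximal_cliques(C), key=len)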

ASSUMPTIONS

Basically the following assumptions are made:
  1. Each character evolves independently.
  2. Different lineages evolve independently.
  3. The ancestral state is not known.
  4. Each character has a small chance of being one which evolves so rapidly, or is so thoroughly misinterpreted, that it provides no information on the tree.
  5. The probability of a single change in a character (other than in the high rate characters) is low but not as low as the probability of being one of these "bad" characters.
  6. The probability of two changes in a low-rate character is much less than the probability that it is a high-rate character.
  7. The true tree has segments which are not so unequal in length that two changes in a long segment are as easy to envisage as one change in a short segment.

The assumptions of compatibility methods have been treated in several of my papers (1978b, 1979, 1981b, 1988b), especially the 1981 paper. For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).

A constant available for alteration at the beginning of the program is the form width, "FormWide", which you may want to change to make it as large as possible consistent with the page width available on your output device, so as to avoid the output of cliques and of trees getting wrapped around unnecessarily.

Usage

Here is a sample session with fclique


% fclique 
Largest clique program
Phylip discrete states file: clique.dat
Phylip clique program output file [clique.fclique]: 


Output written to file "clique.fclique"

Tree written on file "clique.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Largest clique program
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates Phylip discrete states file
  [-outfile]           outfile    [*.fclique] Phylip clique program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -ancfile            properties Phylip ancestral states file (optional)
   -factorfile         properties Phylip multistate factors file (optional)
   -weights            properties Phylip weights file (optional)
   -cliqmin            integer    [0] Minimum clique size (Integer 0 or more)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fclique] Phylip tree output file
                                  (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -printcomp          boolean    [N] Print out compatibility matrix

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates Phylip discrete states file Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip clique program output file Output file <*>.fclique
Additional (Optional) qualifiers
-ancfile properties Phylip ancestral states file (optional) Property value(s)  
-factorfile properties Phylip multistate factors file (optional) Property value(s)  
-weights properties Phylip weights file (optional) Property value(s)  
-cliqmin integer Minimum clique size Integer 0 or more 0
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fclique
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-printcomp boolean Print out compatibility matrix Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

Input to the algorithm is standard, but the "?", "P", and "B" states are not allowed. This is a serious limitation of this program. If you want to find large cliques in data that have "?" states, I recommend that you use fmix instead with the -Threshold option and the value of the threshold set to 2.0. The theory underlying this is given in my paper on character weighting (Felsenstein, 1981b).

fclique reads discrete character data with 2 states.

Input files for usage example

File: clique.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110
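
The species lines above follow the standard discrete-character layout: a first line with the numbers of species and characters, then one line per species with a 10-character name field followed by the 0/1 states. As a minimal illustration only (this Python sketch is not part of PHYLIP or EMBOSS; the file name is simply the example above), a reader for this layout that also rejects the "?", "P" and "B" states fclique does not accept might look like:

# Minimal sketch of a reader for the discrete-states layout shown above.
def read_discrete_states(path):
    with open(path) as fh:
        nspecies, nchars = map(int, fh.readline().split()[:2])
        data = {}
        for _ in range(nspecies):
            line = fh.readline().rstrip("\n")
            name = line[:10].strip()               # 10-character name field
            states = line[10:].replace(" ", "")[:nchars]
            if any(c in "?PB" for c in states):    # not accepted by fclique
                raise ValueError(name + ": '?', 'P' and 'B' states are not allowed")
            data[name] = states
    return data

for name, states in read_discrete_states("clique.dat").items():
    print(name, states)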

Output file format

fclique writes the cliques to the text output file and a tree to a separate output file

Output files for usage example

File: clique.fclique


Largest clique program, version 3.69.650




Largest Cliques
------- -------


Characters: (  1  2  3  6)


  Tree and characters:

     2  1  3  6
     0  0  1  1

             +1-Delta     
       +0--1-+
  +--0-+     +--Epsilon   
  !    !
  !    +--------Gamma     
  !
  +-------------Alpha     
  !
  +-------------Beta      

remember: this is an unrooted tree!


File: clique.treefile

(((Delta,Epsilon),Gamma),Alpha,Beta);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fretree
fretree

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Interactive tree rearrangement

Description

Reads in a tree (with branch lengths if necessary) and allows you to reroot the tree, to flip branches, to change species names and branch lengths, and then write the result out. Can be used to convert between rooted and unrooted trees, and to write the tree into a preliminary version of a new XML tree file format which is under development and which is described in the RETREE documentation web page.

Algorithm

RETREE is a tree editor. It reads in a tree, or allows the user to construct one, and displays this tree on the screen. The user then can specify how the tree is to be rearranged, rerooted or written out to a file.

The input trees are in one file (with default file name intree), the output trees are written into another (outtree). The user can reroot, flip branches, change names of species, change or remove branch lengths, and move around to look at various parts of the tree if it is too large to fit on the screen. The trees can be multifurcating at any level, although the user is warned that many PHYLIP programs still cannot handle multifurcations above the root, or even at the root.

A major use for this program will be to change rootedness of trees so that a rooted tree derived from one program can be fed in as an unrooted tree to another (you are asked about this when you give the command to write out the tree onto the tree output file). It will also be useful for specifying the length of a branch in a tree where you want a program like DNAML, DNAMLK, FITCH, or CONTML to hold that branch length constant (see the L suboption of the User Tree option in those programs). It will also be useful for changing the order of species for purely cosmetic reasons for DRAWGRAM and DRAWTREE, including using the Midpoint method of rooting the tree. It can also be used to write out a tree file in the Nexus format used by PAUP and MacClade or in our XML tree file format.

This program uses graphic characters that show the tree to best advantage on some computer systems. Its graphic characters will work best on MSDOS systems or MSDOS windows in Windows, and on any system whose screen or terminal emulates ANSI standard terminals such as old Digital VT100 terminals, Telnet programs, or VT100-compatible windows in the X windowing system. For any other screen types (such as Macintosh windows) there is a generic option which does not make use of screen graphics characters. The program will work well in those cases, but the tree it displays will look a bit uglier.

Usage

Here is a sample session with fretree


% fretree 
Interactive tree rearrangement
Number of species [0]: 10
Phylip tree file: retree.dat
Phylip tree output file [retree.treefile]: 
NEXT? (R . U W O T F D B N H J K L C + ? X Q) (? for Help): Q
Do you want to write out the tree to a file? (Y or N): Y
Enter R if the tree is to be rooted, OR enter U if the tree is to be unrooted: U

Tree written to file "retree.treefile"



Reading tree file ...



                                      ,>>1:Human
                                   ,>22  
                                ,>21  `>>2:Chimp
                                !  !  
                             ,>20  `>>>>>3:Gorilla
                             !  !  
                 ,>>>>>>>>>>19  `>>>>>>>>4:Orang
                 !           !  
              ,>18           `>>>>>>>>>>>5:Gibbon
              !  !  
              !  !              ,>>>>>>>>6:Barbary Ma
              !  `>>>>>>>>>>>>>23  
              !                 !  ,>>>>>7:Crab-e. Ma
     ,>>>>>>>17                 `>24  
     !        !                    !  ,>>8:Rhesus Mac
     !        !                    `>25  
     !        !                       `>>9:Jpn Macaq
  ,>16        !  
  !  !        `>>>>>>>>>>>>>>>>>>>>>>>>>10:Squir. Mon
  !  !  
  !  !                                ,>11:Tarsier
** 7 lines below screen **

Go to the input files for this example
Go to the output files for this example

Command line arguments

Interactive tree rearrangement
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-spp]               integer    [0] Number of species (Any integer value)
  [-intreefile]        tree       Phylip tree file
  [-outtreefile]       outfile    [*.fretree] Phylip tree output file

   Additional (Optional) qualifiers:
   -initialtree        menu       [Arbitary] Initial tree (Values: a
                                  (Arbitary); u (User); s (Specify))
   -format             menu       [p] Format to write trees (Values: p
                                  (PHYLIP); n (NEXUS); x (XML))
   -screenwidth        integer    [80] Width of terminal screen in characters
                                  (Any integer value)
   -vscreenwidth       integer    [80] Width of plotting area in characters
                                  (Any integer value)
   -screenlines        integer    [24] Number of lines on screen (Any integer
                                  value)

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outtreefile" associated qualifiers
   -odirectory3        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-spp]
(Parameter 1)
integer Number of species Any integer value 0
[-intreefile]
(Parameter 2)
tree Phylip tree file Phylogenetic tree  
[-outtreefile]
(Parameter 3)
outfile Phylip tree output file Output file <*>.fretree
Additional (Optional) qualifiers
-initialtree list Initial tree
a (Arbitary)
u (User)
s (Specify)
Arbitary
-format list Format to write trees
p (PHYLIP)
n (NEXUS)
x (XML)
p
-screenwidth integer Width of terminal screen in characters Any integer value 80
-vscreenwidth integer Width of plotting area in characters Any integer value 80
-screenlines integer Number of lines on screen Any integer value 24
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outtreefile" associated outfile qualifiers
-odirectory3
-odirectory_outtreefile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fretree reads a Phylip tree file.

Input files for usage example

File: retree.dat

((((((((Human,Chimp),Gorilla),Orang),Gibbon),(Barbary_Ma,(Crab-e._Ma,
(Rhesus_Mac,Jpn_Macaq)))),Squir._Mon),((Tarsier,Lemur),Bovine)),Mouse);

Output file format

The N (output file format) option allows the user to specify that the tree files that are written by the program will be in one of three formats:
  1. The PHYLIP default file format (the Newick standard) used by the programs in this package.
  2. The Nexus format defined by David Swofford and by Wayne Maddison and David Maddison for their programs PAUP and MacClade. A tree file written in Nexus format should be directly readable by those programs (they also have options to read a regular PHYLIP tree file).
  3. An XML tree file format which we have defined.

The XML tree file format is fairly simple. The tree file, which may have multiple trees, is enclosed in a pair of <PHYLOGENIES> ... </PHYLOGENIES> tags. Each tree is included in tags <PHYLOGENY> ... </PHYLOGENY>. Each branch of the tree is enclosed in a pair of tags <CLADE> ... </CLADE>, which enclose the branch and all its descendants. If the branch has a length, this is given by the LENGTH attribute of the CLADE tag, so that the pair of tags looks like this: <CLADE LENGTH="0.09362"> ... </CLADE>

A tip of the tree is at the end of a branch (and hence that branch is enclosed in a pair of <CLADE> ... </CLADE> tags). Its name is enclosed by <NAME> ... </NAME> tags. Here is an XML tree:

<phylogenies>
  <phylogeny>
    <clade>
      <clade length="0.87231"><name>Mouse</name></clade>
      <clade length="0.49807"><name>Bovine</name></clade>
      <clade length="0.39538">
        <clade length="0.25930"><name>Gibbon</name></clade>
        <clade length="0.10815">
          <clade length="0.24166"><name>Orang</name></clade>
          <clade length="0.04405">
            <clade length="0.12322"><name>Gorilla</name></clade>
            <clade length="0.06026">
              <clade length="0.13846"><name>Chimp</name></clade>
              <clade length="0.0857"><name>Human</name></clade>
            </clade>
          </clade>
        </clade>
      </clade>
    </clade>
  </phylogeny>
</phylogenies>
  

The indentation is for readability but is not part of the XML tree standard, which ignores that kind of white space.
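
As an informal illustration (not part of PHYLIP, EMBOSS, or any XML tree standard), the small example above can be read with Python's standard xml.etree module. The sketch below assumes the lower-case tags used in the example and a hypothetical file name, and prints each tip name with the branch length of its clade.

import xml.etree.ElementTree as ET

def list_tips(path):
    # assumes the lower-case <clade>/<name> tags used in the example above
    for clade in ET.parse(path).getroot().iter("clade"):
        name = clade.find("name")
        if name is not None and name.text:         # a tip: a clade with a <name> child
            print(name.text, clade.get("length"))  # the length attribute may be absent

list_tips("example_tree.xml")                      # hypothetical file holding the tree above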

What programs can read an XML tree? None right now, not even PHYLIP programs! But soon our lab's LAMARC package will have programs that can read an XML tree. XML is rapidly becoming the standard for representing and interchanging complex data -- it is time to have an XML tree standard. Certain extensions are obvious (to represent the bootstrap proportion for a branch, use BOOTP=0.83 in the CLADE tag, for example).

There are other proposals for an XML tree standard. They have many similarities to this one, but are not identical to it. At the moment there is no mechanism in place for deciding between them other than seeing which get widely used. Here are links to other proposals:
  1. Taxonomic Markup Language: http://www.albany.edu/~gilmr/pubxml/ (preprint at xml.coverpages.org/gilmour-TML.pdf), published in the paper by Ron Gilmour (2000).
  2. Andrew Rambaut's BEAST XML phylogeny format: an XML format for phylogenies is sketchily described on page 9 of the PDF of the BEAST manual at http://evolve.zoo.ox.ac.uk/beast/
  3. treeml: Jean-Daniel Fekete's DTD for a tree XML file, http://www.nomencurator.org/InfoVis2003/download/treeml.dtd (see also the example at http://www.cs.umd.edu/hcil/iv03contest/datasets/treeml-sample.xml)

The W (screen and window Width) option specifies the width in characters of the area which the trees will be plotted to fit into. This is by default 80 characters so that they will fit on a normal width terminal. The actual width of the display on the terminal (normally 80 characters) will be regarded as a window displaying part of the tree. Thus you could set the "plotting area" to 132 characters, and inform the program that the screen width is 80 characters. Then the program will display only part of the tree at any one time.

Output files for usage example

File: retree.treefile

(((((((Human,Chimp),Gorilla),Orang),Gibbon),(Barbary_Ma,(Crab-e._Ma,
(Rhesus_Mac,Jpn_Macaq)))),Squir._Mon),((Tarsier,Lemur),Bovine),Mouse);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
fdrawgram Plots a cladogram- or phenogram-like rooted tree diagram
fdrawtree Plots an unrooted tree diagram

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fgendist
fgendist

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Compute genetic distances from gene frequencies

Description

Computes one of three different genetic distance formulas from gene frequency data. The formulas are Nei's genetic distance, the Cavalli-Sforza chord measure, and the genetic distance of Reynolds et al. The former is appropriate for data in which new mutations occur in an infinite isoalleles neutral mutation model, the latter two for a model without mutation and with pure genetic drift. The distances are written to a file in a format appropriate for input to the distance matrix programs.

Algorithm

This program computes any one of three measures of genetic distance from a set of gene frequencies in different populations (or species). The three are Nei's genetic distance (Nei, 1972), Cavalli-Sforza's chord measure (Cavalli- Sforza and Edwards, 1967) and Reynolds, Weir, and Cockerham's (1983) genetic distance. These are written to an output file in a format that can be read by the distance matrix phylogeny programs FITCH and KITSCH.

The three measures have somewhat different assumptions. All assume that all differences between populations arise from genetic drift. Nei's distance is formulated for an infinite isoalleles model of mutation, in which there is a rate of neutral mutation and each mutation is to a completely new allele. It is assumed that all loci have the same rate of neutral mutation, and that the genetic variability initially in the population is at equilibrium between mutation and genetic drift, with the effective population size of each population remaining constant.

Nei's distance is:

    D = -\ln \left( \frac{\sum_m \sum_i p_{1mi}\, p_{2mi}}{\left[ \sum_m \sum_i p_{1mi}^{2} \right]^{1/2} \left[ \sum_m \sum_i p_{2mi}^{2} \right]^{1/2}} \right)

where m is summed over loci, i over alleles at the m-th locus, and where p_{1mi} is the frequency of the i-th allele at the m-th locus in population 1. Subject to the above assumptions, Nei's genetic distance is expected, for a sample of sufficiently many equivalent loci, to rise linearly with time.
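
A direct transcription of this formula into Python (a sketch for illustration only, not the code fgendist itself uses) takes, for each population, a list of per-locus allele-frequency lists:

import math

# freqs1, freqs2: one list of allele frequencies per locus, for populations 1 and 2
def nei_distance(freqs1, freqs2):
    jxy = jx = jy = 0.0
    for p1, p2 in zip(freqs1, freqs2):             # sum over loci m
        jxy += sum(a * b for a, b in zip(p1, p2))  # sum_i p1mi * p2mi
        jx += sum(a * a for a in p1)               # sum_i p1mi^2
        jy += sum(b * b for b in p2)               # sum_i p2mi^2
    return -math.log(jxy / math.sqrt(jx * jy))

# two loci with 2 and 3 alleles (frequencies at each locus sum to 1)
print(nei_distance([[0.9, 0.1], [0.5, 0.3, 0.2]],
                   [[0.7, 0.3], [0.2, 0.5, 0.3]]))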

The other two genetic distances assume that there is no mutation, and that all gene frequency changes are by genetic drift alone. However they do not assume that population sizes have remained constant and equal in all populations. They cope with changing population size by having expectations that rise linearly not with time, but with the sum over time of 1/N, where N is the effective population size. Thus if population size doubles, genetic drift will be taking place more slowly, and the genetic distance will be expected to be rising only half as fast with respect to time. Both genetic distances are different estimators of the same quantity under the same model.

Cavalli-Sforza's chord distance is given by

    D^2 = 4 \sum_m \left[ 1 - \sum_i p_{1mi}^{1/2}\, p_{2mi}^{1/2} \right] \Big/ \sum_m (a_m - 1)

where m indexes the loci, where i is summed over the alleles at the m-th locus, and where a_m is the number of alleles at the m-th locus. It can be shown that this distance always satisfies the triangle inequality. Note that as given here it is divided by the number of degrees of freedom, the sum of the numbers of alleles minus one. The quantity which is expected to rise linearly with amount of genetic drift (sum of 1/N over time) is D squared, the quantity computed above, and that is what is written out into the distance matrix.

Reynolds, Weir, and Cockerham's (1983) genetic distance is

    D^2 = \frac{\sum_m \sum_i \left[ p_{1mi} - p_{2mi} \right]^{2}}{2 \sum_m \left[ 1 - \sum_i p_{1mi}\, p_{2mi} \right]}

where the notation is as before and D^2 is the quantity that is expected to rise linearly with cumulated genetic drift.
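
Both drift-based distances can be transcribed the same way (again an illustrative sketch only, using the same per-locus frequency lists as in the Nei example above):

def chord_distance_squared(freqs1, freqs2):
    # 4 * sum_m [1 - sum_i sqrt(p1mi * p2mi)], divided by sum_m (a_m - 1)
    num = sum(4.0 * (1.0 - sum((a * b) ** 0.5 for a, b in zip(p1, p2)))
              for p1, p2 in zip(freqs1, freqs2))
    dof = sum(len(p) - 1 for p in freqs1)
    return num / dof

def reynolds_distance_squared(freqs1, freqs2):
    # sum_m sum_i (p1mi - p2mi)^2 over 2 * sum_m [1 - sum_i p1mi * p2mi]
    num = sum((a - b) ** 2
              for p1, p2 in zip(freqs1, freqs2) for a, b in zip(p1, p2))
    den = 2.0 * sum(1.0 - sum(a * b for a, b in zip(p1, p2))
                    for p1, p2 in zip(freqs1, freqs2))
    return num / den

pop1 = [[0.9, 0.1], [0.5, 0.3, 0.2]]
pop2 = [[0.7, 0.3], [0.2, 0.5, 0.3]]
print(chord_distance_squared(pop1, pop2), reynolds_distance_squared(pop1, pop2))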

Having computed one of these genetic distances, one which you feel is appropriate to the biology of the situation, you can use it as the input to the programs FITCH, KITSCH or NEIGHBOR. Keep in mind that the statistical model in those programs implicitly assumes that the distances in the input table have independent errors. For any measure of genetic distance this will not be true, as bursts of random genetic drift, or sampling events in drawing the sample of individuals from each population, cause fluctuations of gene frequency that affect many distances simultaneously. While this is not expected to bias the estimate of the phylogeny, it does mean that the weighing of evidence from all the different distances in the table will not be done with maximal efficiency. One issue is which value of the P (Power) parameter should be used. This depends on how the variance of a distance rises with its expectation. For Cavalli-Sforza's chord distance, and for the Reynolds et al. distance, it can be shown that the variance of the distance will be proportional to the square of its expectation; this suggests a value of 2 for P, which is the default value for FITCH and KITSCH (there is no P option in NEIGHBOR).

If you think that the pure genetic drift model is appropriate, and are thus tempted to use the Cavalli-Sforza or Reynolds et al. distances, you might consider using the maximum likelihood program CONTML instead. It will correctly weigh the evidence in that case. Like those genetic distances, it uses approximations that break down as loci start to drift all the way to fixation. Although Nei's distance will not break down in that case, it makes other assumptions about equality of substitution rates at all loci and constancy of population sizes.

The most important thing to remember is that genetic distance is not an abstract, idealized measure of "differentness". It is an estimate of a parameter (time or cumulated inverse effective population size) of the model which is thought to have generated the differences we see. As an estimate, it has statistical properties that can be assessed, and we should never have to choose between genetic distances based on their aesthetic properties, or on the personal prestige of their originators. Considering them as estimates focuses us on the questions which genetic distances are intended to answer, for if there are none there is no reason to compute them. For further perspective on genetic distances, I recommend my own paper evaluating Reynolds, Weir, and Cockerham (1983), and the material in Nei's book (Nei, 1987).

Usage

Here is a sample session with fgendist


% fgendist 
Compute genetic distances from gene frequencies
Phylip gendist program input file: gendist.dat
Phylip gendist program output file [gendist.fgendist]: 

Distances calculated for species
    European     .
    African      ..
    Chinese      ...
    American     ....
    Australian   .....

Distances written to file "gendist.fgendist"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Compute genetic distances from gene frequencies
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            frequencies File containing one or more sets of data
  [-outfile]           outfile    [*.fgendist] Phylip gendist program output
                                  file

   Additional (Optional) qualifiers:
   -method             menu       [n] Which method to use (Values: n (Nei
                                  genetic distance); c (Cavalli-Sforza chord
                                  measure); r (Reynolds genetic distance))
   -[no]progress       boolean    [Y] Print indications of progress of run
   -lower              boolean    [N] Lower triangular distance matrix

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
frequencies File containing one or more sets of data Frequency value(s)  
[-outfile]
(Parameter 2)
outfile Phylip gendist program output file Output file <*>.fgendist
Additional (Optional) qualifiers
-method list Which method to use
n (Nei genetic distance)
c (Cavalli-Sforza chord measure)
r (Reynolds genetic distance)
n
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-lower boolean Lower triangular distance matrix Boolean value Yes/No No
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fgendist reads continuous character data

Continuous character data

The programs in this group use gene frequencies and quantitative character values. One (CONTML) constructs maximum likelihood estimates of the phylogeny, another (GENDIST) computes genetic distances for use in the distance matrix programs, and the third (CONTRAST) examines correlation of traits as they evolve along a given phylogeny.

When the gene frequencies data are used in CONTML or GENDIST, this involves the following assumptions:

  1. Different lineages evolve independently.
  2. After two lineages split, their characters change independently.
  3. Each gene frequency changes by genetic drift, with or without mutation (this varies from method to method).
  4. Different loci or characters drift independently.

How these assumptions affect the methods will be seen in my papers on inference of phylogenies from gene frequency and continuous character data (Felsenstein, 1973b, 1981c, 1985c).

The input formats are fairly similar to the discrete-character programs, but with one difference. When CONTML is used in the gene-frequency mode (its usual, default mode), or when GENDIST is used, the first line contains the number of species (or populations) and the number of loci and the options information. There then follows a line which gives the numbers of alleles at each locus, in order. This must be the full number of alleles, not the number of alleles which will be input: i. e. for a two-allele locus the number should be 2, not 1. There then follow the species (population) data, each species beginning on a new line. The first 10 characters are taken as the name, and thereafter the values of the individual characters are read free-format, preceded and separated by blanks. They can go to a new line if desired, though of course not in the middle of a number. Missing data is not allowed - an important limitation. In the default configuration, for each locus, the numbers should be the frequencies of all but one allele. The menu option A (All) signals that the frequencies of all alleles are provided in the input data -- the program will then automatically ignore the last of them. So without the A option, for a three-allele locus there should be two numbers, the frequencies of two of the alleles (and of course it must always be the same two!). Here is a typical data set without the A option:

     5    3
2 3 2
Alpha      0.90 0.80 0.10 0.56
Beta       0.72 0.54 0.30 0.20
Gamma      0.38 0.10 0.05  0.98
Delta      0.42 0.40 0.43 0.97
Epsilon    0.10 0.30 0.70 0.62

whereas here is what it would have to look like if the A option were invoked:

     5    3
2 3 2
Alpha      0.90 0.10 0.80 0.10 0.10 0.56 0.44
Beta       0.72 0.28 0.54 0.30 0.16 0.20 0.80
Gamma      0.38 0.62 0.10 0.05 0.85  0.98 0.02
Delta      0.42 0.58 0.40 0.43 0.17 0.97 0.03
Epsilon    0.10 0.90 0.30 0.70 0.00 0.62 0.38

The first line has the number of species (or populations) and the number of loci. The second line has the number of alleles for each of the 3 loci. The species lines have names (filled out to 10 characters with blanks) followed by the gene frequencies of the 2 alleles for the first locus, the 3 alleles for the second locus, and the 2 alleles for the third locus. You can start a new line after any of these allele frequencies, and continue to give the frequencies on that line (without repeating the species name).

If all alleles of a locus are given, it is important to have them add up to 1. Roundoff of the frequencies may cause the program to conclude that the numbers do not sum to 1, and stop with an error message.

While many compilers may be more tolerant, it is probably wise to make sure that each number, including the first, is preceded by a blank, and that there are digits both preceding and following any decimal points.
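
To make the relationship between the two layouts concrete, here is an illustrative Python sketch (not part of GENDIST) that expands one species line from the "all but one allele" form into the full form by supplying the omitted last allele for each locus:

def expand_frequencies(values, alleles_per_locus):
    # values: the frequencies as given without the A option, in input order
    full, pos = [], 0
    for k in alleles_per_locus:            # e.g. [2, 3, 2] as in the examples above
        given = values[pos:pos + k - 1]    # only k-1 frequencies are supplied
        pos += k - 1
        full.append(given + [round(1.0 - sum(given), 4)])
    return full

# Alpha's line from the first example
print(expand_frequencies([0.90, 0.80, 0.10, 0.56], [2, 3, 2]))
# -> [[0.9, 0.1], [0.8, 0.1, 0.1], [0.56, 0.44]], matching the A-option example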

CONTML and CONTRAST also treat quantitative characters (the continuous-characters mode in CONTML, which is option C). It is assumed that each character is evolving according to a Brownian motion model, at the same rate, and independently. In reality it is almost always impossible to guarantee this. The issue is discussed at length in my review article in Annual Review of Ecology and Systematics (Felsenstein, 1988a), where I point out the difficulty of transforming the characters so that they are not only genetically independent but have independent selection acting on them. If you are going to use CONTML to model evolution of continuous characters, then you should at least make some attempt to remove genetic correlations between the characters (usually all one can do is remove phenotypic correlations by transforming the characters so that there is no within-population covariance and so that the within-population variances of the characters are equal -- this is equivalent to using Canonical Variates). However, this will only guarantee that one has removed phenotypic covariances between characters. Genetic covariances could only be removed by knowing the coheritabilities of the characters, which would require genetic experiments, and selective covariances (covariances due to covariation of selection pressures) would require knowledge of the sources and extent of selection pressure in all variables.

CONTRAST is a program designed to infer, for a given phylogeny that is provided to the program, the covariation between characters in a data set. Thus we have a program in this set that allows us to take information about the covariation and rates of evolution of characters and make an estimate of the phylogeny (CONTML), and a program that takes an estimate of the phylogeny and infers the variances and covariances of the character changes. But we have no program that infers both the phylogenies and the character covariation from the same data set.

In the quantitative characters mode, a typical small data set would be:

     5   6
Alpha      0.345 0.467 1.213  2.2  -1.2 1.0
Beta       0.457 0.444 1.1    1.987 -0.2 2.678
Gamma      0.6 0.12 0.97 2.3  -0.11 1.54
Delta      0.68  0.203 0.888 2.0  1.67
Epsilon    0.297  0.22 0.90 1.9 1.74

Note that in the latter case, there is no line giving the numbers of alleles at each locus. In this latter case no square-root transformation of the coordinates is done: each is assumed to give directly the position on the Brownian motion scale.

For further discussion of options and modifiable constants in CONTML, GENDIST, and CONTRAST see the documentation files for those programs.

Input files for usage example

File: gendist.dat

    5    10
2 2 2 2 2 2 2 2 2 2
European   0.2868 0.5684 0.4422 0.4286 0.3828 0.7285 0.6386 0.0205
0.8055 0.5043
African    0.1356 0.4840 0.0602 0.0397 0.5977 0.9675 0.9511 0.0600
0.7582 0.6207
Chinese    0.1628 0.5958 0.7298 1.0000 0.3811 0.7986 0.7782 0.0726
0.7482 0.7334
American   0.0144 0.6990 0.3280 0.7421 0.6606 0.8603 0.7924 0.0000
0.8086 0.8636
Australian 0.1211 0.2274 0.5821 1.0000 0.2018 0.9000 0.9837 0.0396
0.9097 0.2976

Output file format

fgendist output simply contains on its first line the number of species (or populations). Each species (or population) starts a new line, with its name printed out first, followed by up to nine genetic distances per line, in the standard format used as input by the distance matrix programs. The output, in its default form, is ready to be used in the distance matrix programs.

Output files for usage example

File: gendist.fgendist

    5
European    0.000000  0.078002  0.080749  0.066805  0.103014
African     0.078002  0.000000  0.234698  0.104975  0.227281
Chinese     0.080749  0.234698  0.000000  0.053879  0.063275
American    0.066805  0.104975  0.053879  0.000000  0.134756
Australian  0.103014  0.227281  0.063275  0.134756  0.000000
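
The matrix above is small enough that each species fits on one line; under that assumption (larger matrices wrap after nine distances per line) a minimal reader for this output might look like the following sketch, which is for illustration only and is not part of the package:

def read_distance_matrix(path):
    with open(path) as fh:
        n = int(fh.readline().split()[0])
        names, rows = [], []
        for _ in range(n):
            line = fh.readline()
            names.append(line[:10].strip())                   # 10-character name field
            rows.append([float(x) for x in line[10:].split()])
    return {a: dict(zip(names, row)) for a, row in zip(names, rows)}

matrix = read_distance_matrix("gendist.fgendist")
print(matrix["European"]["African"])                          # 0.078002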

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
egendist Genetic distance matrix program
fcontml Gene frequency and continuous character maximum likelihood

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: ffactor
ffactor

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Multistate to binary recoding program

Description

Takes discrete multistate data with character state trees and produces the corresponding data set with two states (0 and 1). Written by Christopher Meacham. This program was formerly used to accommodate multistate characters in MIX, but this is less necessary now that PARS is available.

Algorithm

This program factors a data set that contains multistate characters, creating a data set consisting entirely of binary (0,1) characters that, in turn, can be used as input to any of the other discrete character programs in this package, except for PARS. Besides this primary function, FACTOR also provides an easy way of deleting characters from a data set. The input format for FACTOR is very similar to the input format for the other discrete character programs except for the addition of character-state tree descriptions.

Note that this program has no way of converting an unordered multistate character into binary characters. Fortunately, PARS has joined the package, and it enables unordered multistate characters, in which any state can change to any other in one step, to be analyzed with parsimony.

FACTOR is really for a different case, that in which there are multiple states related on a "character state tree", which specifies for each state which other states it can change to. That graph of states is assumed to be a tree, with no loops in it.

Usage

Here is a sample session with ffactor


% ffactor 
Multistate to binary recoding program
Phylip factor program input file: factor.dat
Phylip factor program output file [factor.ffactor]: 


Data matrix written on file "factor.ffactor"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Multistate to binary recoding program
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            infile     Phylip factor program input file
  [-outfile]           outfile    [*.ffactor] Phylip factor program output
                                  file

   Additional (Optional) qualifiers:
   -anc                boolean    [N] Put ancestral states in output file
   -factors            boolean    [N] Put factors information in output file
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers:
   -outfactorfile      outfile    [*.ffactor] Phylip factor data output file
                                  (optional)
   -outancfile         outfile    [*.ffactor] Phylip ancestor data output file
                                  (optional)

   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outfactorfile" associated qualifiers
   -odirectory         string     Output directory

   "-outancfile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
infile Phylip factor program input file Input file Required
[-outfile]
(Parameter 2)
outfile Phylip factor program output file Output file <*>.ffactor
Additional (Optional) qualifiers
-anc boolean Put ancestral states in output file Boolean value Yes/No No
-factors boolean Put factors information in output file Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
-outfactorfile outfile Phylip factor data output file (optional) Output file <*>.ffactor
-outancfile outfile Phylip ancestor data output file (optional) Output file <*>.ffactor
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outfactorfile" associated outfile qualifiers
-odirectory string Output directory Any string  
"-outancfile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

ffactor reads character state tree data.

This program factors a data set that contains multistate characters, creating a data set consisting entirely of binary (0,1) characters that, in turn, can be used as input to any of the other discrete character programs in this package, except for PARS. Besides this primary function, FACTOR also provides an easy way of deleting characters from a data set. The input format for FACTOR is very similar to the input format for the other discrete character programs except for the addition of character-state tree descriptions.

Note that this program has no way of converting an unordered multistate character into binary characters. Fortunately, PARS has joined the package, and it enables unordered multistate characters, in which any state can change to any other in one step, to be analyzed with parsimony.

FACTOR is really for a different case, that in which there are multiple states related on a "character state tree", which specifies for each state which other states it can change to. That graph of states is assumed to be a tree, with no loops in it.

First line

The first line of the input file should contain the number of species and the number of multistate characters. This first line is followed by the lines describing the character-state trees, one description per line. The species information constitutes the last part of the file. Any number of lines may be used for a single species.

The first line is free format with the number of species first, separated by at least one blank (space) from the number of multistate characters, which in turn is separated by at least one blank from the options, if present.

Character-state tree descriptions

The character-state trees are described in free format. The character number of the multistate character is given first followed by the description of the tree itself. Each description must be completed on a single line. Each character that is to be factored must have a description, and the characters must be described in the order that they occur in the input, that is, in numerical order.

The tree is described by listing the pairs of character states that are adjacent to each other in the character-state tree. The two character states in each adjacent pair are separated by a colon (":"). If character fifteen has this character state tree for possible states "A", "B", "C", and "D":

                         A ---- B ---- C
                                |
                                |
                                |
                                D

then the character-state tree description would be

                        15  A:B B:C D:B

Note that either symbol may appear first. The ancestral state is identified, if desired, by putting it "adjacent" to a period. If we wanted to root character fifteen at state C:

                         A <--- B <--- C
                                |
                                |
                                V
                                D

we could write

                      15  B:D A:B C:B .:C

Both the order in which the pairs are listed and the order of the symbols in each pair are arbitrary. However, each pair may only appear once in the list. Any symbols may be used for a character state in the input except the character that signals the connection between two states (in the distribution copy this is set to ":"), ".", and, of course, a blank. Blanks are ignored completely in the tree description so that even B:DA:BC:B.:C or B : DA : BC : B. : C would be equivalent to the above example. However, at least one blank must separate the character number from the tree description.
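
As an illustration of this syntax only (the sketch below is not FACTOR's own code), the description given above for character fifteen can be parsed into an adjacency map plus the ancestral state, relying on the facts stated here: states are single symbols, pairs are joined by ":", blanks are ignored, and "." marks the root.

def parse_state_tree(line):
    number, description = line.split(None, 1)
    s = description.replace(" ", "")       # blanks inside the description are ignored
    adjacency, root = {}, None
    for i in range(0, len(s), 3):          # each pair is "X:Y" with one-symbol states
        a, b = s[i], s[i + 2]
        if a == ".":                       # ".:C" identifies the ancestral state
            root = b
        elif b == ".":
            root = a
        else:
            adjacency.setdefault(a, set()).add(b)
            adjacency.setdefault(b, set()).add(a)
    return int(number), adjacency, root

print(parse_state_tree("15  B:D A:B C:B .:C"))   # character 15, rooted at state C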

Deleting characters from a data set

If no description line appears in the input for a particular character, then that character will be omitted from the output. If the character number is given on the line, but no character-state tree is provided, then the symbol for the character in the input will be copied directly to the output without change. This is useful for characters that are already coded "0" and "1". Characters can be deleted from a data set simply by listing only those that are to appear in the output.

Terminating the list of tree descriptions

The last character-state tree description should be followed by a line containing the number "999". This terminates processing of the trees and indicates the beginning of the species information.

Species information

The format for the species information is basically identical to the other discrete character programs. The first ten character positions are allotted to the species name (this value may be changed by altering the value of the constant nmlngth at the beginning of the program). The character states follow and may be continued to as many lines as desired. There is no current method for indicating polymorphisms. It is possible to either put blanks between characters or not.

There is a method for indicating uncertainty about states. There is one character value that stands for "unknown". If this appears in the input data then "?" is written out in all the corresponding positions in the output file. The character value that designates "unknown" is given in the constant unkchar at the beginning of the program, and can be changed by changing that constant. It is set to "?" in the distribution copy.

Input files for usage example

File: factor.dat

   4   6
1 A:B B:C
2 A:B B:.
4
5 0:1 1:2 .:0
6 .:# #:$ #:%
999
Alpha     CAW00#
Beta      BBX01%
Gamma     ABY12#
Epsilon   CAZ01$

Output file format

The first line of ffactor output will contain the number of species and the number of binary characters in the factored data set followed by the letter "A" if the A option was specified in the input. If option F was specified, the next line will begin "FACTORS". If option A was specified, the line describing the ancestor will follow next. Finally, the factored characters will be written for each species in the format required for input by the other discrete programs in the package. The maximum length of the output lines is 80 characters, but this maximum length can be changed prior to compilation.

In fact, the format of the output file for the A and F options is not correct for the current release of PHYLIP. We need to change their output to write a factors file and an ancestors file instead of putting the Factors and Ancestors information into the data file.

Output files for usage example

File: factor.ffactor

    4    5
Alpha     CA00#
Beta      BB01%
Gamma     AB12#
Epsilon   CA01$

File: factor.factor


File: factor.ancestor


Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

The output should be checked for error messages. Errors will occur in the character-state tree descriptions if the format is incorrect (colons in the wrong place, etc.), if more than one root is specified, if the tree contains loops (and hence is not a tree), and if the tree is not connected, e.g.


                             A:B B:C D:E

describes

                  A ---- B ---- C          D ---- E

This "tree" is in two unconnected pieces. An error will also occur if a symbol appears in the data set that is not in the tree description for that character. Blanks at the end of lines when the species information is continued to a new line will cause this kind of error.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpars Discrete character parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
EMBOSS: fconsense
fconsense

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Majority-rule and strict consensus tree

Description

Computes consensus trees by the majority-rule consensus tree method, which also allows one to easily find the strict consensus tree. Is not able to compute the Adams consensus tree. Trees are input in a tree file in standard nested-parenthesis notation, which is produced by many of the tree estimation programs in the package. This program can be used as the final step in doing bootstrap analyses for many of the methods in the package.

Algorithm

fconsense reads a file of computer-readable trees and prints out (and may also write out onto a file) a consensus tree. At the moment it carries out a family of consensus tree methods called the Ml methods (Margush and McMorris, 1981). These include strict consensus and majority rule consensus. Basically the consensus tree consists of monophyletic groups that occur as often as possible in the data. If a group occurs in more than a fraction l of all the input trees it will definitely appear in the consensus tree.

The tree printed out has at each fork a number indicating how many times the group which consists of the species to the right of (descended from) the fork occurred. Thus if we read in 15 trees and find that a fork has the number 15, that group occurred in all of the trees. The strict consensus tree consists of all groups that occurred 100% of the time, the rest of the resolution being ignored. The tree printed out here includes groups down to 50%, and below it until the tree is fully resolved.

The majority rule consensus tree consists of all groups that occur more than 50% of the time. Any other percentage level between 50% and 100% can also be used, and that is why the program in effect carries out a family of methods. You have to decide on the percentage level, figure out for yourself what number of occurrences that would be (e.g. 15 in the above case for 100%), and resolutely ignore any group below that number. Do not use numbers at or below 50%, because some groups occurring (say) 35% of the time will not be shown on the tree. The collection of all groups that occur 35% or more of the time may include two groups that are mutually self contradictory and cannot appear in the same tree. In this program, as the default method I have included groups that occur less than 50% of the time, working downwards in their frequency of occurrence, as long as they continue to resolve the tree and do not contradict more frequent groups. In this respect the method is similar to the Nelson consensus method (Nelson, 1979) as explicated by Page (1989) although it is not identical to it.

The program can also carry out Strict consensus, Majority Rule consensus without the extension which adds groups until the tree is fully resolved, and other members of the Ml family, where the user supplies the fraction of times a group must appear in the input trees to be included in the consensus tree. For the moment the program cannot carry out any other consensus tree method, such as Adams consensus (Adams, 1972, 1986) or methods based on quadruples of species (Estabrook, McMorris, and Meacham, 1985).
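
To illustrate the counting that underlies the majority-rule method, here is a minimal Python sketch. It is not how fconsense is implemented; it assumes the input trees have already been parsed into sets of clades (each clade a frozenset of species names) and simply keeps every clade that occurs in more than half of the trees.

    from collections import Counter

    def majority_rule(trees, cutoff=0.5):
        # trees: list of clade sets; each clade is a frozenset of species names
        counts = Counter(clade for clades in trees for clade in clades)
        n = len(trees)
        return {clade: c / n for clade, c in counts.items() if c / n > cutoff}

    # Three hypothetical input trees over species A-D, given by their clades
    t1 = {frozenset("AB"), frozenset("ABC")}
    t2 = {frozenset("AB"), frozenset("ABD")}
    t3 = {frozenset("AB"), frozenset("ABC")}
    print(majority_rule([t1, t2, t3]))
    # {A,B} occurs in 3/3 trees, {A,B,C} in 2/3; {A,B,D} (1/3) is dropped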

Usage

Here is a sample session with fconsense


% fconsense 
Majority-rule and strict consensus tree
Phylip tree file: consense.dat
Phylip consense program output file [consense.fconsense]: 


Consensus tree written to file "consense.treefile"

Output written to file "consense.fconsense"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Majority-rule and strict consensus tree
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-intreefile]        tree       Phylip tree file
  [-outfile]           outfile    [*.fconsense] Phylip consense program output
                                  file

   Additional (Optional) qualifiers (* if not always prompted):
   -method             menu       [mre] Consensus method (Values: s (strict
                                  consensus tree); mr (Majority Rule); mre
                                  (Majority Rule (extended)); ml (Minimum
                                  fraction (0.5 to 1.0)))
*  -mlfrac             float      [0.5] Fraction (l) of times a branch must
                                  appear (Number from 0.500 to 1.000)
   -root               toggle     [N] Trees to be treated as Rooted
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fconsense] Phylip tree output file
                                  (optional)
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -[no]prntsets       boolean    [Y] Print out the sets of species

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-intreefile]
(Parameter 1)
tree Phylip tree file Phylogenetic tree  
[-outfile]
(Parameter 2)
outfile Phylip consense program output file Output file <*>.fconsense
Additional (Optional) qualifiers
-method list Consensus method
s (strict consensus tree)
mr (Majority Rule)
mre (Majority Rule (extended))
ml (Minimum fraction (0.5 to 1.0))
mre
-mlfrac float Fraction (l) of times a branch must appear Number from 0.500 to 1.000 0.5
-root toggle Trees to be treated as Rooted Toggle value Yes/No No
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fconsense
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-[no]prntsets boolean Print out the sets of species Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fconsense reads a Phylip tree file containing one or more trees in the standard nested-parenthesis (Newick) notation.

Input files for usage example

File: consense.dat

(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));
(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));
(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));
(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));
(A,(B,(E,(G,((F,I),(((J,H),D),C))))));
(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));
(A,(B,(E,((F,I),(G,(((J,H),D),C))))));
(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));
(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));

Output file format

fconsense output is a list of the species (in the order in which they appear in the first tree, which is the numerical order used in the program), a list of the subsets that appear in the consensus tree, a list of those that appeared in one or another of the individual trees but did not occur frequently enough to get into the consensus tree, followed by a diagram showing the consensus tree. Each list of subsets consists of rows of symbols, one row per subset, each symbol either "." or "*". The species that are in the set are marked by "*". After every ten species there is a blank, to help you keep track of the alignment of columns. The order of symbols corresponds to the order of species in the species list. Thus a set consisting of the second, seventh, and eighth out of 13 species would be represented by:

          .*....**.. ...
Note that if the trees are unrooted the final tree will have one group, consisting of every species except the Outgroup (which by default is the first species encountered on the first tree), which always appears. It will not be listed in either of the lists of sets, but it will be shown in the final tree as occurring all of the time. This is hardly surprising: in telling the program that this species is the outgroup we have specified that the set consisting of all of the others is always a monophyletic set. So this is not to be taken as interesting information, despite its dramatic appearance.
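
For readers who want to post-process these set lines, here is a small illustrative Python snippet (the species names are hypothetical) that decodes a set string back into the species it contains:

    def decode_set(pattern, species):
        # '.'/'*' symbols follow the species order; blanks are only visual padding
        flags = pattern.replace(" ", "")
        return [name for name, flag in zip(species, flags) if flag == "*"]

    species = ["sp%d" % i for i in range(1, 14)]   # 13 hypothetical species
    print(decode_set(".*....**.. ...", species))   # ['sp2', 'sp7', 'sp8']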

Output files for usage example

File: consense.fconsense


Consensus tree program, version 3.69.650

Species in order: 

  1. A
  2. B
  3. H
  4. D
  5. J
  6. G
  7. E
  8. F
  9. I
  10. C



Sets included in the consensus tree

Set (species in order)     How many times out of    9.00

.......**.                   9.00
..********                   9.00
..****.***                   6.00
..***.....                   6.00
..***....*                   6.00
..*.*.....                   4.00
..***..***                   2.00


Sets NOT included in consensus tree:

Set (species in order)     How many times out of    9.00

.....**...                   3.00
.....*****                   3.00
..**......                   3.00
.....****.                   3.00
..****...*                   2.00
.....*.**.                   2.00
..*.******                   2.00
....******                   2.00
...*******                   1.00


Extended majority rule consensus tree

CONSENSUS TREE:
the numbers on the branches indicate the number
of times the partition of the species into the two sets
which are separated by that branch occurred
among the trees, out of   9.00 trees

                                          +-----------------------C
                                          |
                                  +--6.00-|               +-------H
                                  |       |       +--4.00-|
                                  |       +--6.00-|       +-------J
                          +--2.00-|               |
                          |       |               +---------------D
                          |       |
                  +--6.00-|       |                       +-------F
                  |       |       +------------------9.00-|
                  |       |                               +-------I
          +--9.00-|       |
          |       |       +---------------------------------------G
  +-------|       |
  |       |       +-----------------------------------------------E
  |       |
  |       +-------------------------------------------------------B
  |
  +---------------------------------------------------------------A


  remember: this is an unrooted tree!

File: consense.treefile

((((((C:9.00,((H:9.00,J:9.00):4.00,D:9.00):6.00):6.00,(F:9.00,I:9.00):9.00):2.00,G:9.00):6.00,
E:9.00):9.00,B:9.00):9.00,A:9.00);

Branch Lengths on the Consensus Tree?

Note that the lengths on the tree in the output tree file are not branch lengths but the number of times that each group appeared in the input trees. This number is the sum of the weights of the trees in which it appeared, so that if there are 11 trees, ten of them having weight 0.1 and one having weight 1.0, a group that appeared in the last tree and in 6 others would be shown as appearing 1.6 times, and its branch length will be 1.6. This means that if you take the consensus tree from the output tree file and try to draw it, the branch lengths will be strange.

I am often asked how to put the correct branch lengths on these trees (this is one of our Frequently Asked Questions). There is no simple answer to this; it depends on what "correct" means. For example, if you have a group of species that shows up in 80% of the trees, and the branch leading to that group has average length 0.1 among that 80%, is the "correct" length 0.1? Or is it (0.80 x 0.1)? There is no simple answer.

However, if you want to take the consensus tree as an estimate of the true tree (rather than as an indicator of the conflicts among trees) you may be able to use the User Tree (option U) mode of the phylogeny program that you used, and use it to put branch lengths on that tree. Thus, if you used DNAML, you can take the consensus tree, make sure it is an unrooted tree, and feed that to DNAML together with the original data set (before bootstrapping) using DNAML's option U. As DNAML wants an unrooted tree, you may have to use RETREE to make the tree unrooted (using the W option of RETREE and choosing the unrooted option within it). Of course you will also want to change the tree file name from "outtree" to "intree". If you used a phylogeny program that does not infer branch lengths, you might want to use a different one (such as FITCH or DNAML) to infer the branch lengths, again making sure the tree is unrooted, if the program needs that.
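
The weighted counting described above is simple enough to check by hand; the following throwaway Python lines just reproduce the 1.6 of the example:

    # 11 input trees: ten with weight 0.1 and one with weight 1.0
    weights = [0.1] * 10 + [1.0]
    # the group appears in six of the first ten trees and in the last one
    appears_in = [True] * 6 + [False] * 4 + [True]
    count = sum(w for w, present in zip(weights, appears_in) if present)
    print(round(count, 2))   # 1.6 -- written in place of a branch length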

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
econsense Majority-rule and strict consensus tree
ftreedist Calculate distances between trees
ftreedistpair Calculate distance between two sets of trees

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fpars.html

EMBOSS: fpars
fpars

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Discrete character parsimony

Description

Multistate discrete-characters parsimony method. Up to 8 states (as well as "?") are allowed. Cannot do Camin-Sokal or Dollo Parsimony. Can cope with multifurcations, reconstruct ancestral states, use character weights, and infer branch lengths.

Algorithm

PARS is a general parsimony program which carries out the Wagner parsimony method with multiple states. Wagner parsimony allows changes among all states. The criterion is to find the tree which requires the minimum number of changes. The Wagner method was originated by Eck and Dayhoff (1966) and by Kluge and Farris (1969). Here are its assumptions:
  1. Ancestral states are unknown.
  2. Different characters evolve independently.
  3. Different lineages evolve independently.
  4. Changes to all other states are equally probable (Wagner).
  5. These changes are a priori improbable over the evolutionary time spans involved in the differentiation of the group in question.
  6. Other kinds of evolutionary event such as retention of polymorphism are far less probable than these state changes.
  7. Rates of evolution in different lineages are sufficiently low that two changes in a long segment of the tree are far less probable than one change in a short segment.

That these are the assumptions of parsimony methods has been documented in a series of papers of mine: (1973a, 1978b, 1979, 1981b, 1983b, 1988b). For an opposing view arguing that the parsimony methods make no substantive assumptions such as these, see the papers by Farris (1983) and Sober (1983a, 1983b), but also read the exchange between Felsenstein and Sober (1986).
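
As a rough illustration of the Wagner criterion, the minimum number of changes required by one unordered character on one given tree can be counted with the classic Fitch small-parsimony pass. The sketch below is illustrative only (fpars itself also searches over trees and handles multifurcations); it assumes a fully bifurcating tree written as nested Python tuples. The tree is a rooted, bifurcating version of the tree found in the example run later on this page, and the states 1,1,1,0,0 are the first column of the pars.dat example:

    def fitch_changes(tree, states):
        # Minimum number of changes for one unordered character.
        # tree: nested 2-tuples of tip names; states: tip name -> state symbol
        changes = 0

        def visit(node):
            nonlocal changes
            if isinstance(node, str):                  # a tip
                return {states[node]}
            left, right = (visit(child) for child in node)
            if left & right:                           # states can agree: no change here
                return left & right
            changes += 1                               # disagreement costs one change
            return left | right

        visit(tree)
        return changes

    tree = ((("Epsilon", "Delta"), "Gamma"), ("Beta", "Alpha"))
    states = {"Alpha": "1", "Beta": "1", "Gamma": "1", "Delta": "0", "Epsilon": "0"}
    print(fitch_changes(tree, states))                 # 1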

Usage

Here is a sample session with fpars


% fpars 
Discrete character parsimony
Input file: pars.dat
Phylip tree file (optional): 
Phylip pars program output file [pars.fpars]: 

Adding species:
   1. Alpha     
   2. Beta      
   3. Gamma     
   4. Delta     
   5. Epsilon   

Doing global rearrangements on the first of the trees tied for best
  !---------!
   .........
   .........

Collapsing best trees
   .

Output written to file "pars.fpars"

Tree also written onto file "pars.treefile"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Discrete character parsimony
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more data sets
  [-intreefile]        tree       Phylip tree file (optional)
  [-outfile]           outfile    [*.fpars] Phylip pars program output file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -method             menu       [Wagner] Choose the parsimony method to use
                                  (Values: w (Wagner); c (Camin-Sokal))
   -maxtrees           integer    [100] Number of trees to save (Integer from
                                  1 to 1000000)
*  -[no]thorough       toggle     [Y] More thorough search
*  -[no]rearrange      boolean    [Y] Rearrange on just one best tree
*  -njumble            integer    [0] Number of times to randomise (Integer 0
                                  or more)
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -outgrno            integer    [0] Species number to use as outgroup
                                  (Integer 0 or more)
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1] Threshold value (Number 1.000 or more)
   -[no]trout          toggle     [Y] Write out trees to tree file
*  -outtreefile        outfile    [*.fpars] Phylip tree output file (optional)
   -printdata          boolean    [N] Print data at start of run
   -[no]progress       boolean    [Y] Print indications of progress of run
   -[no]treeprint      boolean    [Y] Print out tree
   -stepbox            boolean    [N] Print steps at each site
   -ancseq             boolean    [N] Print states at all nodes of tree
*  -[no]dotdiff        boolean    [Y] Use dot differencing to display results

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory3        string     Output directory

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more data sets Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
[-outfile]
(Parameter 3)
outfile Phylip pars program output file Output file <*>.fpars
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-method list Choose the parsimony method to use
w (Wagner)
c (Camin-Sokal)
Wagner
-maxtrees integer Number of trees to save Integer from 1 to 1000000 100
-[no]thorough toggle More thorough search Toggle value Yes/No Yes
-[no]rearrange boolean Rearrange on just one best tree Boolean value Yes/No Yes
-njumble integer Number of times to randomise Integer 0 or more 0
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-outgrno integer Species number to use as outgroup Integer 0 or more 0
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 1.000 or more 1
-[no]trout toggle Write out trees to tree file Toggle value Yes/No Yes
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fpars
-printdata boolean Print data at start of run Boolean value Yes/No No
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
-[no]treeprint boolean Print out tree Boolean value Yes/No Yes
-stepbox boolean Print steps at each site Boolean value Yes/No No
-ancseq boolean Print states at all nodes of tree Boolean value Yes/No No
-[no]dotdiff boolean Use dot differencing to display results Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory3
-odirectory_outfile
string Output directory Any string  
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fpars reads standard discrete-character input, except that multiple states (up to 9 of them) are allowed. Any character other than "?" may be used as a state, up to a maximum of 9 states. In fact, one can use different symbols in different columns of the data matrix, although it is rather unlikely that you would want to do that. The symbols you can use are:
  • The digits 0-9,
  • The letters A-Z and a-z,
  • The symbols ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~

    (of these, probably only + and - will be of interest to most users).

But note that these do not include blank (" "). Blanks in the input data are simply skipped by the program, so that they can be used to make characters into groups for ease of viewing. The "?" (question mark) symbol has special meaning. It is allowed in the input but is not available as the symbol of a state. Rather, it means that the state is unknown.

PARS can handle both bifurcating and multifurcating trees. In doing its search for most parsimonious trees, it adds species not only by creating new forks in the middle of existing branches, but it also tries putting them at the end of new branches which are added to existing forks. Thus it searches among both bifurcating and multifurcating trees. If a branch in a tree does not have any characters which might change in that branch in the most parsimonious tree, it does not save that tree. Thus in any tree that results, a branch exists only if some character has a most parsimonious reconstruction that would involve change in that branch.

It also saves a number of trees tied for best (you can alter the number it saves using the V option in the menu). When rearranging trees, it tries rearrangements of all of the saved trees. This makes the algorithm slower than earlier programs such as MIX.

(0,1) Discrete character data

These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.
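
The recoding can be done mechanically: each arrow of the character-state tree becomes one binary character, scored 1 for every old state that has that arrow in its ancestry. The following illustrative Python lines reproduce the table above (in practice the program FACTOR does this for you):

    # Character-state tree from the text: 1 is ancestral; 1 -> 0, 0 -> 2, 0 -> 3
    parent = {"0": "1", "2": "0", "3": "0"}            # child state -> parent state
    arrows = [("0", "3"), ("0", "2"), ("1", "0")]      # one binary character per arrow

    def arrows_in_ancestry(state):
        edges = set()
        while state in parent:
            edges.add((parent[state], state))
            state = parent[state]
        return edges

    for old in "0123":
        on_path = arrows_in_ancestry(old)
        print(old, "".join("1" if a in on_path else "0" for a in arrows))
    # prints: 0 001, 1 000, 2 011, 3 101 -- matching the table above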

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: pars.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Output file format

fpars output is standard: if option 1 is toggled on, the data is printed out, with the convention that "." means "the same as in the first species". Then comes a list of equally parsimonious trees. Each tree has branch lengths. These are computed using an algorithm published by Hochbaum and Pathria (1997) which I first heard of from Wayne Maddison who invented it independently of them. This algorithm averages the number of reconstructed changes of state over all sites and over all possible most parsimonious placements of the changes of state among branches. Note that it does not correct in any way for multiple changes that overlay each other.

If option 2 is toggled on a table of the number of changes of state required in each character is also printed. If option 5 is toggled on, a table is printed out after each tree, showing for each branch whether there are known to be changes in the branch, and what the states are inferred to have been at the top end of the branch. This is a reconstruction of the ancestral sequences in the tree. If you choose option 5, a menu item D appears which gives you the opportunity to turn off dot-differencing so that complete ancestral sequences are shown. If the inferred state is a "?", there will be multiple equally-parsimonious assignments of states; the user must work these out for themselves by hand. If option 6 is left in its default state the trees found will be written to a tree file, so that they are available to be used in other programs. If the program finds multiple trees tied for best, all of these are written out onto the output tree file. Each is followed by a numerical weight in square brackets (such as [0.25000]). This is needed when we use the trees to make a consensus tree of the results of bootstrapping or jackknifing, to avoid overrepresenting replicates that find many tied trees.

If the U (User Tree) option is used and more than one tree is supplied, the program also performs a statistical test of each of these trees against the best tree. This test is a version of the test proposed by Alan Templeton (1983) and evaluated in a test case by me (1985a). It is closely parallel to a test using log likelihood differences due to Kishino and Hasegawa (1989), and uses the mean and variance of step differences between trees, taken across sites. If the mean is more than 1.96 standard deviations different then the trees are declared significantly different. The program prints out a table of the steps for each tree, the differences of each from the best one, the variance of that quantity as determined by the step differences at individual sites, and a conclusion as to whether that tree is or is not significantly worse than the best one. It is important to understand that the test assumes that all the discrete characters are evolving independently, which is unlikely to be true for many suites of morphological characters.

If there are more than two trees, the test done is an extension of the KHT test, due to Shimodaira and Hasegawa (1999). They pointed out that a correction for the number of trees was necessary, and they introduced a resampling method to make this correction. In the version used here the variances and covariances of the sums of steps across characters are computed for all pairs of trees. To test whether the difference between each tree and the best one is larger than could have been expected if they all had the same expected number of steps, numbers of steps for all trees are sampled with these covariances and equal means (Shimodaira and Hasegawa's "least favorable hypothesis"), and a P value is computed from the fraction of times the difference between the tree's value and the lowest number of steps exceeds that actually observed. Note that this sampling needs random numbers, and so the program will prompt the user for a random number seed if one has not already been supplied. With the two-tree KHT test no random numbers are used.

In either the KHT or the SH test the program prints out a table of the number of steps for each tree, the differences of each from the lowest one, the variance of that quantity as determined by the differences of the numbers of steps at individual characters, and a conclusion as to whether that tree is or is not significantly worse than the best one.
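
To make the two-tree (KHT-style) comparison concrete, here is an illustrative sketch, assuming you already have the per-character step counts for the best tree and for one rival tree. The numbers below are made up, and this is not the code fpars uses:

    import math

    def kht_like(steps_best, steps_rival):
        # difference in total steps, and whether it exceeds 1.96 standard deviations
        diffs = [r - b for b, r in zip(steps_best, steps_rival)]
        n = len(diffs)
        mean = sum(diffs) / n
        var = sum((d - mean) ** 2 for d in diffs) / (n - 1)   # per-character variance
        sd_total = math.sqrt(n * var)                         # SD of the summed difference
        total = sum(diffs)
        return total, total > 1.96 * sd_total

    best  = [1, 2, 1, 1, 2, 1]     # hypothetical steps per character, best tree
    rival = [2, 2, 3, 1, 2, 2]     # hypothetical steps per character, rival tree
    print(kht_like(best, rival))   # (4, True): rival significantly worse by this rough criterion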

Option 6 in the menu controls whether the tree estimated by the program is written onto a tree file. The default name of this output tree file is "outtree". If the U option is in effect, all the user-defined trees are written to the output tree file.

Output files for usage example

File: pars.fpars


Discrete character parsimony algorithm, version 3.69.650


One most parsimonious tree found:


                            +Epsilon   
           +----------------3  
  +--------2                +-------------------------Delta     
  |        |  
  |        +Gamma     
  |  
  1----------------Beta      
  |  
  +Alpha     


requires a total of      8.000

  between      and       length
  -------      ---       ------
     1           2         1.00
     2           3         2.00
     3      Epsilon        0.00
     3      Delta          3.00
     2      Gamma          0.00
     1      Beta           2.00
     1      Alpha          0.00

File: pars.treefile

(((Epsilon:0.00,Delta:3.00):2.00,Gamma:0.00):1.00,Beta:2.00,Alpha:0.00);

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
eclique Largest clique program
edollop Dollo and polymorphism parsimony algorithm
edolpenny Penny algorithm Dollo or polymorphism
efactor Multistate to binary recoding program
emix Mixed parsimony algorithm
epenny Penny algorithm, branch-and-bound
fclique Largest clique program
fdollop Dollo and polymorphism parsimony algorithm
fdolpenny Penny algorithm Dollo or polymorphism
ffactor Multistate to binary recoding program
fmix Mixed parsimony algorithm
fmove Interactive mixed method parsimony
fpenny Penny algorithm, branch-and-bound

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/fdolmove.html

EMBOSS: fdolmove
fdolmove

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Interactive Dollo or polymorphism parsimony

Description

Interactive construction of phylogenies from discrete character data with two states (0 and 1) using the Dollo or polymorphism parsimony criteria. Evaluates parsimony and compatibility criteria for those phylogenies and displays reconstructed states throughout the tree. This can be used to find parsimony or compatibility estimates by hand.

Algorithm

DOLMOVE is an interactive parsimony program which uses the Dollo and Polymorphism parsimony criteria. It is inspired by Wayne Maddison and David Maddison's marvellous program MacClade, which is written for Apple Macintosh computers. DOLMOVE reads in a data set which is prepared in almost the same format as one for the Dollo and polymorphism parsimony program DOLLOP. It allows the user to choose an initial tree, and displays this tree on the screen. The user can look at different characters and the way their states are distributed on that tree, given the most parsimonious reconstruction of state changes for that particular tree. The user then can specify how the tree is to be rearranged, rerooted or written out to a file. By looking at different rearrangements of the tree the user can manually search for the most parsimonious tree, and can get a feel for how different characters are affected by changes in the tree topology.

This program is compatible with fewer computer systems than the other programs in PHYLIP. It can be adapted to PCDOS systems or to any system whose screen or terminals emulate DEC VT100 terminals (such as Telnet programs for logging in to remote computers over a TCP/IP network, VT100-compatible windows in the X windowing system, and any terminal compatible with ANSI standard terminals). For any other screen types, there is a generic option which does not make use of screen graphics characters to display the character states. This will be less effective, as the states will be less easy to see when displayed.

Usage

Here is a sample session with fdolmove


% fdolmove 
Interactive Dollo or polymorphism parsimony
Phylip character discrete states file: dolmove.dat
Phylip tree file (optional): 
NEXT? (R # + - S . T U W O F H J K L C ? X Q) (? for Help): Q
Do you want to write out the tree to a file? (Y or N): Y


Interactive Dollo or polymorphism parsimony, version 3.69.650

 5 species,   6 characters


Computing steps needed for compatibility in sites ...


(unrooted)                           5.0 Steps             4 chars compatible
Dollo               
  ,-----------5:Epsilon   
--9  
  !  ,--------4:Delta     
  `--8  
     !  ,-----3:Gamma     
     `--7  
        !  ,--2:Beta      
        `--6  
           `--1:Alpha     


Tree written to file "dolmove.treefile"


Go to the input files for this example
Go to the output files for this example

Command line arguments

Interactive Dollo or polymorphism parsimony
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing data set
  [-intreefile]        tree       Phylip tree file (optional)

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -ancfile            properties Ancestral states file
   -factorfile         properties Factors file
   -method             menu       [d] Parsimony method (Values: d (Dollo); p
                                  (Polymorphism))
   -dothreshold        toggle     [N] Use threshold parsimony
*  -threshold          float      [1] Threshold value (Number 0.000 or more)
   -initialtree        menu       [Arbitary] Initial tree (Values: a
                                  (Arbitary); u (User); s (Specify))
   -screenwidth        integer    [80] Width of terminal screen in characters
                                  (Any integer value)
   -screenlines        integer    [24] Number of lines on screen (Any integer
                                  value)
   -outtreefile        outfile    [*.fdolmove] Phylip tree output file
                                  (optional)

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outtreefile" associated qualifiers
   -odirectory         string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing data set Discrete states file  
[-intreefile]
(Parameter 2)
tree Phylip tree file (optional) Phylogenetic tree  
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-ancfile properties Ancestral states file Property value(s)  
-factorfile properties Factors file Property value(s)  
-method list Parsimony method
d (Dollo)
p (Polymorphism)
d
-dothreshold toggle Use threshold parsimony Toggle value Yes/No No
-threshold float Threshold value Number 0.000 or more 1
-initialtree list Initial tree
a (Arbitary)
u (User)
s (Specify)
Arbitary
-screenwidth integer Width of terminal screen in characters Any integer value 80
-screenlines integer Number of lines on screen Any integer value 24
-outtreefile outfile Phylip tree output file (optional) Output file <*>.fdolmove
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outtreefile" associated outfile qualifiers
-odirectory string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

fdolmove reads discrete character data with "?", "P" and "B" states allowed.

(0,1) Discrete character data

These programs are intended for the use of morphological systematists who are dealing with discrete characters, or by molecular evolutionists dealing with presence-absence data on restriction sites. One of the programs (PARS) allows multistate characters, with up to 8 states, plus the unknown state symbol "?". For the others, the characters are assumed to be coded into a series of (0,1) two-state characters. For most of the programs there are two other states possible, "P", which stands for the state of Polymorphism for both states (0 and 1), and "?", which stands for the state of ignorance: it is the state "unknown", or "does not apply". The state "P" can also be denoted by "B", for "both".

There is a method invented by Sokal and Sneath (1963) for linear sequences of character states, and fully developed for branching sequences of character states by Kluge and Farris (1969) for recoding a multistate character into a series of two-state (0,1) characters. Suppose we had a character with four states whose character-state tree had the rooted form:

               1 ---> 0 ---> 2
                      |
                      |
                      V
                      3

so that 1 is the ancestral state and 0, 2 and 3 derived states. We can represent this as three two-state characters:

                Old State           New States
                --- -----           --- ------
                    0                  001
                    1                  000
                    2                  011
                    3                  101

The three new states correspond to the three arrows in the above character state tree. Possession of one of the new states corresponds to whether or not the old state had that arrow in its ancestry. Thus the first new state corresponds to the bottommost arrow, which only state 3 has in its ancestry, the second state to the rightmost of the top arrows, and the third state to the leftmost top arrow. This coding will guarantee that the number of times that states arise on the tree (in programs MIX, MOVE, PENNY and BOOT) or the number of polymorphic states in a tree segment (in the Polymorphism option of DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT) will correctly correspond to what would have been the case had our programs been able to take multistate characters into account. Although I have shown the above character state tree as rooted, the recoding method works equally well on unrooted multistate characters as long as the connections between the states are known and contain no loops.

However, in the default option of programs DOLLOP, DOLMOVE, DOLPENNY and DOLBOOT the multistate recoding does not necessarily work properly, as it may lead the program to reconstruct nonexistent state combinations such as 010. An example of this problem is given in my paper on alternative phylogenetic methods (1979).

If you have multistate character data where the states are connected in a branching "character state tree" you may want to do the binary recoding yourself. Thanks to Christopher Meacham, the package contains a program, FACTOR, which will do the recoding itself. For details see the documentation file for FACTOR.

We now also have the program PARS, which can do parsimony for unordered character states.

Input files for usage example

File: dolmove.dat

     5    6
Alpha     110110
Beta      110000
Gamma     100110
Delta     001001
Epsilon   001110

Output file format

fdolmove output:

If the A option is used, then the program will infer, for any character whose ancestral state is unknown ("?"), whether ancestral state 0 or 1 will give the fewest changes (according to the criterion in use). If these are tied, then it may not be possible for the program to infer the state in the internal nodes, and many of these will be shown as "?". If the A option is not used, then the program will assume 0 as the ancestral state.

When reconstructing the placement of forward changes and reversions under the Dollo method, keep in mind that each polymorphic state in the input data will require one "last minute" reversion. This is included in the counts. Thus if we have both states 0 and 1 at a tip of the tree the program will assume that the lineage had state 1 up to the last minute, and then state 0 arose in that population by reversion, without loss of state 1.
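
To make the Dollo counting concrete, here is an illustrative Python sketch (not the program's algorithm) that counts the reversions one binary character needs on a rooted tree, assuming the ancestral state is 0 and that state 1 arises exactly once. The example uses the fourth character of the dolmove.dat data set on the tree from the sample run, treated as rooted at its basal fork:

    def dollo_reversions(tree, states):
        # tree: nested tuples of tip names; states: tip name -> 0 or 1
        def ones_below(node):
            if isinstance(node, str):
                return states[node]
            return sum(ones_below(child) for child in node)

        total_ones = ones_below(tree)

        def mrca(node):
            # smallest clade containing every tip with state 1
            if isinstance(node, str):
                return node
            for child in node:
                if ones_below(child) == total_ones:
                    return mrca(child)
            return node

        def losses(node):
            # within that clade, each maximal all-0 subclade costs one reversion
            if ones_below(node) == 0:
                return 1
            if isinstance(node, str):
                return 0
            return sum(losses(child) for child in node)

        return 0 if total_ones == 0 else losses(mrca(tree))

    # Character 4: Alpha=1 Beta=0 Gamma=1 Delta=0 Epsilon=1
    tree = ("Epsilon", ("Delta", ("Gamma", ("Beta", "Alpha"))))
    states = {"Alpha": 1, "Beta": 0, "Gamma": 1, "Delta": 0, "Epsilon": 1}
    print(dollo_reversions(tree, states))   # 2 (reversions in the Beta and Delta lineages)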

When DOLMOVE calculates the number of characters compatible with the tree, it will take the F option into account and count the multistate characters as units, counting a character as compatible with the tree only when all of the binary characters corresponding to it are compatible with the tree.

Output files for usage example

File: dolmove.treefile

(Epsilon,(Delta,(Gamma,(Beta,Alpha))));

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestboot Bootstrapped restriction sites algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org) not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/Makefile.am0000664000175000017500000000125711137346244015415 00000000000000pkgdata_DATA = index.html \ fclique.html \ fconsense.html fcontml.html fcontrast.html \ fdiscboot.html fdnacomp.html fdnadist.html fdnainvar.html \ fdnaml.html fdnamlk.html fdnamove.html fdnapars.html fdnapenny.html \ fdollop.html fdolmove.html fdolpenny.html \ fdrawgram.html fdrawtree.html \ ffactor.html ffitch.html ffreqboot.html \ fgendist.html fkitsch.html \ fmix.html fmove.html fneighbor.html \ fpars.html fpenny.html fproml.html fpromlk.html \ fprotdist.html fprotpars.html \ frestboot.html frestdist.html frestml.html fretree.html \ fseqboot.html fseqbootall.html \ ftreedist.html ftreedistpair.html pkgdatadir=$(prefix)/share/EMBOSS/doc/html/embassy/phylipnew PHYLIPNEW-3.69.650/emboss_doc/html/Makefile.in0000664000175000017500000003417212171071677015434 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = emboss_doc/html DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgdatadir)" DATA = $(pkgdata_DATA) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkgdatadir = $(prefix)/share/EMBOSS/doc/html/embassy/phylipnew ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC 
= @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pkgdata_DATA = index.html \ fclique.html \ fconsense.html fcontml.html fcontrast.html \ fdiscboot.html fdnacomp.html fdnadist.html fdnainvar.html \ fdnaml.html fdnamlk.html fdnamove.html fdnapars.html fdnapenny.html \ fdollop.html fdolmove.html fdolpenny.html \ fdrawgram.html fdrawtree.html \ ffactor.html ffitch.html ffreqboot.html \ fgendist.html fkitsch.html \ fmix.html fmove.html fneighbor.html \ fpars.html fpenny.html fproml.html fpromlk.html \ fprotdist.html fprotpars.html \ frestboot.html frestdist.html frestml.html fretree.html \ fseqboot.html fseqbootall.html \ ftreedist.html ftreedistpair.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/html/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/html/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgdataDATA: $(pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) tags: TAGS TAGS: ctags: CTAGS CTAGS: cscope cscopelist: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(pkgdatadir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-pkgdataDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-pkgdataDATA .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-pkgdataDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ uninstall uninstall-am uninstall-pkgdataDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/html/frestboot.html0000664000175000017500000007740712171064331016261 00000000000000 EMBOSS: frestboot
frestboot

 

Wiki

The master copies of EMBOSS documentation are available at http://emboss.open-bio.org/wiki/Appdocs on the EMBOSS Wiki.

Please help by correcting and extending the Wiki pages.

Function

Bootstrapped restriction sites algorithm

Description

Reads in a data set, and produces multiple data sets from it by bootstrap resampling. Since most programs in the current version of the package allow processing of multiple data sets, this can be used together with the consensus tree program CONSENSE to do bootstrap (or delete-half-jackknife) analyses with most of the methods in this package. This program also allows the Archie/Faith technique of permutation of species within characters. It can also rewrite a data set to convert it between the PHYLIP Interleaved and Sequential forms, and into a preliminary version of a new XML sequence alignment format which is under development.

Algorithm

FRESTBOOT is a restriction-site-specific version of SEQBOOT.

SEQBOOT is a general bootstrapping and data set translation tool. It is intended to allow you to generate multiple data sets that are resampled versions of the input data set. Since almost all programs in the package can analyze these multiple data sets, this allows almost anything in this package to be bootstrapped, jackknifed, or permuted. SEQBOOT can handle molecular sequences, binary characters, restriction sites, or gene frequencies. It can also convert data sets between Sequential and Interleaved format, and into the NEXUS format or into a new XML sequence alignment format.

To carry out a bootstrap (or jackknife, or permutation test) with some method in the package, you may need to use three programs. First, you need to run SEQBOOT to take the original data set and produce a large number of bootstrapped or jackknifed data sets (somewhere between 100 and 1000 is usually adequate). Then you need to find the phylogeny estimate for each of these, using the particular method of interest. For example, if you were using DNAPARS you would first run SEQBOOT and make a file with 100 bootstrapped data sets. Then you would give this file the proper name to have it be the input file for DNAPARS. Running DNAPARS with the M (Multiple Data Sets) menu choice and informing it to expect 100 data sets, you would generate a big output file as well as a treefile with the trees from the 100 data sets. This treefile could be renamed so that it would serve as the input for CONSENSE. When CONSENSE is run the majority rule consensus tree will result, showing the outcome of the analysis.

This may sound tedious, but the run of CONSENSE is fast, and that of SEQBOOT is fairly fast, so that it will not actually take any longer than a run of a single bootstrap program with the same original data and the same number of replicates. This is not very hard and allows bootstrapping or jackknifing on many of the methods in this package. The same steps are necessary with all of them. Doing things this way, some of the intermediate files (the tree file from the DNAPARS run, for example) can be used to summarize the results of the bootstrap in ways other than the majority rule consensus method.

If you are using the Distance Matrix programs, you will have to add one extra step to this, calculating distance matrices from each of the replicate data sets, using DNADIST or GENDIST. So (for example) you would run SEQBOOT, then run DNADIST using the output of SEQBOOT as its input, then run (say) NEIGHBOR using the output of DNADIST as its input, and then run CONSENSE using the tree file from NEIGHBOR as its input.
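
The whole procedure can be scripted. The following is a minimal illustrative sketch in Python (not part of the package) that drives the pipeline with subprocess; only the frestboot qualifiers are taken from this page, while the downstream commands are placeholders whose exact qualifiers should be checked with each program's -help output.

import subprocess

def run(cmd):
    # Echo and run one EMBOSS command, stopping if it fails.
    print("+", " ".join(cmd))
    subprocess.run(cmd, check=True)

# 1. Produce 100 bootstrap replicates of the restriction-site data
#    (qualifiers as documented on this page).
run(["frestboot", "-infile", "restboot.dat",
     "-outfile", "restboot.frestboot",
     "-seed", "3", "-reps", "100", "-auto"])

# 2. Analyse all 100 replicate data sets with the method of interest,
#    for example a distance program (frestdist) followed by fneighbor,
#    telling each to expect multiple data sets.  The qualifier names are
#    NOT documented on this page -- consult each program's own -help output.
# run(["frestdist", ...])
# run(["fneighbor", ...])

# 3. Summarise the resulting tree file as a majority-rule consensus tree.
# run(["fconsense", ...])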

The resampling methods available are listed below (a small illustrative code sketch of several of them follows the list):

  • The bootstrap. Bootstrapping was invented by Bradley Efron in 1979, and its use in phylogeny estimation was introduced by me (Felsenstein, 1985b; see also Penny and Hendy, 1985). It involves creating a new data set by sampling N characters randomly with replacement, so that the resulting data set has the same size as the original, but some characters have been left out and others are duplicated. The random variation of the results from analyzing these bootstrapped data sets can be shown statistically to be typical of the variation that you would get from collecting new data sets. The method assumes that the characters evolve independently, an assumption that may not be realistic for many kinds of data.
  • The partial bootstrap. This is the bootstrap where fewer than the full number of characters are sampled. The user is asked for the fraction of characters to be sampled. It is primarily useful in carrying out Zharkikh and Li's (1995) Complete And Partial Bootstrap method, and Shimodaira's (2002) AU method, both of which correct the bias of bootstrap P values.
  • Block-bootstrapping. One pattern of departure from independence of character evolution is correlation of evolution in adjacent characters. When this is thought to have occurred, we can correct for it by sampling, not individual characters, but blocks of adjacent characters. This is called a block bootstrap and was introduced by Künsch (1989). If the correlations are believed to extend over some number of characters, you choose a block size, B, that is larger than this, and choose N/B blocks of size B. In its implementation here the block bootstrap "wraps around" at the end of the characters (so that if a block starts in the last B-1 characters, it continues by wrapping around to the first character after it reaches the last character). Note also that if you have a DNA sequence data set of an exon of a coding region, you can ensure that equal numbers of first, second, and third coding positions are sampled by using the block bootstrap with B = 3.
  • Partial block-bootstrapping. Similar to partial bootstrapping except sampling blocks rather than single characters.
  • Delete-half-jackknifing. This alternative to the bootstrap involves sampling a random half of the characters, and including them in the data but dropping the others. The resulting data sets are half the size of the original, and no characters are duplicated. The random variation from doing this should be very similar to that obtained from the bootstrap. The method is advocated by Wu (1986). It was mentioned by me in my bootstrapping paper (Felsenstein, 1985b), and has been available for many years in this program as an option. Note that, for the present, block-jackknifing is not available, because I cannot figure out how to do it straightforwardly when the block size is not a divisor of the number of characters.
  • Delete-fraction jackknifing. Jackknifing is advocated by Farris et al. (1996) but as deleting a fraction 1/e (1/2.71828). This retains too many characters and will lead to overconfidence in the resulting groups when there are conflicting characters. However, it is made available here as an option, with the user asked to supply the fraction of characters that are to be retained.
  • Permuting species within characters. This method of resampling (well, OK, it may not be best to call it resampling) was introduced by Archie (1989) and Faith (1990; see also Faith and Cranston, 1991). It involves permuting the columns of the data matrix separately. This produces data matrices that have the same number and kinds of characters but no taxonomic structure. It is used for different purposes than the bootstrap, as it tests not the variation around an estimated tree but the hypothesis that there is no taxonomic structure in the data: if a statistic such as number of steps is significantly smaller in the actual data than it is in replicates that are permuted, then we can argue that there is some taxonomic structure in the data (though perhaps it might be just the presence of a pair of sibling species).
  • Permuting characters. This simply permutes the order of the characters, the same reordering being applied to all species. For many methods of tree inference this will make no difference to the outcome (unless one has rates of evolution correlated among adjacent sites). It is included as a possible step in carrying out a permutation test of homogeneity of characters (such as the Incongruence Length Difference test).
  • Permuting characters separately for each species. This is a method introduced by Steel, Lockhart, and Penny (1993) to permute data so as to destroy all phylogenetic structure, while keeping the base composition of each species the same as before. It shuffles the character order separately for each species.
  • Rewriting. This is not a resampling or permutation method: it simply rewrites the data set into a different format. That format can be the PHYLIP format. For molecular sequences and discrete morphological characters it can also be the NEXUS format. For molecular sequences one other format is available, a new (and nonstandard) XML format of our own devising. When the PHYLIP format is chosen the data set is converted between Interleaved and Sequential format. If it was read in as Interleaved sequences, it will be written out as Sequential format, and vice versa. The NEXUS format for molecular sequences is always written as interleaved sequences. The XML format is different from (though similar to) a number of other XML sequence alignment formats. An example will be found below. Here is a table of links to those other XML alignment formats:
    Andrew Rambaut's BEAST XML format http://evolve.zoo.ox.ac.uk/beast/introXML.html and http://evolve.zoo.ox.ac.uk/beast/referenindex.html A format for alignments. There is also a format for phylogenies described there.
    MSAML http://xml.coverpages.org/msaml-desc-dec.html Defined by Paul Gordon of University of Calgary. See his big list of molecular biology XML projects.
    BSML http://www.bsml.org/resources/default.asp Bioinformatic Sequence Markup Language includes a multiple sequence alignment XML format
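
As promised above, here is a small illustrative Python sketch (not the program's own code) of several of these resampling schemes, treating the character matrix as a list of columns. The example rows match the restriction-site data set shown in the input file example further down this page.

import random

def to_columns(rows):
    # Split equal-length character strings (one per species) into columns.
    return ["".join(r[i] for r in rows) for i in range(len(rows[0]))]

def to_rows(columns, nspecies):
    # Reassemble one string per species from a list of columns.
    return ["".join(c[s] for c in columns) for s in range(nspecies)]

def bootstrap(columns, rng):
    # Ordinary bootstrap: sample N columns with replacement.
    return [rng.choice(columns) for _ in range(len(columns))]

def block_bootstrap(columns, block, rng):
    # Block bootstrap: draw blocks of adjacent columns, wrapping around
    # at the end of the data, until N columns have been collected.
    n, out = len(columns), []
    while len(out) < n:
        start = rng.randrange(n)
        out.extend(columns[(start + i) % n] for i in range(block))
    return out[:n]

def delete_half_jackknife(columns, rng):
    # Keep a random half of the columns, preserving their original order.
    keep = sorted(rng.sample(range(len(columns)), len(columns) // 2))
    return [columns[i] for i in keep]

def permute_within_characters(columns, rng):
    # Archie/Faith permutation: shuffle each column's entries separately,
    # destroying taxonomic structure but keeping each column's composition.
    return ["".join(rng.sample(c, len(c))) for c in columns]

# One bootstrap replicate of the five-species example data set.
rows = ["++-+-++--+++-", "++++--+--+++-", "-+--+-++-+-++",
        "++-+----++---", "++++----++---"]
rng = random.Random(3)
print(to_rows(bootstrap(to_columns(rows), rng), len(rows)))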

Usage

Here is a sample session with frestboot


% frestboot -seed 3 
Bootstrapped restriction sites algorithm
Input file: restboot.dat
Phylip seqboot_rest program output file [restboot.frestboot]: 


completed replicate number   10
completed replicate number   20
completed replicate number   30
completed replicate number   40
completed replicate number   50
completed replicate number   60
completed replicate number   70
completed replicate number   80
completed replicate number   90
completed replicate number  100

Output written to file "restboot.frestboot"

Done.


Go to the input files for this example
Go to the output files for this example

Command line arguments

Bootstrapped restriction sites algorithm
Version: EMBOSS:6.6.0.0

   Standard (Mandatory) qualifiers:
  [-infile]            discretestates File containing one or more sets of
                                  restriction data
  [-outfile]           outfile    [*.frestboot] Phylip seqboot_rest program
                                  output file

   Additional (Optional) qualifiers (* if not always prompted):
   -weights            properties Weights file
   -test               menu       [b] Choose test (Values: b (Bootstrap); j
                                  (Jackknife); c (Permute species for each
                                  character); o (Permute character order); s
                                  (Permute within species); r (Rewrite data))
*  -regular            toggle     [N] Altered sampling fraction
*  -fracsample         float      [100.0] Samples as percentage of sites
                                  (Number from 0.100 to 100.000)
*  -rewriteformat      menu       [p] Output format (Values: p (PHYLIP); n
                                  (NEXUS); x (XML))
*  -blocksize          integer    [1] Block size for bootstraping (Integer 1
                                  or more)
*  -reps               integer    [100] How many replicates (Integer 1 or
                                  more)
*  -justweights        menu       [d] Write out datasets or just weights
                                  (Values: d (Datasets); w (Weights))
   -enzymes            boolean    [N] Is the number of enzymes present in
                                  input file
*  -seed               integer    [1] Random number seed between 1 and 32767
                                  (must be odd) (Integer from 1 to 32767)
   -printdata          boolean    [N] Print out the data at start of run
*  -[no]dotdiff        boolean    [Y] Use dot-differencing
   -[no]progress       boolean    [Y] Print indications of progress of run

   Advanced (Unprompted) qualifiers: (none)
   Associated qualifiers:

   "-outfile" associated qualifiers
   -odirectory2        string     Output directory

   General qualifiers:
   -auto               boolean    Turn off prompts
   -stdout             boolean    Write first file to standard output
   -filter             boolean    Read first file from standard input, write
                                  first file to standard output
   -options            boolean    Prompt for standard and additional values
   -debug              boolean    Write debug output to program.dbg
   -verbose            boolean    Report some/full command line options
   -help               boolean    Report command line options and exit. More
                                  information on associated and general
                                  qualifiers can be found with -help -verbose
   -warning            boolean    Report warnings
   -error              boolean    Report errors
   -fatal              boolean    Report fatal errors
   -die                boolean    Report dying program messages
   -version            boolean    Report version number and exit

Qualifier Type Description Allowed values Default
Standard (Mandatory) qualifiers
[-infile]
(Parameter 1)
discretestates File containing one or more sets of restriction data Discrete states file  
[-outfile]
(Parameter 2)
outfile Phylip seqboot_rest program output file Output file <*>.frestboot
Additional (Optional) qualifiers
-weights properties Weights file Property value(s)  
-test list Choose test
b (Bootstrap)
j (Jackknife)
c (Permute species for each character)
o (Permute character order)
s (Permute within species)
r (Rewrite data)
b
-regular toggle Altered sampling fraction Toggle value Yes/No No
-fracsample float Samples as percentage of sites Number from 0.100 to 100.000 100.0
-rewriteformat list Output format
p (PHYLIP)
n (NEXUS)
x (XML)
p
-blocksize integer Block size for bootstraping Integer 1 or more 1
-reps integer How many replicates Integer 1 or more 100
-justweights list Write out datasets or just weights
d (Datasets)
w (Weights)
d
-enzymes boolean Is the number of enzymes present in input file Boolean value Yes/No No
-seed integer Random number seed between 1 and 32767 (must be odd) Integer from 1 to 32767 1
-printdata boolean Print out the data at start of run Boolean value Yes/No No
-[no]dotdiff boolean Use dot-differencing Boolean value Yes/No Yes
-[no]progress boolean Print indications of progress of run Boolean value Yes/No Yes
Advanced (Unprompted) qualifiers
(none)
Associated qualifiers
"-outfile" associated outfile qualifiers
-odirectory2
-odirectory_outfile
string Output directory Any string  
General qualifiers
-auto boolean Turn off prompts Boolean value Yes/No N
-stdout boolean Write first file to standard output Boolean value Yes/No N
-filter boolean Read first file from standard input, write first file to standard output Boolean value Yes/No N
-options boolean Prompt for standard and additional values Boolean value Yes/No N
-debug boolean Write debug output to program.dbg Boolean value Yes/No N
-verbose boolean Report some/full command line options Boolean value Yes/No Y
-help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose Boolean value Yes/No N
-warning boolean Report warnings Boolean value Yes/No Y
-error boolean Report errors Boolean value Yes/No Y
-fatal boolean Report fatal errors Boolean value Yes/No Y
-die boolean Report dying program messages Boolean value Yes/No Y
-version boolean Report version number and exit Boolean value Yes/No N

Input file format

The data files read by frestboot (SEQBOOT) are the standard ones for the various kinds of data. For molecular sequences the sequences may be either interleaved or sequential, and similarly for restriction sites. Restriction sites data may either include or omit the third argument on the first line, the number of restriction enzymes used. Discrete morphological characters are always assumed to be in sequential format. Gene frequencies data start with the number of species and the number of loci, followed by a line with the number of alleles at each locus. The data for each locus may either have one entry for each allele, or omit one allele at each locus. The details of the formats are given in the main documentation file, and in the documentation files for the groups of programs.

Input files for usage example

File: restboot.dat

   5   13   2
Alpha     ++-+-++--+++-
Beta      ++++--+--+++-
Gamma     -+--+-++-+-++
Delta     ++-+----++---
Epsilon   ++++----++---
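
The example file above can be read mechanically. Below is a minimal illustrative Python reader for this sequential restriction-site layout; it is not the parser used by the program, and the 10-character width of the name field is an assumption based on standard PHYLIP formatting.

def read_restriction_sites(path):
    # Header: species count, site count and (optionally) the number of enzymes.
    with open(path) as fh:
        header = fh.readline().split()
        nspecies, nsites = int(header[0]), int(header[1])
        nenzymes = int(header[2]) if len(header) > 2 else None
        data = {}
        for _ in range(nspecies):
            line = fh.readline()
            name = line[:10].strip()              # name field assumed 10 columns wide
            states = "".join(line[10:].split())   # drop embedded blanks and newline
            assert len(states) == nsites, (name, states)
            data[name] = states
    return nenzymes, data

print(read_restriction_sites("restboot.dat"))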

Output file format

frestboot output will contain the data sets generated by the resampling process. Note that, when Gene Frequencies data is used or when Discrete Morphological characters with the Factors option are used, the number of characters in each data set may vary. It may also vary if there are an odd number of characters or sites and the Delete-Half-Jackknife resampling method is used, for then there will be a 50% chance of choosing (n+1)/2 characters and a 50% chance of choosing (n-1)/2 characters.

The Factors option causes the characters to be resampled together. If (say) three adjacent characters all have the same factor symbol, so that they are all understood to be recoding one multistate character, they will be resampled together as a group.
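
A small illustrative sketch of this grouped resampling follows (assumed behaviour only, not the program's own code); here the factor information is represented as a string with one symbol per character, and a run of identical symbols marks columns that belong to the same multistate character.

import random
from itertools import groupby

def factor_groups(columns, factors):
    # Collect runs of adjacent columns that share the same factor symbol.
    indexed = list(zip(factors, columns))
    return [[col for _, col in run]
            for _, run in groupby(indexed, key=lambda pair: pair[0])]

def factor_bootstrap(columns, factors, rng):
    # Sample whole factor groups with replacement; replicate data sets can
    # therefore contain more or fewer columns than the original.
    groups = factor_groups(columns, factors)
    picked = [rng.choice(groups) for _ in range(len(groups))]
    return [col for grp in picked for col in grp]

# Six dummy columns in which columns 1-2, 3-4 and 5-6 form three groups.
print(factor_bootstrap(list("ABCDEF"), "112233", random.Random(1)))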

The order of species in the data sets in the output file will vary randomly. This is a precaution: it helps prevent any result that is sensitive to the input order of species from showing up repeatedly in the programs that analyze these data, and thus appearing to have evidence in its favor.

The numerical options 1 and 2 in the menu (corresponding to the -printdata and -progress qualifiers in this EMBOSS version) also affect the output file. If 1 is chosen (it is off by default) the program will print the original input data set on the output file before the resampled data sets. I cannot actually see why anyone would want to do this. Option 2 toggles the feature (on by default) that prints out, up to 20 times during the resampling process, a notification that the program has completed a certain number of data sets. Thus if 100 resampled data sets are being produced, a line is printed every 5 data sets saying which data set has just been completed. This option should be turned off if the program is running in the background and silence is desirable. At the end of execution the program will always (whatever the setting of option 2) print a couple of lines saying that output has been written to the output file.

Output files for usage example

File: restboot.frestboot

    5    13
Alpha     +--++-+++- -++
Beta      +++++----- -++
Gamma     -----+---+ +++
Delta     +--++----- -+-
Epsilon   +++++----- -+-
    5    13
Alpha     ++----+++- +++
Beta      +++-----+- +++
Gamma     -+-+++--++ ++-
Delta     ++-------- ++-
Epsilon   +++------- ++-
    5    13
Alpha     ++++++-+++ ---
Beta      ++++++-+++ ---
Gamma     --++-++--- +++
Delta     +++++----- ---
Epsilon   +++++----- ---
    5    13
Alpha     ++++-+++++ ---
Beta      ++-+-+++++ ---
Gamma     ---+-++--- +++
Delta     ++--+++--- ---
Epsilon   ++--+++--- ---
    5    13
Alpha     +-+++-++++ +--
Beta      +++++-++++ +--
Gamma     ----+----- +++
Delta     +-++-+---- ---
Epsilon   ++++-+---- ---
    5    13
Alpha     +++------- +++
Beta      +++------- +++
Gamma     +--++++++- +-+
Delta     +++------+ +--
Epsilon   +++------+ +--
    5    13
Alpha     ++++-+--++ ++-
Beta      ++++----++ ++-
Gamma     --+++---+- -++
Delta     ++++--+++- ---
Epsilon   ++++--+++- ---
    5    13
Alpha     +--+---+++ +--
Beta      ++++---+++ +--
Gamma     ----++-+-+ +++
Delta     +--+--++-- ---
Epsilon   ++++--++-- ---
    5    13
Alpha     +++--++--+ ++-


  [Part of this file has been deleted for brevity]

Gamma     -+--++-+++ -++
Delta     ++++------ +++
Epsilon   ++++------ +++
    5    13
Alpha     +++---+-++ +++
Beta      +++---+-++ +++
Gamma     ---++++-++ -++
Delta     +++----+++ ---
Epsilon   +++----+++ ---
    5    13
Alpha     ++++--+--+ +--
Beta      +++++----+ +--
Gamma     ---+-+-+++ +++
Delta     ++++-----+ +--
Epsilon   +++++----+ +--
    5    13
Alpha     +-----+--- +++
Beta      +++++++--- +++
Gamma     +------+++ +--
Delta     +-----+--- +--
Epsilon   +++++++--- +--
    5    13
Alpha     +-++--+-++ +--
Beta      ++++--+-++ +--
Gamma     +---++++-- -++
Delta     +-++------ ---
Epsilon   ++++------ ---
    5    13
Alpha     +++-+-++++ ++-
Beta      +++---++++ ++-
Gamma     --++--++-- +++
Delta     +++--+++-- ---
Epsilon   +++--+++-- ---
    5    13
Alpha     ++-+++--++ +--
Beta      +++-++--++ +--
Gamma     ----+++--- +++
Delta     ++-----+-- ---
Epsilon   +++----+-- ---
    5    13
Alpha     +---++---- ++-
Beta      ++---+---- ++-
Gamma     --++-+---- -++
Delta     +-----++++ ---
Epsilon   ++----++++ ---
    5    13
Alpha     +++-++++-+ +++
Beta      +++++----+ +++
Gamma     -++------+ +++
Delta     +++-+---++ ---
Epsilon   +++++---++ ---

Data files

None

Notes

None.

References

None.

Warnings

None.

Diagnostic Error Messages

None.

Exit status

It always exits with status 0.

Known bugs

None.

See also

Program name Description
distmat Create a distance matrix from a multiple sequence alignment
ednacomp DNA compatibility algorithm
ednadist Nucleic acid sequence distance matrix program
ednainvar Nucleic acid sequence invariants method
ednaml Phylogenies from nucleic acid maximum likelihood
ednamlk Phylogenies from nucleic acid maximum likelihood with clock
ednapars DNA parsimony algorithm
ednapenny Penny algorithm for DNA
eprotdist Protein distance algorithm
eprotpars Protein parsimony algorithm
erestml Restriction site maximum likelihood method
eseqboot Bootstrapped sequences algorithm
fdiscboot Bootstrapped discrete sites algorithm
fdnacomp DNA compatibility algorithm
fdnadist Nucleic acid sequence distance matrix program
fdnainvar Nucleic acid sequence invariants method
fdnaml Estimate nucleotide phylogeny by maximum likelihood
fdnamlk Estimates nucleotide phylogeny by maximum likelihood
fdnamove Interactive DNA parsimony
fdnapars DNA parsimony algorithm
fdnapenny Penny algorithm for DNA
fdolmove Interactive Dollo or polymorphism parsimony
ffreqboot Bootstrapped genetic frequencies algorithm
fproml Protein phylogeny by maximum likelihood
fpromlk Protein phylogeny by maximum likelihood
fprotdist Protein distance algorithm
fprotpars Protein parsimony algorithm
frestdist Calculate distance matrix from restriction sites or fragments
frestml Restriction site maximum likelihood method
fseqboot Bootstrapped sequences algorithm
fseqbootall Bootstrapped sequences algorithm

Author(s)

This program is an EMBOSS conversion of a program written by Joe Felsenstein as part of his PHYLIP package.

Please report all bugs to the EMBOSS bug team (emboss-bug@emboss.open-bio.org), not to the original author.

History

Written (2004) - Joe Felsenstein, University of Washington.

Converted (August 2004) to an EMBASSY program by the EMBOSS team.

Target users

This program is intended to be used by everyone and everything, from naive users to embedded scripts.

Comments

None
PHYLIPNEW-3.69.650/emboss_doc/html/.cvsignore0000664000175000017500000000002511326104646015347 00000000000000Makefile Makefile.in PHYLIPNEW-3.69.650/emboss_doc/Makefile0000664000175000017500000004553012171071711014050 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # emboss_doc/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgdatadir = $(datadir)/PHYLIPNEW pkgincludedir = $(includedir)/PHYLIPNEW pkglibdir = $(libdir)/PHYLIPNEW pkglibexecdir = $(libexecdir)/PHYLIPNEW am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = x86_64-unknown-linux-gnu host_triplet = x86_64-unknown-linux-gnu subdir = emboss_doc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ distdir ETAGS = etags CTAGS = ctags DIST_SUBDIRS = $(SUBDIRS) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ 
sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" ACLOCAL = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run aclocal-1.12 AMTAR = $${TAR-tar} ANT = AR = ar AUTOCONF = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoconf AUTOHEADER = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run autoheader AUTOMAKE = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run automake-1.12 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 CFLAGS = -O2 CPP = gcc -E CPPFLAGS = -DAJ_LinuxLF -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 CXX = g++ CXXCPP = g++ -E CXXDEPMODE = depmode=gcc3 CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps DEVWARN_CFLAGS = DLLTOOL = false DSYMUTIL = DUMPBIN = ECHO_C = ECHO_N = -n ECHO_T = EGREP = /usr/bin/grep -E EXEEXT = FGREP = /usr/bin/grep -F GREP = /usr/bin/grep HAVE_MEMMOVE = HAVE_STRERROR = INSTALL = /usr/bin/install -c INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} INSTALL_SCRIPT = ${INSTALL} INSTALL_STRIP_PROGRAM = $(install_sh) -c -s JAR = JAVA = JAVAC = JAVA_CFLAGS = JAVA_CPPFLAGS = -DNO_AUTH JAVA_LDFLAGS = LD = /usr/bin/ld -m elf_x86_64 LDFLAGS = LIBOBJS = LIBS = -lm -lhpdf -lgd -lpng -lz -lm LIBTOOL = $(SHELL) $(top_builddir)/libtool LIPO = LN_S = ln -s LTLIBOBJS = MAKEINFO = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/missing --run makeinfo MANIFEST_TOOL = : MKDIR_P = /usr/bin/mkdir -p MYSQL_CFLAGS = -I/usr/include/mysql -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-strict-aliasing -fwrapv -fPIC -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 MYSQL_CONFIG = /usr/bin/mysql_config MYSQL_CPPFLAGS = -I/usr/include/mysql MYSQL_LDFLAGS = -L/usr/lib64/mysql -lmysqlclient -lpthread -lz -lm -lrt -lssl -lcrypto -ldl MYSQL_VERSION = 5.5.32 NM = /usr/bin/nm -B NMEDIT = OBJDUMP = objdump OBJEXT = o OTOOL = OTOOL64 = PACKAGE = PHYLIPNEW PACKAGE_BUGREPORT = emboss-bug@emboss.open-bio.org PACKAGE_NAME = PHYLIPNEW PACKAGE_STRING = PHYLIPNEW 3.69.650 PACKAGE_TARNAME = PHYLIPNEW PACKAGE_URL = http://emboss.open-bio.org/ PACKAGE_VERSION = 3.69.650 PATH_SEPARATOR = : PCRE_DATE = 11-Apr-2009 PCRE_LIB_VERSION = 0:1:0 PCRE_MAJOR = 7 PCRE_MINOR = 9 PCRE_POSIXLIB_VERSION = 0:0:0 PCRE_VERSION = 7.9 POSIX_MALLOC_THRESHOLD = -DPOSIX_MALLOC_THRESHOLD=10 POSTGRESQL_CFLAGS = -I/usr/include POSTGRESQL_CONFIG = /usr/bin/pg_config POSTGRESQL_CPPFLAGS = -I/usr/include POSTGRESQL_LDFLAGS = -L/usr/lib64 -lpq POSTGRESQL_VERSION = 9.2.4 RANLIB = ranlib SED = /usr/bin/sed SET_MAKE = SHELL = /bin/sh STRIP = strip VERSION = 3.69.650 WARN_CFLAGS = XLIB = -lX11 -lXaw -lXt XMKMF = X_CFLAGS = X_EXTRA_LIBS = X_LIBS = X_PRE_LIBS = -lSM -lICE abs_builddir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc abs_srcdir = /data/scratch/embossdist/embassy/phylipnew/emboss_doc abs_top_builddir = /data/scratch/embossdist/embassy/phylipnew 
abs_top_srcdir = /data/scratch/embossdist/embassy/phylipnew ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ ac_ct_DUMPBIN = am__include = include am__leading_dot = . am__quote = am__tar = $${TAR-tar} chof - "$$tardir" am__untar = $${TAR-tar} xf - bindir = ${exec_prefix}/bin build = x86_64-unknown-linux-gnu build_alias = build_cpu = x86_64 build_os = linux-gnu build_vendor = unknown builddir = . datadir = ${datarootdir} datarootdir = ${prefix}/share docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} dvidir = ${docdir} embprefix = /usr/local exec_prefix = ${prefix} host = x86_64-unknown-linux-gnu host_alias = host_cpu = x86_64 host_os = linux-gnu host_vendor = unknown htmldir = ${docdir} includedir = ${prefix}/include infodir = ${datarootdir}/info install_sh = ${SHELL} /data/scratch/embossdist/embassy/phylipnew/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localedir = ${datarootdir}/locale localstatedir = ${prefix}/var mandir = ${datarootdir}/man mkdir_p = $(MKDIR_P) oldincludedir = /usr/include pdfdir = ${docdir} prefix = /usr/local program_transform_name = s,x,x, psdir = ${docdir} sbindir = ${exec_prefix}/sbin sharedstatedir = ${prefix}/com srcdir = . sysconfdir = ${prefix}/etc target_alias = top_build_prefix = ../ top_builddir = .. top_srcdir = .. SUBDIRS = html text all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. 
$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done cscopelist-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! 
-f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \ cscopelist-recursive ctags-recursive install-am install-strip \ tags-recursive .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am check check-am clean clean-generic clean-libtool \ cscopelist cscopelist-recursive ctags ctags-recursive \ distclean distclean-generic distclean-libtool distclean-tags \ distdir dvi dvi-am html html-am info info-am install \ install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-recursive uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/Makefile.am0000664000175000017500000000002410403301035014420 00000000000000SUBDIRS = html text PHYLIPNEW-3.69.650/emboss_doc/Makefile.in0000664000175000017500000004431012171071677014463 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = emboss_doc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ distdir ETAGS = etags CTAGS = ctags DIST_SUBDIRS = $(SUBDIRS) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = 
@CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = html text all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for 
dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu emboss_doc/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu emboss_doc/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done cscopelist-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! 
-f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
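# Note (commentary, not automake output): the 'install-strip' target above
# re-runs 'make install' with INSTALL_PROGRAM pointing at
# $(INSTALL_STRIP_PROGRAM), so executables are stripped of debugging symbols
# as they are copied into place; when configure has set $(STRIP), the
# STRIPPROG override passes that (possibly cross-) strip program through to
# the install helper.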
clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \ cscopelist-recursive ctags-recursive install-am install-strip \ tags-recursive .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am check check-am clean clean-generic clean-libtool \ cscopelist cscopelist-recursive ctags ctags-recursive \ distclean distclean-generic distclean-libtool distclean-tags \ distdir dvi dvi-am html html-am info info-am install \ install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-recursive uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/emboss_doc/.cvsignore0000664000175000017500000000002511326104663014402 00000000000000Makefile.in Makefile PHYLIPNEW-3.69.650/Makefile.in0000664000175000017500000006164212171071677012355 00000000000000# Makefile.in generated by automake 1.12.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
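# Explanatory note (not part of the generated text): the @...@ tokens below,
# such as @srcdir@ and @CC@, are placeholders that ./config.status replaces
# with the values chosen by ./configure when it turns this Makefile.in into
# the final Makefile; the file is not usable by make until that substitution
# has happened. Re-running the substitution by hand looks roughly like:
#   ./config.status Makefile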
@SET_MAKE@ VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = . DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(top_srcdir)/configure AUTHORS COPYING \ ChangeLog INSTALL NEWS config.guess config.sub depcomp \ install-sh ltmain.sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/general.m4 \ $(top_srcdir)/m4/hpdf.m4 $(top_srcdir)/m4/java.m4 \ $(top_srcdir)/m4/lf_x11.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/mysql.m4 $(top_srcdir)/m4/pngdriver.m4 \ $(top_srcdir)/m4/postgresql.m4 $(top_srcdir)/m4/sgi.m4 \ $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ cscope distdir dist dist-all distcheck ETAGS = etags CTAGS = ctags CSCOPE = cscope DIST_SUBDIRS = $(SUBDIRS) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__post_remove_distdir = $(am__remove_distdir) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best DIST_TARGETS = dist-gzip distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . -type f -print ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ ANT = @ANT@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DEVWARN_CFLAGS = @DEVWARN_CFLAGS@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GREP = @GREP@ HAVE_MEMMOVE = @HAVE_MEMMOVE@ HAVE_STRERROR = @HAVE_STRERROR@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JAR = @JAR@ JAVA = @JAVA@ JAVAC = @JAVAC@ JAVA_CFLAGS = @JAVA_CFLAGS@ JAVA_CPPFLAGS = @JAVA_CPPFLAGS@ JAVA_LDFLAGS = @JAVA_LDFLAGS@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MYSQL_CFLAGS = @MYSQL_CFLAGS@ MYSQL_CONFIG = @MYSQL_CONFIG@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@ MYSQL_LDFLAGS = @MYSQL_LDFLAGS@ MYSQL_VERSION = @MYSQL_VERSION@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PCRE_DATE = @PCRE_DATE@ PCRE_LIB_VERSION = @PCRE_LIB_VERSION@ PCRE_MAJOR = @PCRE_MAJOR@ PCRE_MINOR = @PCRE_MINOR@ PCRE_POSIXLIB_VERSION = @PCRE_POSIXLIB_VERSION@ PCRE_VERSION = @PCRE_VERSION@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ POSTGRESQL_CFLAGS = @POSTGRESQL_CFLAGS@ POSTGRESQL_CONFIG = @POSTGRESQL_CONFIG@ POSTGRESQL_CPPFLAGS = @POSTGRESQL_CPPFLAGS@ POSTGRESQL_LDFLAGS = @POSTGRESQL_LDFLAGS@ POSTGRESQL_VERSION = @POSTGRESQL_VERSION@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ WARN_CFLAGS = @WARN_CFLAGS@ XLIB = @XLIB@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = 
@abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ embprefix = @embprefix@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ # ACLOCAL_AMFLAGS = -I m4 SUBDIRS = src emboss_acd data emboss_doc EXTRA_DIST = depcomp ltmain.sh install-sh config.sub config.guess all: all-recursive .SUFFIXES: am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs distclean-libtool: -rm -f libtool config.lt # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. 
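# Explanatory note (not automake output): an example of option (2) above is
# overriding a compiler flag for a single invocation, e.g.
#   make CFLAGS='-g -O0'
# The flag values are illustrative only; a variable assigned on the make
# command line takes precedence over the assignment in this Makefile.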
$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done cscopelist-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscope: cscope.files test ! 
-s cscope.files \ || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) clean-cscope: -rm -f cscope.files cscope.files: clean-cscope cscopelist-recursive cscopelist cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags -rm -f cscope.out cscope.in.out cscope.po.out cscope.files distdir: $(DISTFILES) $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$(top_distdir)" distdir="$(distdir)" \ dist-hook -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__post_remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__post_remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__post_remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__post_remove_distdir) dist-tarZ: distdir tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__post_remove_distdir) dist-shar: distdir shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__post_remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__post_remove_distdir) dist dist-all: $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' $(am__post_remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac chmod -R a-w $(distdir); chmod u+w $(distdir) mkdir $(distdir)/_build mkdir $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build \ && ../configure --srcdir=.. --prefix="$$dc_install_base" \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__post_remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . ; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
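# Note (commentary, not automake output): the cleaning targets defined below
# follow the usual GNU levels: 'mostlyclean' removes the least, 'clean'
# removes what 'make' built, 'distclean' additionally removes what
# './configure' created (config.status, libtool, the generated Makefiles),
# and 'maintainer-clean' also removes files that need maintainer tools such
# as autoconf and automake to regenerate. For example,
#   make distclean
# returns an unpacked source tree to roughly its pre-configure state.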
clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f Makefile distclean-am: clean-am distclean-generic distclean-libtool \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \ cscopelist-recursive ctags-recursive install-am install-strip \ tags-recursive .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am am--refresh check check-am clean clean-cscope \ clean-generic clean-libtool cscope cscopelist \ cscopelist-recursive ctags ctags-recursive dist dist-all \ dist-bzip2 dist-gzip dist-hook dist-lzip dist-shar dist-tarZ \ dist-xz dist-zip distcheck distclean distclean-generic \ distclean-libtool distclean-tags distcleancheck distdir \ distuninstallcheck dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-recursive uninstall uninstall-am # tar to pick up the other directories # then remove any CVS subdirectories dist-hook: tar cBf - emboss_acd | ( cd $(distdir); tar xBf - ; cd emboss_acd; rm -rf CVS ) tar cBf - emboss_doc | ( cd $(distdir); tar xBf - ; cd emboss_doc; rm -rf CVS; rm -rf master) tar cBf - doc | ( cd $(distdir); tar xBf - ; cd doc; rm -rf CVS ) tar cBf - include | ( cd $(distdir); tar xBf - ; cd include; rm -rf CVS ) tar cBf - data | ( cd $(distdir); tar xBf - ; cd data; rm -rf CVS ) tar cBf - test | ( cd $(distdir); tar xBf - ; cd test; rm -rf CVS) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: PHYLIPNEW-3.69.650/NEWS0000664000175000017500000000000007712253200010751 00000000000000PHYLIPNEW-3.69.650/configure0000775000175000017500000241530712171071675012220 00000000000000#! /bin/sh # From configure.in Revision: 1.39 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69 for PHYLIPNEW 3.69.650. # # Report bugs to . # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. 
## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. 
if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 as_fn_exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. fi test x\$exitcode = x0 || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org and $0: emboss-bug@emboss.open-bio.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? 
"cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. 
Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" SHELL=${CONFIG_SHELL-/bin/sh} test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PHYLIPNEW' PACKAGE_TARNAME='PHYLIPNEW' PACKAGE_VERSION='3.69.650' PACKAGE_STRING='PHYLIPNEW 3.69.650' PACKAGE_BUGREPORT='emboss-bug@emboss.open-bio.org' PACKAGE_URL='http://emboss.open-bio.org/' ac_unique_file="src/dnadist.c" # Factoring default headers for most tests. 
ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS NEEDAJAX_FALSE NEEDAJAX_TRUE ISSHARED_FALSE ISSHARED_TRUE ISAIXIA64_FALSE ISAIXIA64_TRUE ISCYGWIN_FALSE ISCYGWIN_TRUE PURIFY_FALSE PURIFY_TRUE ESYSTEMLIBS_FALSE ESYSTEMLIBS_TRUE embprefix LOCALLINK_FALSE LOCALLINK_TRUE POSIX_MALLOC_THRESHOLD PCRE_POSIXLIB_VERSION PCRE_LIB_VERSION PCRE_VERSION PCRE_DATE PCRE_MINOR PCRE_MAJOR HAVE_STRERROR HAVE_MEMMOVE POSTGRESQL_VERSION POSTGRESQL_LDFLAGS POSTGRESQL_CPPFLAGS POSTGRESQL_CFLAGS POSTGRESQL_CONFIG MYSQL_VERSION MYSQL_LDFLAGS MYSQL_CPPFLAGS MYSQL_CFLAGS MYSQL_CONFIG JAVA_BUILD_FALSE JAVA_BUILD_TRUE JAVA_LDFLAGS JAVA_CPPFLAGS JAVA_CFLAGS JAVAC JAVA JAR ANT AMPDF_FALSE AMPDF_TRUE AMPNG_FALSE AMPNG_TRUE XLIB X_EXTRA_LIBS X_LIBS X_PRE_LIBS X_CFLAGS XMKMF DEVWARN_CFLAGS WARN_CFLAGS CXXCPP OTOOL64 OTOOL LIPO NMEDIT DSYMUTIL MANIFEST_TOOL RANLIB ac_ct_AR AR DLLTOOL OBJDUMP NM ac_ct_DUMPBIN DUMPBIN LD FGREP EGREP GREP SED host_os host_vendor host_cpu host build_os build_vendor build_cpu build LIBTOOL am__fastdepCXX_FALSE am__fastdepCXX_TRUE CXXDEPMODE am__fastdepCC_FALSE am__fastdepCC_TRUE CCDEPMODE am__nodep AMDEPBACKSLASH AMDEP_FALSE AMDEP_TRUE am__quote am__include DEPDIR am__untar am__tar AMTAR am__leading_dot mkdir_p INSTALL_STRIP_PROGRAM STRIP install_sh MAKEINFO AUTOHEADER AUTOMAKE AUTOCONF ACLOCAL VERSION PACKAGE CYGPATH_W am__isrc MKDIR_P SET_MAKE LN_S INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM CPP ac_ct_CXX CXXFLAGS CXX OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC AWK target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking enable_dependency_tracking enable_shared enable_static with_pic enable_fast_install with_gnu_ld with_sysroot enable_libtool_lock enable_64 with_optimisation enable_warnings enable_devwarnings enable_devextrawarnings enable_buildbookdeprecated enable_buildalldeprecated with_sgiabi with_x with_docroot with_gccprofile with_java with_javaos with_auth with_thread with_hpdf with_pngdriver with_mysql with_postgresql enable_localforce enable_debug enable_large enable_systemlibs enable_purify enable_mcheck enable_savestats ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CXX CXXFLAGS CCC CPP CXXCPP XMKMF ANT JAR JAVA JAVAC' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. 
# These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? 
"invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. 
with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. 
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures PHYLIPNEW 3.69.650 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. 
Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/PHYLIPNEW] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF Program names: --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names X features: --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR System types: --build=BUILD configure for building on BUILD [guessed] --host=HOST cross-compile to build programs to run on HOST [BUILD] _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of PHYLIPNEW 3.69.650:";; esac cat <<\_ACEOF Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-dependency-tracking do not reject slow dependency extractors --disable-dependency-tracking speeds up one-time build --enable-shared[=PKGS] build shared libraries [default=yes] --enable-static[=PKGS] build static libraries [default=yes] --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --enable-64 64 bit pointers on 32 bit machines --enable-warnings compiler warnings --enable-devwarnings strict compiler warnings for developers --enable-devextrawarnings add extra warnings to devwarnings --enable-buildbookdeprecated build deprecated functions used in 
books for 6.2.0 --enable-buildalldeprecated build all deprecated functions --enable-localforce force compile/link against /usr/local --enable-debug debug (-g option on compiler) --enable-large over 2Gb file support [default=yes] --enable-systemlibs utility for RPM/dpkg bundles --enable-purify purify --enable-mcheck mcheck and mprobe memory allocation test --enable-savestats save AJAX statistics and print with debug output Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use both] --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-sysroot=DIR Search for dependent libraries within DIR (or the compiler's sysroot if not specified). --without-optimisation Disable compiler optimisation --with-sgiabi=[ARG] SGI compiler flags [default=no] --with-x use the X Window System --with-docroot=DIR root directory path of documentation (defaults to none) --with-gccprofile selects profiling --with-java[=ARG] root directory path of Java installation --with-javaos[=ARG] root directory path of Java OS include --with-auth[=ARG] authorisation mechanism for Jemboss server [default=PAM] --with-thread[=ARG] thread type [default=linux] --with-hpdf=DIR root directory path of hpdf installation [defaults to /usr] --with-pngdriver=[DIR] root directory path of png/gd/zlib installation (defaults to /usr) --with-mysql[=ARG] use MySQL client library [default=yes], optionally specify path to mysql_config --with-postgresql[=ARG] use PostgreSQL library [default=yes], optionally specify path to pg_config Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a nonstandard directory <lib dir> LIBS libraries to pass to the linker, e.g. -l<library> CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if you have headers in a nonstandard directory <include dir> CXX C++ compiler command CXXFLAGS C++ compiler flags CPP C preprocessor CXXCPP C++ preprocessor XMKMF Path to xmkmf, Makefile generator for X Window System ANT Path to the Apache Ant make tool JAR Path to the Java archive tool JAVA Path to the Java application launcher JAVAC Path to the Java compiler Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to <emboss-bug@emboss.open-bio.org>. PHYLIPNEW home page: . _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name.
ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF PHYLIPNEW configure 3.69.650 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_cxx_try_compile LINENO # ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_compile # ac_fn_c_try_cpp LINENO # ---------------------- # Try to preprocess conftest.$ac_ext, and return whether this succeeded. 
ac_fn_c_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_cpp # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : eval "$3=yes" else eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile # ac_fn_c_try_run LINENO # ---------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes # that executables *can* be run. 
ac_fn_c_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then : ac_retval=0 else $as_echo "$as_me: program exited with status $ac_status" >&5 $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status fi rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_run # ac_fn_c_check_func LINENO FUNC VAR # ---------------------------------- # Tests whether FUNC exists, setting the cache variable VAR accordingly ac_fn_c_check_func () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case <limits.h> declares $2. For example, HP-UX 11i <limits.h> declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $2 (); below. Prefer <limits.h> to <assert.h> if __STDC__ is defined, since <limits.h> exists even on freestanding compilers. */ #ifdef __STDC__ # include <limits.h> #else # include <assert.h> #endif #undef $2 /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $2 (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_$2 || defined __stub___$2 choke me #endif int main () { return $2 (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : eval "$3=yes" else eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_func # ac_fn_cxx_try_cpp LINENO # ------------------------ # Try to preprocess conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$?
if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_cpp # ac_fn_cxx_try_link LINENO # ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_link # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists, giving a warning if it cannot be compiled using # the include files in INCLUDES and setting the cache variable VAR # accordingly. ac_fn_c_check_header_mongrel () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if eval \${$3+:} false; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } else # Is the header compilable? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 $as_echo_n "checking $2 usability... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_header_compiler=yes else ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 $as_echo "$ac_header_compiler" >&6; } # Is the header present? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 $as_echo_n "checking $2 presence... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include <$2> _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : ac_header_preproc=yes else ac_header_preproc=no fi rm -f conftest.err conftest.i conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( yes:no: ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 $as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ;; no:yes:* ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 $as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 $as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 $as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ( $as_echo "## --------------------------------------------- ## ## Report this to emboss-bug@emboss.open-bio.org ## ## --------------------------------------------- ##" ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=\$ac_header_compiler" fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_mongrel # ac_fn_c_check_type LINENO TYPE VAR INCLUDES # ------------------------------------------- # Tests whether TYPE exists after having included INCLUDES, setting cache # variable VAR accordingly. ac_fn_c_check_type () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main () { if (sizeof ($2)) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main () { if (sizeof (($2))) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else eval "$3=yes" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_type cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
It was created by PHYLIPNEW $as_me 3.69.650, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. ## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. $as_echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. 
## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . 
"$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_config_headers="$ac_config_headers src/config.h" # Make sure CFLAGS is defined to stop AC_PROG_CC adding -g. CFLAGS="${CFLAGS} " # Checks for programs. for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_AWK+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 $as_echo "$AWK" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$AWK" && break done ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then for ac_prog in icc gcc cc do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in icc gcc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. 
for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <stdio.h> int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <stdarg.h> #include <stdio.h> struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test -z "$CXX"; then if test -n "$CCC"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then for ac_prog in icpc g++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 $as_echo "$CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CXX" && break done fi if test -z "$CXX"; then ac_ct_CXX=$CXX for ac_prog in icpc g++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CXX+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CXX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 $as_echo "$ac_ct_CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CXX" && break done if test "x$ac_ct_CXX" = x; then CXX="g++" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CXX=$ac_ct_CXX fi fi fi fi # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 $as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; } if ${ac_cv_cxx_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 $as_echo "$ac_cv_cxx_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GXX=yes else GXX= fi ac_test_CXXFLAGS=${CXXFLAGS+set} ac_save_CXXFLAGS=$CXXFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 $as_echo_n "checking whether $CXX accepts -g... " >&6; } if ${ac_cv_prog_cxx_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_cv_prog_cxx_g=yes else CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : else ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : ac_cv_prog_cxx_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 $as_echo "$ac_cv_prog_cxx_g" >&6; } if test "$ac_test_CXXFLAGS" = set; then CXXFLAGS=$ac_save_CXXFLAGS elif test $ac_cv_prog_cxx_g = yes; then if test "$GXX" = yes; then CXXFLAGS="-g -O2" else CXXFLAGS="-g" fi else if test "$GXX" = yes; then CXXFLAGS="-O2" else CXXFLAGS= fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if ${ac_cv_prog_CPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since # <limits.h> exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include <limits.h> #else # include <assert.h> #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <ac_nonexistent.h> _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since # <limits.h> exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include <limits.h> #else # include <assert.h> #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. 
continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <ac_nonexistent.h> _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_aux_dir= for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do if test -f "$ac_dir/install-sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f "$ac_dir/install.sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break elif test -f "$ac_dir/shtool"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/shtool install -c" break fi done if test -z "$ac_aux_dir"; then as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 fi # These three variables are undocumented and unsupported, # and are intended to be withdrawn in a future Autoconf release. # They can cause serious problems if a builder's source tree is in a directory # whose full name contains unusual characters. ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AmigaOS /C/install, which installs bootblocks on floppy discs # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # OS/2's system install, which has a completely different semantic # ./install, which can be erroneously created by make from ./install.sh. # Reject install programs that cannot install multiple files. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 $as_echo_n "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then if ${ac_cv_path_install+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. # Account for people who put trailing slashes in PATH elements. case $as_dir/ in #(( ./ | .// | /[cC]/* | \ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ /usr/ucb/* ) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. 
for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then if test $ac_prog = install && grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : elif test $ac_prog = install && grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # program-specific install script used by HP pwplus--don't use. : else rm -rf conftest.one conftest.two conftest.dir echo one > conftest.one echo two > conftest.two mkdir conftest.dir if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && test -s conftest.one && test -s conftest.two && test -s conftest.dir/conftest.one && test -s conftest.dir/conftest.two then ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" break 3 fi fi fi done done ;; esac done IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir fi if test "${ac_cv_path_install+set}" = set; then INSTALL=$ac_cv_path_install else # As a last resort, use the slow shell script. Don't cache a # value for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. INSTALL=$ac_install_sh fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 $as_echo "$INSTALL" >&6; } # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 $as_echo_n "checking whether ln -s works... " >&6; } LN_S=$as_ln_s if test "$LN_S" = "ln -s"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 $as_echo "no, using $LN_S" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 $as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } set x ${MAKE-make} ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : $as_echo_n "(cached) " >&6 else cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @echo '@@@%%%=$(MAKE)=@@@%%%' _ACEOF # GNU make sometimes prints "make[1]: Entering ...", which would confuse us. case `${MAKE-make} -f conftest.make 2>/dev/null` in *@@@%%%=?*=@@@%%%*) eval ac_cv_prog_make_${ac_make}_set=yes;; *) eval ac_cv_prog_make_${ac_make}_set=no;; esac rm -f conftest.make fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } SET_MAKE= else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } SET_MAKE="MAKE=${MAKE-make}" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 $as_echo_n "checking for a thread-safe mkdir -p... " >&6; } if test -z "$MKDIR_P"; then if ${ac_cv_path_mkdir+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_prog in mkdir gmkdir; do for ac_exec_ext in '' $ac_executable_extensions; do as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( 'mkdir (GNU coreutils) '* | \ 'mkdir (coreutils) '* | \ 'mkdir (fileutils) '4.1*) ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext break 3;; esac done done done IFS=$as_save_IFS fi test -d ./--version && rmdir ./--version if test "${ac_cv_path_mkdir+set}" = set; then MKDIR_P="$ac_cv_path_mkdir -p" else # As a last resort, use the slow shell script. Don't cache a # value for MKDIR_P within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. MKDIR_P="$ac_install_sh -d" fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 $as_echo "$MKDIR_P" >&6; } am__api_version='1.12' { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 $as_echo_n "checking whether build environment is sane... " >&6; } # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[\\\"\#\$\&\'\`$am_lf]*) as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; esac case $srcdir in *[\\\"\#\$\&\'\`$am_lf\ \ ]*) as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$*" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$*" != "X $srcdir/configure conftest.file" \ && test "$*" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". as_fn_error $? "ls -t appears to fail. Make sure there is not a broken alias in your environment" "$LINENO" 5 fi if test "$2" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$2" = conftest.file ) then # Ok. : else as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi rm -f conftest.file test "$program_prefix" != NONE && program_transform_name="s&^&$program_prefix&;$program_transform_name" # Use a double $ so make ignores it. test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. # By default was `s,x,x', remove it if useless. 
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` if test x"${MISSING+set}" != xset; then case $am_aux_dir in *\ * | *\ *) MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; *) MISSING="\${SHELL} $am_aux_dir/missing" ;; esac fi # Use eval to expand $SHELL if eval "$MISSING --run true"; then am_missing_run="$MISSING --run " else am_missing_run= { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. if test "$cross_compiling" != no; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 $as_echo "$STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 $as_echo "$ac_ct_STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 $as_echo_n "checking for style of include used by $am_make... " >&6; } am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from 'make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 $as_echo "$_am_result" >&6; } rm -f confinc confmf # Check whether --enable-dependency-tracking was given. if test "${enable_dependency_tracking+set}" = set; then : enableval=$enable_dependency_tracking; fi if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi if test "x$enable_dependency_tracking" != xno; then AMDEP_TRUE= AMDEP_FALSE='#' else AMDEP_TRUE='#' AMDEP_FALSE= fi if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." am__isrc=' -I$(srcdir)' # test to see if srcdir already configured if test -f $srcdir/config.status; then as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi # Define the identity of the package. PACKAGE='PHYLIPNEW' VERSION='3.69.650' cat >>confdefs.h <<_ACEOF #define PACKAGE "$PACKAGE" _ACEOF cat >>confdefs.h <<_ACEOF #define VERSION "$VERSION" _ACEOF # Some tools Automake needs. 
ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # mkdir_p='$(MKDIR_P)' # We need awk for the "check" target. The system "awk" is bad on # some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' depcc="$CC" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CC_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. 
if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 $as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi depcc="$CXX" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CXX_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CXX_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. 
# # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CXX_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CXX_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 $as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then am__fastdepCXX_TRUE= am__fastdepCXX_FALSE='#' else am__fastdepCXX_TRUE='#' am__fastdepCXX_FALSE= fi # Use libtool to make a shared library. case `pwd` in *\ * | *\ *) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 $as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; esac macro_version='2.4.2' macro_revision='1.3337' ltmain="$ac_aux_dir/ltmain.sh" # Make sure we can run config.sub. $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 $as_echo_n "checking build system type... 
" >&6; } if ${ac_cv_build+:} false; then : $as_echo_n "(cached) " >&6 else ac_build_alias=$build_alias test "x$ac_build_alias" = x && ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` test "x$ac_build_alias" = x && as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 $as_echo "$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; *) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; esac build=$ac_cv_build ac_save_IFS=$IFS; IFS='-' set x $ac_cv_build shift build_cpu=$1 build_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: build_os=$* IFS=$ac_save_IFS case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 $as_echo_n "checking host system type... " >&6; } if ${ac_cv_host+:} false; then : $as_echo_n "(cached) " >&6 else if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 $as_echo "$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; *) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; esac host=$ac_cv_host ac_save_IFS=$IFS; IFS='-' set x $ac_cv_host shift host_cpu=$1 host_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: host_os=$* IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac # Backslashify metacharacters that are still active within # double-quoted strings. sed_quote_subst='s/\(["`$\\]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\(["`\\]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to delay expansion of an escaped single quote. delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 $as_echo_n "checking how to print strings... " >&6; } # Test print first, because it will be a builtin if present. if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. 
func_echo_all () { $ECHO "" } case "$ECHO" in printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 $as_echo "printf" >&6; } ;; print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 $as_echo "print -r" >&6; } ;; *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 $as_echo "cat" >&6; } ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 $as_echo_n "checking for a sed that does not truncate output... " >&6; } if ${ac_cv_path_SED+:} false; then : $as_echo_n "(cached) " >&6 else ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" done echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed { ac_script=; unset ac_script;} if test -z "$SED"; then ac_path_SED_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED case `"$ac_path_SED" --version 2>&1` in *GNU*) ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo '' >> "conftest.nl" "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_SED_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_SED="$ac_path_SED" ac_path_SED_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_SED_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_SED"; then as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 fi else ac_cv_path_SED=$SED fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 $as_echo "$ac_cv_path_SED" >&6; } SED="$ac_cv_path_SED" rm -f conftest.sed test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } if ${ac_cv_path_EGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 $as_echo_n "checking for fgrep... 
" >&6; } if ${ac_cv_path_FGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 then ac_cv_path_FGREP="$GREP -F" else if test -z "$FGREP"; then ac_path_FGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in fgrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_FGREP" || continue # Check for GNU ac_path_FGREP and select it if it is found. # Check for GNU $ac_path_FGREP case `"$ac_path_FGREP" --version 2>&1` in *GNU*) ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'FGREP' >> "conftest.nl" "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_FGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_FGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_FGREP"; then as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_FGREP=$FGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 $as_echo "$ac_cv_path_FGREP" >&6; } FGREP="$ac_cv_path_FGREP" test -z "$GREP" && GREP=grep # Check whether --with-gnu-ld was given. if test "${with_gnu_ld+set}" = set; then : withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes else with_gnu_ld=no fi ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 $as_echo_n "checking for ld used by $CC... " >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 $as_echo_n "checking for GNU ld... " >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 $as_echo_n "checking for non-GNU ld... " >&6; } fi if ${lt_cv_path_LD+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. 
if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 $as_echo "$LD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 $as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } if ${lt_cv_prog_gnu_ld+:} false; then : $as_echo_n "(cached) " >&6 else # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 $as_echo "$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 $as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; } if ${lt_cv_path_NM+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM="$NM" else lt_nm_to_check="${ac_tool_prefix}nm" if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. tmp_nm="$ac_dir/$lt_tmp_nm" if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then # Check to see if the nm accepts a BSD-compat flag. # Adding the `sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in */dev/null* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS="$lt_save_ifs" done : ${lt_cv_path_NM=no} fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 $as_echo "$lt_cv_path_NM" >&6; } if test "$lt_cv_path_NM" != "no"; then NM="$lt_cv_path_NM" else # Didn't find any BSD compatible name lister, look for dumpbin. if test -n "$DUMPBIN"; then : # Let the user override the test. else if test -n "$ac_tool_prefix"; then for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_DUMPBIN+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DUMPBIN"; then ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DUMPBIN=$ac_cv_prog_DUMPBIN if test -n "$DUMPBIN"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 $as_echo "$DUMPBIN" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$DUMPBIN" && break done fi if test -z "$DUMPBIN"; then ac_ct_DUMPBIN=$DUMPBIN for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DUMPBIN"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN if test -n "$ac_ct_DUMPBIN"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 $as_echo "$ac_ct_DUMPBIN" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_DUMPBIN" && break done if test "x$ac_ct_DUMPBIN" = x; then DUMPBIN=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DUMPBIN=$ac_ct_DUMPBIN fi fi case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols" ;; *) DUMPBIN=: ;; esac fi if test "$DUMPBIN" != ":"; then NM="$DUMPBIN" fi fi test -z "$NM" && NM=nm { $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 $as_echo_n "checking the name lister ($NM) interface... " >&6; } if ${lt_cv_nm_interface+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 $as_echo "$lt_cv_nm_interface" >&6; } # find the maximum length of command line arguments { $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 $as_echo_n "checking the maximum length of command line arguments... 
" >&6; } if ${lt_cv_sys_max_cmd_len+:} false; then : $as_echo_n "(cached) " >&6 else i=0 teststring="ABCD" case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. # Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; mint*) # On MiNT this can take a long time and run out of memory. lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; os2*) # The test takes a long time on OS/2. lt_cv_sys_max_cmd_len=8192 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else # Make teststring a little bigger before we do anything with it. # a 1K string should be a reasonable start. for i in 1 2 3 4 5 6 7 8 ; do teststring=$teststring$teststring done SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} # If test is not a shell built-in, we'll probably end up computing a # maximum length that is only half of the actual maximum length, but # we can't tell. 
while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ = "X$teststring$teststring"; } >/dev/null 2>&1 && test $i != 17 # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done # Only check the string length outside the loop. lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` teststring= # Add a significant safety factor because C++ compilers can tack on # massive amounts of additional arguments before passing them to the # linker. It appears as though 1/2 is a usable value. lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac fi if test -n $lt_cv_sys_max_cmd_len ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 $as_echo "$lt_cv_sys_max_cmd_len" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 $as_echo "none" >&6; } fi max_cmd_len=$lt_cv_sys_max_cmd_len : ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5 $as_echo_n "checking whether the shell understands some XSI constructs... " >&6; } # Try some XSI features xsi_shell=no ( _lt_dummy="a/b/c" test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ = c,a/b,b/c, \ && eval 'test $(( 1 + 1 )) -eq 2 \ && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ && xsi_shell=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5 $as_echo "$xsi_shell" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5 $as_echo_n "checking whether the shell understands \"+=\"... " >&6; } lt_shell_append=no ( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \ >/dev/null 2>&1 \ && lt_shell_append=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5 $as_echo "$lt_shell_append" >&6; } if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr \015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 $as_echo_n "checking how to convert $build file names to $host format... " >&6; } if ${lt_cv_to_host_file_cmd+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac fi to_host_file_cmd=$lt_cv_to_host_file_cmd { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 $as_echo "$lt_cv_to_host_file_cmd" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 $as_echo_n "checking how to convert $build file names to toolchain format... 
" >&6; } if ${lt_cv_to_tool_file_cmd+:} false; then : $as_echo_n "(cached) " >&6 else #assume ordinary cross tools, or native build. lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac fi to_tool_file_cmd=$lt_cv_to_tool_file_cmd { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 $as_echo "$lt_cv_to_tool_file_cmd" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 $as_echo_n "checking for $LD option to reload object files... " >&6; } if ${lt_cv_ld_reload_flag+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_reload_flag='-r' fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 $as_echo "$lt_cv_ld_reload_flag" >&6; } reload_flag=$lt_cv_ld_reload_flag case $reload_flag in "" | " "*) ;; *) reload_flag=" $reload_flag" ;; esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in cygwin* | mingw* | pw32* | cegcc*) if test "$GCC" != yes; then reload_cmds=false fi ;; darwin*) if test "$GCC" = yes; then reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' else reload_cmds='$LD$reload_flag -o $output$reload_objs' fi ;; esac if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. set dummy ${ac_tool_prefix}objdump; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 $as_echo "$OBJDUMP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OBJDUMP"; then ac_ct_OBJDUMP=$OBJDUMP # Extract the first word of "objdump", so it can be a program name with args. set dummy objdump; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OBJDUMP="objdump" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 $as_echo "$ac_ct_OBJDUMP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OBJDUMP" = x; then OBJDUMP="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OBJDUMP=$ac_ct_OBJDUMP fi else OBJDUMP="$ac_cv_prog_OBJDUMP" fi test -z "$OBJDUMP" && OBJDUMP=objdump { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 $as_echo_n "checking how to recognize dependent libraries... " >&6; } if ${lt_cv_deplibs_check_method+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support # interlibrary dependencies. # 'none' -- dependencies not supported. # `unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # which responds to the $file_magic_cmd with a given extended regex. # If you have `file' or equivalent on your system and you're not sure # whether `pass_all' will *always* work, you probably want this one. case $host_os in aix[4-9]*) lt_cv_deplibs_check_method=pass_all ;; beos*) lt_cv_deplibs_check_method=pass_all ;; bsdi[45]*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='/usr/bin/file -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; cygwin*) # func_win32_libid is a shell function defined in ltmain.sh lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' ;; mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. 
lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[3-9]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) lt_cv_deplibs_check_method=pass_all ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 $as_echo "$lt_cv_deplibs_check_method" >&6; } file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else file_magic_glob=`echo 
aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` fi ;; esac fi file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. set dummy ${ac_tool_prefix}dlltool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 $as_echo "$DLLTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_DLLTOOL"; then ac_ct_DLLTOOL=$DLLTOOL # Extract the first word of "dlltool", so it can be a program name with args. set dummy dlltool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DLLTOOL="dlltool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 $as_echo "$ac_ct_DLLTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_DLLTOOL" = x; then DLLTOOL="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DLLTOOL=$ac_ct_DLLTOOL fi else DLLTOOL="$ac_cv_prog_DLLTOOL" fi test -z "$DLLTOOL" && DLLTOOL=dlltool { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 $as_echo_n "checking how to associate runtime and link libraries... 
" >&6; } if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh # decide which to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in *--identify-strict*) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib ;; *) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback ;; esac ;; *) # fallback: assume linklib IS sharedlib lt_cv_sharedlib_from_linklib_cmd="$ECHO" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 $as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO if test -n "$ac_tool_prefix"; then for ac_prog in ar do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 $as_echo "$AR" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$AR" && break done fi if test -z "$AR"; then ac_ct_AR=$AR for ac_prog in ar do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_AR="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 $as_echo "$ac_ct_AR" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_AR" && break done if test "x$ac_ct_AR" = x; then AR="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac AR=$ac_ct_AR fi fi : ${AR=ar} : ${AR_FLAGS=cru} { $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 $as_echo_n "checking for archiver @FILE support... 
" >&6; } if ${lt_cv_ar_at_file+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ar_at_file=no cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : echo conftest.$ac_objext > conftest.lst lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -eq 0; then # Ensure the archiver fails upon bogus file names. rm -f conftest.$ac_objext libconftest.a { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -ne 0; then lt_cv_ar_at_file=@ fi fi rm -f conftest.* libconftest.a fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 $as_echo "$lt_cv_ar_at_file" >&6; } if test "x$lt_cv_ar_at_file" = xno; then archiver_list_spec= else archiver_list_spec=$lt_cv_ar_at_file fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 $as_echo "$STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 $as_echo "$ac_ct_STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi test -z "$STRIP" && STRIP=: if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 $as_echo "$RANLIB" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_RANLIB"; then ac_ct_RANLIB=$RANLIB # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_RANLIB="ranlib" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 $as_echo "$ac_ct_RANLIB" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_RANLIB" = x; then RANLIB=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac RANLIB=$ac_ct_RANLIB fi else RANLIB="$ac_cv_prog_RANLIB" fi test -z "$RANLIB" && RANLIB=: # Determine commands to create old-style static archives. 
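# The static archive is created with $AR (AR_FLAGS defaults to `cru' above); if
# a usable ranlib was found, a $RANLIB step is chained onto both the archive
# creation and the post-install commands -- with the `-t' flag on OpenBSD,
# plain $RANLIB everywhere else.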
old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in darwin*) lock_old_archive_extraction=yes ;; *) lock_old_archive_extraction=no ;; esac # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Check for command to grab the raw symbol name followed by C symbol from nm. { $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 $as_echo_n "checking command to parse $NM output from $compiler object... " >&6; } if ${lt_cv_sys_global_symbol_pipe+:} false; then : $as_echo_n "(cached) " >&6 else # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[BCDEGRST]' # Regexp to match symbols that can be accessed directly from C. sympat='\([_A-Za-z][_A-Za-z0-9]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[BCDT]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[ABCDGISTW]' ;; hpux*) if test "$host_cpu" = ia64; then symcode='[ABCDEGRST]' fi ;; irix* | nonstopux*) symcode='[BCDEGRST]' ;; osf*) symcode='[BCDEGQRST]' ;; solaris*) symcode='[BDRT]' ;; sco3.2v5*) symcode='[DT]' ;; sysv4.2uw2*) symcode='[DT]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[ABDT]' ;; sysv4) symcode='[DFNSTU]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[ABCDGIRSTW]' ;; esac # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'" lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function # and D for any global variable. # Also find C++ and __fastcall symbols from MSVC++, # which start with @ or ?. 
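# Whichever branch is taken below, the resulting pipe turns each relevant line
# of $NM (or dumpbin) output into "<symbol code> <raw name> <C name>"; e.g. a
# BSD-style line such as "0000000000000000 T _nm_test_func" becomes
# "T _nm_test_func nm_test_func" on targets whose symbols carry a leading
# underscore.  Lines mentioning __gnu_lto are filtered out afterwards.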
lt_cv_sys_global_symbol_pipe="$AWK '"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\ " {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ " s[1]~/^[@?]/{print s[1], s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx" else lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Now try to grab the symbols. nlist=conftest.nm if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 con't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. */ LT_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS="conftstm.$ac_objext" CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? 
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext}; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&5 fi else echo "cannot find nm_test_var in $nlist" >&5 fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 fi else echo "$progname: failed program was:" >&5 cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test "$pipe_works" = yes; then break else lt_cv_sys_global_symbol_pipe= fi done fi if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 $as_echo "failed" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 $as_echo "ok" >&6; } fi # Response file support. if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then nm_file_list_spec='@' fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 $as_echo_n "checking for sysroot... " >&6; } # Check whether --with-sysroot was given. if test "${with_sysroot+set}" = set; then : withval=$with_sysroot; else with_sysroot=no fi lt_sysroot= case ${with_sysroot} in #( yes) if test "$GCC" = yes; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5 $as_echo "${with_sysroot}" >&6; } as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5 ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 $as_echo "${lt_sysroot:-no}" >&6; } # Check whether --enable-libtool-lock was given. if test "${enable_libtool_lock+set}" = set; then : enableval=$enable_libtool_lock; fi test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '#line '$LINENO' "configure"' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out which ABI we are using. 
echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; ppc*-*linux*|powerpc*-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*|s390*-*tpf*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -belf" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 $as_echo_n "checking whether the C compiler needs -belf... " >&6; } if ${lt_cv_cc_needs_belf+:} false; then : $as_echo_n "(cached) " >&6 else ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_cc_needs_belf=yes else lt_cv_cc_needs_belf=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 $as_echo "$lt_cv_cc_needs_belf" >&6; } if test x"$lt_cv_cc_needs_belf" != x"yes"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS="$SAVE_CFLAGS" fi ;; *-*solaris*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) case $host in i?86-*-solaris*) LD="${LD-ld} -m elf_x86_64" ;; sparc*-*-solaris*) LD="${LD-ld} -m elf64_sparc" ;; esac # GNU ld 2.21 introduced _sol2 emulations. Use them if available. if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then LD="${LD-ld}_sol2" fi ;; *) if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then LD="${LD-ld} -64" fi ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks="$enable_libtool_lock" if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. set dummy ${ac_tool_prefix}mt; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$MANIFEST_TOOL"; then ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL if test -n "$MANIFEST_TOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 $as_echo "$MANIFEST_TOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_MANIFEST_TOOL"; then ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL # Extract the first word of "mt", so it can be a program name with args. set dummy mt; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_MANIFEST_TOOL"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL if test -n "$ac_ct_MANIFEST_TOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 $as_echo "$ac_ct_MANIFEST_TOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_MANIFEST_TOOL" = x; then MANIFEST_TOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL fi else MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" fi test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 $as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } if ${lt_cv_path_mainfest_tool+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&5 if $GREP 'Manifest Tool' conftest.out > /dev/null; then lt_cv_path_mainfest_tool=yes fi rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 $as_echo "$lt_cv_path_mainfest_tool" >&6; } if test "x$lt_cv_path_mainfest_tool" != xyes; then MANIFEST_TOOL=: fi case $host_os in rhapsody* | darwin*) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_DSYMUTIL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DSYMUTIL"; then ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DSYMUTIL=$ac_cv_prog_DSYMUTIL if test -n "$DSYMUTIL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 $as_echo "$DSYMUTIL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_DSYMUTIL"; then ac_ct_DSYMUTIL=$DSYMUTIL # Extract the first word of "dsymutil", so it can be a program name with args. set dummy dsymutil; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DSYMUTIL"; then ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL if test -n "$ac_ct_DSYMUTIL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 $as_echo "$ac_ct_DSYMUTIL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_DSYMUTIL" = x; then DSYMUTIL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DSYMUTIL=$ac_ct_DSYMUTIL fi else DSYMUTIL="$ac_cv_prog_DSYMUTIL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. set dummy ${ac_tool_prefix}nmedit; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_NMEDIT+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NMEDIT"; then ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi NMEDIT=$ac_cv_prog_NMEDIT if test -n "$NMEDIT"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 $as_echo "$NMEDIT" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_NMEDIT"; then ac_ct_NMEDIT=$NMEDIT # Extract the first word of "nmedit", so it can be a program name with args. set dummy nmedit; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_NMEDIT"; then ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_NMEDIT="nmedit" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT if test -n "$ac_ct_NMEDIT"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 $as_echo "$ac_ct_NMEDIT" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_NMEDIT" = x; then NMEDIT=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac NMEDIT=$ac_ct_NMEDIT fi else NMEDIT="$ac_cv_prog_NMEDIT" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. set dummy ${ac_tool_prefix}lipo; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO"; then ac_cv_prog_LIPO="$LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ac_tool_prefix}lipo" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi LIPO=$ac_cv_prog_LIPO if test -n "$LIPO"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 $as_echo "$LIPO" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_LIPO"; then ac_ct_LIPO=$LIPO # Extract the first word of "lipo", so it can be a program name with args. set dummy lipo; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_LIPO"; then ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_LIPO="lipo" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO if test -n "$ac_ct_LIPO"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 $as_echo "$ac_ct_LIPO" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_LIPO" = x; then LIPO=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac LIPO=$ac_ct_LIPO fi else LIPO="$ac_cv_prog_LIPO" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. set dummy ${ac_tool_prefix}otool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OTOOL"; then ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL="${ac_tool_prefix}otool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL=$ac_cv_prog_OTOOL if test -n "$OTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 $as_echo "$OTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL"; then ac_ct_OTOOL=$OTOOL # Extract the first word of "otool", so it can be a program name with args. set dummy otool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OTOOL"; then ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL="otool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL if test -n "$ac_ct_OTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 $as_echo "$ac_ct_OTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OTOOL" = x; then OTOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL=$ac_ct_OTOOL fi else OTOOL="$ac_cv_prog_OTOOL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. 
set dummy ${ac_tool_prefix}otool64; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OTOOL64+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OTOOL64"; then ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL64=$ac_cv_prog_OTOOL64 if test -n "$OTOOL64"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 $as_echo "$OTOOL64" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL64"; then ac_ct_OTOOL64=$OTOOL64 # Extract the first word of "otool64", so it can be a program name with args. set dummy otool64; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OTOOL64"; then ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL64="otool64" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 if test -n "$ac_ct_OTOOL64"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 $as_echo "$ac_ct_OTOOL64" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OTOOL64" = x; then OTOOL64=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL64=$ac_ct_OTOOL64 fi else OTOOL64="$ac_cv_prog_OTOOL64" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 $as_echo_n "checking for -single_module linker flag... " >&6; } if ${lt_cv_apple_cc_single_mod+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_apple_cc_single_mod=no if test -z "${LT_MULTI_MODULE}"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE # non-empty at configure time, or by adding -multi_module to the # link flags. rm -rf libconftest.dylib* echo "int foo(void){return 1;}" > conftest.c echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c" >&5 $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c 2>conftest.err _lt_result=$? # If there is a non-empty error log, and "single_module" # appears in it, assume the flag caused a linker warning if test -s conftest.err && $GREP single_module conftest.err; then cat conftest.err >&5 # Otherwise, if the output was created with a 0 exit code from # the compiler, it worked. 
elif test -f libconftest.dylib && test $_lt_result -eq 0; then lt_cv_apple_cc_single_mod=yes else cat conftest.err >&5 fi rm -rf libconftest.dylib* rm -f conftest.* fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 $as_echo "$lt_cv_apple_cc_single_mod" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 $as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } if ${lt_cv_ld_exported_symbols_list+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_ld_exported_symbols_list=yes else lt_cv_ld_exported_symbols_list=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 $as_echo "$lt_cv_ld_exported_symbols_list" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 $as_echo_n "checking for -force_load linker flag... " >&6; } if ${lt_cv_ld_force_load+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 echo "$AR cru libconftest.a conftest.o" >&5 $AR cru libconftest.a conftest.o 2>&5 echo "$RANLIB libconftest.a" >&5 $RANLIB libconftest.a 2>&5 cat > conftest.c << _LT_EOF int main() { return 0;} _LT_EOF echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err _lt_result=$? if test -s conftest.err && $GREP force_load conftest.err; then cat conftest.err >&5 elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then lt_cv_ld_force_load=yes else cat conftest.err >&5 fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 $as_echo "$lt_cv_ld_force_load" >&6; } case $host_os in rhapsody* | darwin1.[012]) _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; darwin*) # darwin 5.x on # if running on 10.5 or later, the deployment target defaults # to the OS version, if on x86, and 10.4, the deployment # target defaults to 10.4. Don't you love it? 
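# The case below maps MACOSX_DEPLOYMENT_TARGET onto the matching "allow
# undefined symbols" linker flag, for example
#   MACOSX_DEPLOYMENT_TARGET=10.2  ->  ${wl}-flat_namespace ${wl}-undefined ${wl}suppress
#   MACOSX_DEPLOYMENT_TARGET=10.5  ->  ${wl}-undefined ${wl}dynamic_lookup
# where ${wl} is the compiler's linker pass-through prefix (-Wl, for gcc).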
case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in 10.0,*86*-darwin8*|10.0,*-darwin[91]*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; 10.[012]*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; esac ;; esac if test "$lt_cv_apple_cc_single_mod" = "yes"; then _lt_dar_single_mod='$single_module' fi if test "$lt_cv_ld_exported_symbols_list" = "yes"; then _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' else _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' fi if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then _lt_dsymutil='~$DSYMUTIL $lib || :' else _lt_dsymutil= fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <stdlib.h> #include <stdarg.h> #include <string.h> #include <float.h> int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <string.h> _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <stdlib.h> _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <ctype.h> #include <stdlib.h> #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi # On IRIX 5.3, sys/types and inttypes.h are conflicting.
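# A successful probe in the header loop below records the result in confdefs.h;
# for example, finding <stdint.h> appends
#   #define HAVE_STDINT_H 1
# so the package sources can guard their includes with #ifdef HAVE_STDINT_H.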
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default " if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in dlfcn.h do : ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default " if test "x$ac_cv_header_dlfcn_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_DLFCN_H 1 _ACEOF fi done func_stripname_cnf () { case ${2} in .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; esac } # func_stripname_cnf # Set options enable_dlopen=no enable_win32_dll=no # Check whether --enable-shared was given. if test "${enable_shared+set}" = set; then : enableval=$enable_shared; p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS="$lt_save_ifs" ;; esac else enable_shared=yes fi # Check whether --enable-static was given. if test "${enable_static+set}" = set; then : enableval=$enable_static; p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS="$lt_save_ifs" ;; esac else enable_static=yes fi # Check whether --with-pic was given. if test "${with_pic+set}" = set; then : withval=$with_pic; lt_p=${PACKAGE-default} case $withval in yes|no) pic_mode=$withval ;; *) pic_mode=default # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for lt_pkg in $withval; do IFS="$lt_save_ifs" if test "X$lt_pkg" = "X$lt_p"; then pic_mode=yes fi done IFS="$lt_save_ifs" ;; esac else pic_mode=default fi test -z "$pic_mode" && pic_mode=default # Check whether --enable-fast-install was given. if test "${enable_fast_install+set}" = set; then : enableval=$enable_fast_install; p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS="$lt_save_ifs" ;; esac else enable_fast_install=yes fi # This can be used to rebuild libtool when needed LIBTOOL_DEPS="$ltmain" # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' test -z "$LN_S" && LN_S="ln -s" if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 $as_echo_n "checking for objdir... " >&6; } if ${lt_cv_objdir+:} false; then : $as_echo_n "(cached) " >&6 else rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. 
lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 $as_echo "$lt_cv_objdir" >&6; } objdir=$lt_cv_objdir cat >>confdefs.h <<_ACEOF #define LT_OBJDIR "$lt_cv_objdir/" _ACEOF case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a `.a' archive for static linking (except MSVC, # which needs '.lib'). libext=a with_gnu_ld="$lt_cv_prog_gnu_ld" old_CC="$CC" old_CFLAGS="$CFLAGS" # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 $as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } if ${lt_cv_path_MAGIC_CMD+:} false; then : $as_echo_n "(cached) " >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/${ac_tool_prefix}file; then lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 $as_echo "$MAGIC_CMD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 $as_echo_n "checking for file... 
" >&6; } if ${lt_cv_path_MAGIC_CMD+:} false; then : $as_echo_n "(cached) " >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/file; then lt_cv_path_MAGIC_CMD="$ac_dir/file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 $as_echo "$MAGIC_CMD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else MAGIC_CMD=: fi fi fi ;; esac # Use C for the default configuration in the libtool script lt_save_CC="$CC" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Source file extension for C test sources. ac_ext=c # Object file extension for compiled C test sources. objext=o objext=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(){return(0);}' # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Save the default compiler, since it gets overwritten when the other # tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. compiler_DEFAULT=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... 
if test -n "$compiler"; then lt_prog_compiler_no_builtin_flag= if test "$GCC" = yes; then case $cc_basename in nvcc*) lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; *) lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 $as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; } if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_rtti_exceptions=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-fno-rtti -fno-exceptions" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_rtti_exceptions=yes fi fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 $as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" else : fi fi lt_prog_compiler_wl= lt_prog_compiler_pic= lt_prog_compiler_static= if test "$GCC" = yes; then lt_prog_compiler_wl='-Wl,' lt_prog_compiler_static='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic='-DDLL_EXPORT' ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) lt_prog_compiler_pic='-fPIC' ;; esac ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. lt_prog_compiler_can_build_shared=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic=-Kconform_pic fi ;; *) lt_prog_compiler_pic='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 lt_prog_compiler_wl='-Xlinker ' if test -n "$lt_prog_compiler_pic"; then lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) lt_prog_compiler_wl='-Wl,' if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' else lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' fi ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic='-DDLL_EXPORT' ;; hpux9* | hpux10* | hpux11*) lt_prog_compiler_wl='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static='${wl}-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; # Lahey Fortran 8.1. lf95*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='--shared' lt_prog_compiler_static='--static' ;; nagfor*) # NAG Fortran compiler lt_prog_compiler_wl='-Wl,-Wl,,' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; ccc*) lt_prog_compiler_wl='-Wl,' # All Alpha code is PIC. 
lt_prog_compiler_static='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-qpic' lt_prog_compiler_static='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='' ;; *Sun\ F* | *Sun*Fortran*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Wl,' ;; *Intel*\ [CF]*Compiler*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; *Portland\ Group*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; esac ;; esac ;; newsos6) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl='-Wl,' # All OSF/1 code is PIC. lt_prog_compiler_static='-non_shared' ;; rdos*) lt_prog_compiler_static='-non_shared' ;; solaris*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) lt_prog_compiler_wl='-Qoption ld ';; *) lt_prog_compiler_wl='-Wl,';; esac ;; sunos4*) lt_prog_compiler_wl='-Qoption ld ' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec ;then lt_prog_compiler_pic='-Kconform_pic' lt_prog_compiler_static='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; unicos*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_can_build_shared=no ;; uts4*) lt_prog_compiler_pic='-pic' lt_prog_compiler_static='-Bstatic' ;; *) lt_prog_compiler_can_build_shared=no ;; esac fi case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic= ;; *) lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 $as_echo_n "checking for $compiler option to produce PIC... " >&6; } if ${lt_cv_prog_compiler_pic+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic=$lt_prog_compiler_pic fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 $as_echo "$lt_cv_prog_compiler_pic" >&6; } lt_prog_compiler_pic=$lt_cv_prog_compiler_pic # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... 
" >&6; } if ${lt_cv_prog_compiler_pic_works+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic_works=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic -DPIC" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works=yes fi fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 $as_echo "$lt_cv_prog_compiler_pic_works" >&6; } if test x"$lt_cv_prog_compiler_pic_works" = xyes; then case $lt_prog_compiler_pic in "" | " "*) ;; *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; esac else lt_prog_compiler_pic= lt_prog_compiler_can_build_shared=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if ${lt_cv_prog_compiler_static_works+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_static_works=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works=yes fi else lt_cv_prog_compiler_static_works=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 $as_echo "$lt_cv_prog_compiler_static_works" >&6; } if test x"$lt_cv_prog_compiler_static_works" = xyes; then : else lt_prog_compiler_static= fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... 
" >&6; } if ${lt_cv_prog_compiler_c_o+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 $as_echo "$lt_cv_prog_compiler_c_o" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if ${lt_cv_prog_compiler_c_o+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. 
$RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 $as_echo "$lt_cv_prog_compiler_c_o" >&6; } hard_links="nottested" if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 $as_echo_n "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 $as_echo "$hard_links" >&6; } if test "$hard_links" = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 $as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 $as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } runpath_var= allow_undefined_flag= always_export_symbols=no archive_cmds= archive_expsym_cmds= compiler_needs_object=no enable_shared_with_static_runtimes=no export_dynamic_flag_spec= export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' hardcode_automatic=no hardcode_direct=no hardcode_direct_absolute=no hardcode_libdir_flag_spec= hardcode_libdir_separator= hardcode_minus_L=no hardcode_shlibpath_var=unsupported inherit_rpath=no link_all_deplibs=unknown module_cmds= module_expsym_cmds= old_archive_from_new_cmds= old_archive_from_expsyms_cmds= thread_safe_flag_spec= whole_archive_flag_spec= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; esac ld_shlibs=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test "$with_gnu_ld" = yes; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. 
However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; *\ \(GNU\ Binutils\)\ [3-9]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test "$lt_use_gnu_ld_interface" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' export_dynamic_flag_spec='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec= fi supports_anon_versioning=no case `$LD -v 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[3-9]*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else ld_shlibs=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, # as there is no search path for DLLs. 
hardcode_libdir_flag_spec='-L$libdir' export_dynamic_flag_spec='${wl}--export-all-symbols' allow_undefined_flag=unsupported always_export_symbols=no enable_shared_with_static_runtimes=yes export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs=no fi ;; haiku*) archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' link_all_deplibs=yes ;; interix[3-9]*) hardcode_direct=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
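# The `expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` in the command
# below computes that base: one of 2048 slots of 256 KiB (262144 bytes)
# starting at 1342177280 (0x50000000), i.e. an image base in the range
# 0x50000000 .. 0x6FFC0000 mentioned above.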
archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test "$host_os" = linux-dietlibc; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test "$tmp_diet" = no then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 whole_archive_flag_spec= tmp_sharedflag='--shared' ;; xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' compiler_needs_object=yes ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' compiler_needs_object=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi case $cc_basename in xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test "x$supports_anon_versioning" = xyes; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat 
$export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else ld_shlibs=no fi ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. 
if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac ;; sunos4*) archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct=yes hardcode_shlibpath_var=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac if test "$ld_shlibs" = no; then runpath_var= hardcode_libdir_flag_spec= export_dynamic_flag_spec= whole_archive_flag_spec= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag=unsupported always_export_symbols=yes archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. hardcode_direct=unsupported fi ;; aix[4-9]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global # defined symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
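# In practice that means a user hitting "error TOC overflow" while building
# this package on AIX with gcc would re-run configure roughly as
#   ./configure CFLAGS="-g -O2 -mminimal-toc"
# (keeping the usual default flags) and, if that is still not enough, also add
#   LDFLAGS="-Wl,-bbigtoc"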
archive_cmds='' hardcode_direct=yes hardcode_direct_absolute=yes hardcode_libdir_separator=':' link_all_deplibs=yes file_list_spec='${wl}-f,' if test "$GCC" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L=yes hardcode_libdir_flag_spec='-L$libdir' hardcode_libdir_separator= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi export_dynamic_flag_spec='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag='-berok' # Determine the default libpath from the value encoded in an # empty executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath_+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag="-z nodefs" archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath_+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag=' ${wl}-bernotok' allow_undefined_flag=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec='$convenience' fi archive_cmds_need_lc=yes # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; bsdi[45]*) export_dynamic_flag_spec=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl*) # Native MSVC hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported always_export_symbols=yes file_list_spec='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. 
# _LT_TAGVAR(old_archive_from_new_cmds, )='true' enable_shared_with_static_runtimes=yes exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib old_postinstall_cmds='chmod 644 $oldlib' postlink_cmds='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC wrapper hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_from_new_cmds='true' # FIXME: Should let the user specify the lib program. old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' enable_shared_with_static_runtimes=yes ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc=no hardcode_direct=no hardcode_automatic=yes hardcode_shlibpath_var=unsupported if test "$lt_cv_ld_force_load" = "yes"; then whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' else whole_archive_flag_spec='' fi link_all_deplibs=yes allow_undefined_flag="$_lt_dar_allow_undefined" case $cc_basename in ifort*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test "$_lt_dar_can_shared" = "yes"; then output_verbose_link_cmd=func_echo_all archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" else ld_shlibs=no fi ;; dgux*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). 
freebsd2.2*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly*) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; hpux9*) if test "$GCC" = yes; then archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes export_dynamic_flag_spec='${wl}-E' ;; hpux10*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes fi ;; hpux11*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 $as_echo_n "checking if $CC understands -b... " >&6; } if ${lt_cv_prog_compiler__b+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler__b=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -b" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. 
cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler__b=yes fi else lt_cv_prog_compiler__b=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 $as_echo "$lt_cv_prog_compiler__b" >&6; } if test x"$lt_cv_prog_compiler__b" = xyes; then archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi ;; esac fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: case $host_cpu in hppa*64*|ia64*) hardcode_direct=no hardcode_shlibpath_var=no ;; *) hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 $as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } if ${lt_cv_irix_exported_symbol+:} false; then : $as_echo_n "(cached) " >&6 else save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int foo (void) { return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_irix_exported_symbol=yes else lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 $as_echo "$lt_cv_irix_exported_symbol" >&6; } if test "$lt_cv_irix_exported_symbol" = yes; then archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' fi else archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: inherit_rpath=yes link_all_deplibs=yes ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; newsos6) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: hardcode_shlibpath_var=no ;; *nto* | *qnx*) ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct=yes hardcode_shlibpath_var=no hardcode_direct_absolute=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' else case $host_os in openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-R$libdir' ;; *) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' ;; esac fi else ld_shlibs=no fi ;; os2*) hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes allow_undefined_flag=unsupported archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test 
-n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec='-rpath $libdir' fi archive_cmds_need_lc='no' hardcode_libdir_separator=: ;; solaris*) no_undefined_flag=' -z defs' if test "$GCC" = yes; then wlarc='${wl}' archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='${wl}' archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi hardcode_libdir_flag_spec='-R$libdir' hardcode_shlibpath_var=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) 
if test "$GCC" = yes; then whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else whole_archive_flag_spec='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec='-L$libdir' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; sysv4) case $host_vendor in sni) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds='$CC -r -o $output$reload_objs' hardcode_direct=no ;; motorola) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var=no ;; sysv4.3*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no export_dynamic_flag_spec='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag='${wl}-z,text' archive_cmds_need_lc=no hardcode_shlibpath_var=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. 
no_undefined_flag='${wl}-z,text' allow_undefined_flag='${wl}-z,nodefs' archive_cmds_need_lc=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='${wl}-R,$libdir' hardcode_libdir_separator=':' link_all_deplibs=yes export_dynamic_flag_spec='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; *) ld_shlibs=no ;; esac if test x$host_vendor = xsni; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) export_dynamic_flag_spec='${wl}-Blargedynsym' ;; esac fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 $as_echo "$ld_shlibs" >&6; } test "$ld_shlibs" = no && can_build_shared=no with_gnu_ld=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc" in x|xyes) # Assume -lc should be added archive_cmds_need_lc=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $archive_cmds in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } if ${lt_cv_archive_cmds_need_lc+:} false; then : $as_echo_n "(cached) " >&6 else $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl pic_flag=$lt_prog_compiler_pic compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag allow_undefined_flag= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc=no else lt_cv_archive_cmds_need_lc=yes fi allow_undefined_flag=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 $as_echo "$lt_cv_archive_cmds_need_lc" >&6; } archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc ;; esac fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 $as_echo_n "checking dynamic linker characteristics... 
" >&6; } if test "$GCC" = yes; then case $host_os in darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; *) lt_awk_arg="/^libraries:/" ;; esac case $host_os in mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;; *) lt_sed_strip_eq="s,=/,/,g" ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[lt_foo]++; } if (lt_freq[lt_foo] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's,/\([A-Za-z]:\),\1,g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. 
case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl*) # Native MSVC libname_spec='$name' soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' library_names_spec='${libname}.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec="$LIB" if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC wrapper library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=yes sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if ${lt_cv_shlibpath_overrides_runpath+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Add ABI-specific directories to the system library path. sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[89] | openbsd2.[89].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 $as_echo "$dynamic_linker" >&6; } test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" fi if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 $as_echo_n "checking how to hardcode library paths into programs... " >&6; } hardcode_action= if test -n "$hardcode_libdir_flag_spec" || test -n "$runpath_var" || test "X$hardcode_automatic" = "Xyes" ; then # We can hardcode non-existent directories. if test "$hardcode_direct" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no && test "$hardcode_minus_L" != no; then # Linking always hardcodes the temporary library directory. 
hardcode_action=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action=unsupported fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 $as_echo "$hardcode_action" >&6; } if test "$hardcode_action" = relink || test "$inherit_rpath" = yes; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen="load_add_on" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32* | cegcc*) lt_cv_dlopen="LoadLibrary" lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen="dlopen" lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 $as_echo_n "checking for dlopen in -ldl... " >&6; } if ${ac_cv_lib_dl_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dl_dlopen=yes else ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 $as_echo "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else lt_cv_dlopen="dyld" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes fi ;; *) ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" if test "x$ac_cv_func_shl_load" = xyes; then : lt_cv_dlopen="shl_load" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 $as_echo_n "checking for shl_load in -ldld... " >&6; } if ${ac_cv_lib_dld_shl_load+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char shl_load (); int main () { return shl_load (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dld_shl_load=yes else ac_cv_lib_dld_shl_load=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 $as_echo "$ac_cv_lib_dld_shl_load" >&6; } if test "x$ac_cv_lib_dld_shl_load" = xyes; then : lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" else ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" if test "x$ac_cv_func_dlopen" = xyes; then : lt_cv_dlopen="dlopen" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 $as_echo_n "checking for dlopen in -ldl... " >&6; } if ${ac_cv_lib_dl_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dl_dlopen=yes else ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 $as_echo "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 $as_echo_n "checking for dlopen in -lsvld... " >&6; } if ${ac_cv_lib_svld_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lsvld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_svld_dlopen=yes else ac_cv_lib_svld_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 $as_echo "$ac_cv_lib_svld_dlopen" >&6; } if test "x$ac_cv_lib_svld_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 $as_echo_n "checking for dld_link in -ldld... " >&6; } if ${ac_cv_lib_dld_dld_link+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dld_link (); int main () { return dld_link (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dld_dld_link=yes else ac_cv_lib_dld_dld_link=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 $as_echo "$ac_cv_lib_dld_dld_link" >&6; } if test "x$ac_cv_lib_dld_dld_link" = xyes; then : lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" fi fi fi fi fi fi ;; esac if test "x$lt_cv_dlopen" != xno; then enable_dlopen=yes else enable_dlopen=no fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS="$CPPFLAGS" test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS="$LDFLAGS" wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS="$LIBS" LIBS="$lt_cv_dlopen_libs $LIBS" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 $as_echo_n "checking whether a program can dlopen itself... " >&6; } if ${lt_cv_dlopen_self+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisbility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? 
case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; esac else : # compilation failed lt_cv_dlopen_self=no fi fi rm -fr conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 $as_echo "$lt_cv_dlopen_self" >&6; } if test "x$lt_cv_dlopen_self" = xyes; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 $as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; } if ${lt_cv_dlopen_self_static+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self_static=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisbility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; esac else : # compilation failed lt_cv_dlopen_self_static=no fi fi rm -fr conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 $as_echo "$lt_cv_dlopen_self_static" >&6; } fi CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi striplib= old_striplib= { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 $as_echo_n "checking whether stripping libraries is possible... 
" >&6; } if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" test -z "$striplib" && striplib="$STRIP --strip-unneeded" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else # FIXME - insert some real tests, host_os isn't really good enough case $host_os in darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" old_striplib="$STRIP -S" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi ;; *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } ;; esac fi # Report which library types will actually be built { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 $as_echo_n "checking if libtool supports shared libraries... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 $as_echo "$can_build_shared" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 $as_echo_n "checking whether to build shared libraries... " >&6; } test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[4-9]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 $as_echo "$enable_shared" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 $as_echo_n "checking whether to build static libraries... " >&6; } # Make sure either enable_shared or enable_static is yes. test "$enable_shared" = yes || enable_static=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 $as_echo "$enable_static" >&6; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC="$lt_save_CC" if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 $as_echo_n "checking how to run the C++ preprocessor... " >&6; } if test -z "$CXXCPP"; then if ${ac_cv_prog_CXXCPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CXXCPP needs to be expanded for CXXCPP in "$CXX -E" "/lib/cpp" do ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_cxx_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_cxx_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CXXCPP=$CXXCPP fi CXXCPP=$ac_cv_prog_CXXCPP else ac_cv_prog_CXXCPP=$CXXCPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 $as_echo "$CXXCPP" >&6; } ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_cxx_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_cxx_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu else _lt_caught_CXX_error=yes fi ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu archive_cmds_need_lc_CXX=no allow_undefined_flag_CXX= always_export_symbols_CXX=no archive_expsym_cmds_CXX= compiler_needs_object_CXX=no export_dynamic_flag_spec_CXX= hardcode_direct_CXX=no hardcode_direct_absolute_CXX=no hardcode_libdir_flag_spec_CXX= hardcode_libdir_separator_CXX= hardcode_minus_L_CXX=no hardcode_shlibpath_var_CXX=unsupported hardcode_automatic_CXX=no inherit_rpath_CXX=no module_cmds_CXX= module_expsym_cmds_CXX= link_all_deplibs_CXX=unknown old_archive_cmds_CXX=$old_archive_cmds reload_flag_CXX=$reload_flag reload_cmds_CXX=$reload_cmds no_undefined_flag_CXX= whole_archive_flag_spec_CXX= enable_shared_with_static_runtimes_CXX=no # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. 
objext=o objext_CXX=$objext # No sense in running all these tests if we already determined that # the CXX compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_caught_CXX_error" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} CFLAGS=$CXXFLAGS compiler=$CC compiler_CXX=$CC for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` if test -n "$compiler"; then # We don't want -fno-exception when compiling C++ code, so set the # no_builtin_flag separately if test "$GXX" = yes; then lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' else lt_prog_compiler_no_builtin_flag_CXX= fi if test "$GXX" = yes; then # Set up default GNU C++ configuration # Check whether --with-gnu-ld was given. if test "${with_gnu_ld+set}" = set; then : withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes else with_gnu_ld=no fi ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 $as_echo_n "checking for ld used by $CC... " >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. 
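# A relative ld name cannot be used directly, so with_gnu_ld is reset
# to unknown and the PATH search below locates the linker instead.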
with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 $as_echo_n "checking for GNU ld... " >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 $as_echo_n "checking for non-GNU ld... " >&6; } fi if ${lt_cv_path_LD+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 $as_echo "$LD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 $as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } if ${lt_cv_prog_gnu_ld+:} false; then : $as_echo_n "(cached) " >&6 else # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 $as_echo "$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. if test "$with_gnu_ld" = yes; then archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='${wl}' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec_CXX= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 $as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... 
" >&6; } ld_shlibs_CXX=yes case $host_os in aix3*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aix[4-9]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds_CXX='' hardcode_direct_CXX=yes hardcode_direct_absolute_CXX=yes hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes file_list_spec_CXX='${wl}-f,' if test "$GXX" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_CXX=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_CXX=yes hardcode_libdir_flag_spec_CXX='-L$libdir' hardcode_libdir_separator_CXX= fi esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi export_dynamic_flag_spec_CXX='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to # export. always_export_symbols_CXX=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag_CXX='-berok' # Determine the default libpath from the value encoded in an empty # executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath__CXX+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. 
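# If neither the 32-bit probe nor the -X64 retry below yields a
# libpath, the conventional default of /usr/lib:/lib is used instead.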
if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath__CXX fi hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag_CXX="-z nodefs" archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath__CXX+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath__CXX fi hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_CXX=' ${wl}-bernotok' allow_undefined_flag_CXX=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_CXX='$convenience' fi archive_cmds_need_lc_CXX=yes # This is similar to how AIX traditionally builds its shared # libraries. archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag_CXX=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. 
FIXME archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else ld_shlibs_CXX=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl*) # Native MSVC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. hardcode_libdir_flag_spec_CXX=' ' allow_undefined_flag_CXX=unsupported always_export_symbols_CXX=yes file_list_spec_CXX='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true' enable_shared_with_static_runtimes_CXX=yes # Don't use ranlib old_postinstall_cmds_CXX='chmod 644 $oldlib' postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ func_to_tool_file "$lt_outputfile"~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # g++ # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec_CXX='-L$libdir' export_dynamic_flag_spec_CXX='${wl}--export-all-symbols' allow_undefined_flag_CXX=unsupported always_export_symbols_CXX=no enable_shared_with_static_runtimes_CXX=yes if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... 
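# Otherwise an EXPORTS header is written first and the symbol list is
# appended, producing the $output_objdir/$soname.def used by the link.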
archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_CXX=no fi ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc_CXX=no hardcode_direct_CXX=no hardcode_automatic_CXX=yes hardcode_shlibpath_var_CXX=unsupported if test "$lt_cv_ld_force_load" = "yes"; then whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' else whole_archive_flag_spec_CXX='' fi link_all_deplibs_CXX=yes allow_undefined_flag_CXX="$_lt_dar_allow_undefined" case $cc_basename in ifort*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test "$_lt_dar_can_shared" = "yes"; then output_verbose_link_cmd=func_echo_all archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" module_expsym_cmds_CXX="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" if test "$lt_cv_apple_cc_single_mod" != "yes"; then archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" fi else ld_shlibs_CXX=no fi ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; freebsd2.*) # C++ shared libraries reported to be fairly broken before # switch to ELF ld_shlibs_CXX=no ;; freebsd-elf*) archive_cmds_need_lc_CXX=no ;; freebsd* | dragonfly*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions ld_shlibs_CXX=yes ;; gnu*) ;; haiku*) archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' link_all_deplibs_CXX=yes ;; hpux9*) hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' hardcode_libdir_separator_CXX=: export_dynamic_flag_spec_CXX='${wl}-E' hardcode_direct_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. 
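# On HP-UX 9 only aCC gets explicit support below: the library is
# built with "$CC -b" as $output_objdir/$soname and then moved to
# $lib; g++ uses -shared -nostdlib instead, and plain CC is left
# unsupported (ld_shlibs_CXX=no).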
case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; hpux10*|hpux11*) if test $with_gnu_ld = no; then hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' hardcode_libdir_separator_CXX=: case $host_cpu in hppa*64*|ia64*) ;; *) export_dynamic_flag_spec_CXX='${wl}-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no ;; *) hardcode_direct_CXX=yes hardcode_direct_absolute_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. ;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
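# The filter below keeps only words from -v output lines that mention
# -L, dropping every object file except conftest.$objext itself.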
output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then if test $with_gnu_ld = no; then case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; interix[3-9]*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' export_dynamic_flag_spec_CXX='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test "$GXX" = yes; then if test "$with_gnu_ld" = no; then archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' fi fi link_all_deplibs_CXX=yes ;; esac hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: inherit_rpath_CXX=yes ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # Kuck and Associates, Inc. 
(KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc* | ecpc* ) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. 
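# The version sniff below keeps $predep_objects/$postdep_objects only
# when icpc identifies itself as "Version 7."; 8.0 and newer use the
# plain commands (with -i_dynamic added on ia64).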
case `$CC -V 2>&1` in *"Version 7."*) archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; esac archive_cmds_need_lc_CXX=no hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; pgCC* | pgcpp*) # Portland Group C++ compiler case `$CC -V` in *pgCC\ [1-5].* | *pgcpp\ [1-5].*) prelink_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' old_archive_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ $RANLIB $oldlib' archive_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' archive_expsym_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; *) # Version 6 and above use weak symbols archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; esac hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' ;; cxx*) # Compaq C++ archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_CXX='-rpath $libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
# # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' ;; xl* | mpixl* | bgxl*) # IBM XL 8.0 on PPC, with GNU ld hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' hardcode_libdir_flag_spec_CXX='-R$libdir' whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' compiler_needs_object_CXX=yes # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; m88k*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; *nto* | *qnx*) ld_shlibs_CXX=yes ;; openbsd2*) # C++ shared libraries are fairly broken ld_shlibs_CXX=no ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no hardcode_direct_absolute_CXX=yes archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' export_dynamic_flag_spec_CXX='${wl}-E' whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' fi output_verbose_link_cmd=func_echo_all else ld_shlibs_CXX=no fi ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' hardcode_libdir_separator_CXX=: # Archives containing C++ object files must be created using # the KAI C++ compiler. 
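# On osf3 the KAI driver is invoked with -Bstatic to build the static
# archive; later OSF releases invoke it without that flag.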
case $host in osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; esac ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; cxx*) case $host in osf3*) allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' ;; *) allow_undefined_flag_CXX=' -expect_unresolved \*' archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ $RM $lib.exp' hardcode_libdir_flag_spec_CXX='-rpath $libdir' ;; esac hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' case $host in osf3*) archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; *) archive_cmds_CXX='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; esac hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ archive_cmds_need_lc_CXX=yes no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_shlibpath_var_CXX=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' ;; esac link_all_deplibs_CXX=yes output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test "$GXX" = yes && test "$with_gnu_ld" = no; then no_undefined_flag_CXX=' ${wl}-z ${wl}defs' if $CC --version | $GREP -v '^2\.7' > /dev/null; then archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # g++ 2.7 appears to require `-G' NOT `-shared' on this # platform. 
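# Hence the -G (rather than -shared) archive commands below; the
# -z allextract/-z defaultextract whole-archive flags are still set
# for anything newer than Solaris 2.5.x.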
archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' fi hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir' case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_CXX='${wl}-z,text' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. 
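# Accordingly, no_undefined_flag uses ${wl}-z,text below and
# unresolved references are tolerated via ${wl}-z,nodefs instead of
# passing -z defs.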
no_undefined_flag_CXX='${wl}-z,text' allow_undefined_flag_CXX='${wl}-z,nodefs' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='${wl}-R,$libdir' hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes export_dynamic_flag_spec_CXX='${wl}-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~ '"$old_archive_cmds_CXX" reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~ '"$reload_cmds_CXX" ;; *) archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 $as_echo "$ld_shlibs_CXX" >&6; } test "$ld_shlibs_CXX" = no && can_build_shared=no GCC_CXX="$GXX" LD_CXX="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... # Dependencies to place before and after the object being linked: predep_objects_CXX= postdep_objects_CXX= predeps_CXX= postdeps_CXX= compiler_lib_search_path_CXX= cat > conftest.$ac_ext <<_LT_EOF class Foo { public: Foo (void) { a = 0; } private: int a; }; _LT_EOF _lt_libdeps_save_CFLAGS=$CFLAGS case "$CC $CFLAGS " in #( *\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; *\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; *\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; esac if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no for p in `eval "$output_verbose_link_cmd"`; do case ${prev}${p} in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test $p = "-L" || test $p = "-R"; then prev=$p continue fi # Expand the sysroot to ease extracting the directories later. if test -z "$prev"; then case $p in -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; esac fi case $p in =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; esac if test "$pre_test_object_deps_done" = no; then case ${prev} in -L | -R) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. 
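# -L/-R entries seen before conftest.$objext accumulate in
# compiler_lib_search_path_CXX (objects in predep_objects_CXX);
# anything seen after it goes to postdeps_CXX or postdep_objects_CXX.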
if test -z "$compiler_lib_search_path_CXX"; then compiler_lib_search_path_CXX="${prev}${p}" else compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$postdeps_CXX"; then postdeps_CXX="${prev}${p}" else postdeps_CXX="${postdeps_CXX} ${prev}${p}" fi fi prev= ;; *.lto.$objext) ;; # Ignore GCC LTO objects *.$objext) # This assumes that the test object file only shows up # once in the compiler output. if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test "$pre_test_object_deps_done" = no; then if test -z "$predep_objects_CXX"; then predep_objects_CXX="$p" else predep_objects_CXX="$predep_objects_CXX $p" fi else if test -z "$postdep_objects_CXX"; then postdep_objects_CXX="$p" else postdep_objects_CXX="$postdep_objects_CXX $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. rm -f a.out a.exe else echo "libtool.m4: error: problem compiling CXX test program" fi $RM -f confest.$objext CFLAGS=$_lt_libdeps_save_CFLAGS # PORTME: override above test on systems where it is broken case $host_os in interix[3-9]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. predep_objects_CXX= postdep_objects_CXX= postdeps_CXX= ;; linux*) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac if test "$solaris_use_stlport4" != yes; then postdeps_CXX='-library=Cstd -library=Crun' fi ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac # Adding this requires a known-good setup of shared libraries for # Sun compiler versions before 5.6, else PIC objects from an old # archive will be linked into the output, leading to subtle bugs. if test "$solaris_use_stlport4" != yes; then postdeps_CXX='-library=Cstd -library=Crun' fi ;; esac ;; esac case " $postdeps_CXX " in *" -lc "*) archive_cmds_need_lc_CXX=no ;; esac compiler_lib_search_dirs_CXX= if test -n "${compiler_lib_search_path_CXX}"; then compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` fi lt_prog_compiler_wl_CXX= lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX= # C++ specific cases for pic, static, wl, etc. if test "$GXX" = yes; then lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic_CXX='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. 
;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic_CXX='-DDLL_EXPORT' ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_CXX='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all lt_prog_compiler_pic_CXX= ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static_CXX= ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_CXX=-Kconform_pic fi ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_CXX='-fPIC -shared' ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac else case $host_os in aix[4-9]*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' else lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic_CXX='-DDLL_EXPORT' ;; dgux*) case $cc_basename in ec++*) lt_prog_compiler_pic_CXX='-KPIC' ;; ghcx*) # Green Hills C++ Compiler lt_prog_compiler_pic_CXX='-pic' ;; *) ;; esac ;; freebsd* | dragonfly*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' if test "$host_cpu" != ia64; then lt_prog_compiler_pic_CXX='+Z' fi ;; aCC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_CXX='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # KAI C++ Compiler lt_prog_compiler_wl_CXX='--backend -Wl,' lt_prog_compiler_pic_CXX='-fPIC' ;; ecpc* ) # old Intel C++ for x86_64 which still supported -KPIC. lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-static' ;; icpc* ) # Intel C++, used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. 
lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-fPIC' lt_prog_compiler_static_CXX='-static' ;; pgCC* | pgcpp*) # Portland Group C++ compiler lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-fpic' lt_prog_compiler_static_CXX='-Bstatic' ;; cxx*) # Compaq C++ # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX='-non_shared' ;; xlc* | xlC* | bgxl[cC]* | mpixl[cC]*) # IBM XL 8.0, 9.0 on PPC and BlueGene lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-qpic' lt_prog_compiler_static_CXX='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' lt_prog_compiler_wl_CXX='-Qoption ld ' ;; esac ;; esac ;; lynxos*) ;; m88k*) ;; mvs*) case $cc_basename in cxx*) lt_prog_compiler_pic_CXX='-W c,exportall' ;; *) ;; esac ;; netbsd*) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_CXX='-fPIC -shared' ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) lt_prog_compiler_wl_CXX='--backend -Wl,' ;; RCC*) # Rational C++ 2.4.1 lt_prog_compiler_pic_CXX='-pic' ;; cxx*) # Digital/Compaq C++ lt_prog_compiler_wl_CXX='-Wl,' # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX='-non_shared' ;; *) ;; esac ;; psos*) ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' lt_prog_compiler_wl_CXX='-Qoption ld ' ;; gcx*) # Green Hills C++ Compiler lt_prog_compiler_pic_CXX='-PIC' ;; *) ;; esac ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x lt_prog_compiler_pic_CXX='-pic' lt_prog_compiler_static_CXX='-Bstatic' ;; lcc*) # Lucid lt_prog_compiler_pic_CXX='-pic' ;; *) ;; esac ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 lt_prog_compiler_pic_CXX='-KPIC' ;; *) ;; esac ;; vxworks*) ;; *) lt_prog_compiler_can_build_shared_CXX=no ;; esac fi case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic_CXX= ;; *) lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC" ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 $as_echo_n "checking for $compiler option to produce PIC... " >&6; } if ${lt_cv_prog_compiler_pic_CXX+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5 $as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; } lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic_CXX"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5 $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... 
" >&6; } if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic_works_CXX=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works_CXX=yes fi fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5 $as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; } if test x"$lt_cv_prog_compiler_pic_works_CXX" = xyes; then case $lt_prog_compiler_pic_CXX in "" | " "*) ;; *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;; esac else lt_prog_compiler_pic_CXX= lt_prog_compiler_can_build_shared_CXX=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\" { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if ${lt_cv_prog_compiler_static_works_CXX+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_static_works_CXX=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works_CXX=yes fi else lt_cv_prog_compiler_static_works_CXX=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5 $as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; } if test x"$lt_cv_prog_compiler_static_works_CXX" = xyes; then : else lt_prog_compiler_static_CXX= fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... 
" >&6; } if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o_CXX=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_CXX=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 $as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o_CXX=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_CXX=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. 
$RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 $as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } hard_links="nottested" if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 $as_echo_n "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 $as_echo "$hard_links" >&6; } if test "$hard_links" = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 $as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 $as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' case $host_os in aix[4-9]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global defined # symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi ;; pw32*) export_symbols_cmds_CXX="$ltdll_cmds" ;; cygwin* | mingw* | cegcc*) case $cc_basename in cl*) exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' ;; esac ;; *) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 $as_echo "$ld_shlibs_CXX" >&6; } test "$ld_shlibs_CXX" = no && can_build_shared=no with_gnu_ld_CXX=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc_CXX" in x|xyes) # Assume -lc should be added archive_cmds_need_lc_CXX=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $archive_cmds_CXX in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. 
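# The check that follows reruns the C++ link command with verbose output and
# greps for " -lc " to see whether the driver already passes -lc by itself.
# A hand-run sketch of the same idea, assuming a gcc-style driver
# (hypothetical names; the helper is only defined, never called):
lt_sketch_implicit_lc_probe ()
{
  echo 'int dummy;' > lctest.c
  ${CC-cc} -fPIC -c lctest.c -o lctest.o
  if ${CC-cc} -shared -v -o lctest.so lctest.o 2>&1 | grep ' -lc ' >/dev/null
  then echo 'driver already passes -lc; do not add it again before -lgcc'
  else echo 'driver does not pass -lc; it must be added explicitly'
  fi
  rm -f lctest.c lctest.o lctest.so
}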
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then : $as_echo_n "(cached) " >&6 else $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl_CXX pic_flag=$lt_prog_compiler_pic_CXX compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag_CXX allow_undefined_flag_CXX= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc_CXX=no else lt_cv_archive_cmds_need_lc_CXX=yes fi allow_undefined_flag_CXX=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5 $as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; } archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX ;; esac fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 $as_echo_n "checking dynamic linker characteristics... " >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. 
Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl*) # Native MSVC libname_spec='$name' soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' library_names_spec='${libname}.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec="$LIB" if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC wrapper library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=yes sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be glibc/ELF. 
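# The library_names_spec/soname_spec values in this case statement are stored
# as literal templates and expanded by libtool per library.  An illustrative
# expansion for a typical ELF library (hypothetical values):
#   libname=libfoo  release=''  shared_ext=.so  versuffix=.1.2.3  major=.1
#   library_names_spec -> libfoo.so.1.2.3 libfoo.so.1 libfoo.so
#   soname_spec        -> libfoo.so.1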
linux* | k*bsd*-gnu | kopensolaris*-gnu) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if ${lt_cv_shlibpath_overrides_runpath+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\"" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO"; then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Add ABI-specific directories to the system library path. sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
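# The shlibpath_overrides_runpath probe above links a test program with an
# -rpath of /foo and looks for a RUNPATH entry in its dynamic section:
# DT_RPATH is searched before LD_LIBRARY_PATH, while DT_RUNPATH is searched
# after it, so binutils ld patched to emit RUNPATH make LD_LIBRARY_PATH win.
# An equivalent hand check on any binary (illustrative; never called here):
lt_sketch_runpath_check ()
{
  ${OBJDUMP-objdump} -p "$1" 2>/dev/null | grep -E 'R(UN)?PATH'
}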
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[89] | openbsd2.[89].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 $as_echo "$dynamic_linker" >&6; } test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" fi if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 $as_echo_n "checking how to hardcode library paths into programs... " >&6; } hardcode_action_CXX= if test -n "$hardcode_libdir_flag_spec_CXX" || test -n "$runpath_var_CXX" || test "X$hardcode_automatic_CXX" = "Xyes" ; then # We can hardcode non-existent directories. if test "$hardcode_direct_CXX" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" != no && test "$hardcode_minus_L_CXX" != no; then # Linking always hardcodes the temporary library directory. 
hardcode_action_CXX=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action_CXX=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action_CXX=unsupported fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5 $as_echo "$hardcode_action_CXX" >&6; } if test "$hardcode_action_CXX" = relink || test "$inherit_rpath_CXX" = yes; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi fi # test -n "$compiler" CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld fi # test "$_lt_caught_CXX_error" != yes ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_config_commands="$ac_config_commands libtool" # Only expand once: # Check if 64 bit pointer support is required on 32 bit machines # Disabled by default # Check whether --enable-64 was given. if test "${enable_64+set}" = set; then : enableval=$enable_64; fi if test "x${enable_64}" = "xyes"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for 64bit compilation support" >&5 $as_echo_n "checking for 64bit compilation support... " >&6; } case ${host_os} in #( aix*) : CPPFLAGS="-DAJ_AIX64 ${CPPFLAGS}" case ${CC} in #( gcc) : ;; #( *) : as_fn_append CC " -q64" ;; esac NM="nm -B -X 64" AR="ar -X 64" ;; #( hpux*) : case ${CC} in #( gcc) : ;; #( *) : as_fn_append CC " +DD64" ;; esac $as_echo "#define HPUX64PTRS 1" >>confdefs.h ;; #( *) : ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 $as_echo "done" >&6; } fi # Compiler optimisations # The Solaris 64bit ptr check has to be done here owing to param order # Check whether --with-optimisation was given. if test "${with_optimisation+set}" = set; then : withval=$with_optimisation; fi if test "x${with_optimisation}" != "xno"; then : case ${CC} in #( gcc) : # Intel MacOSX requires reduced optimisation for PCRE code # other OSs just use -O2 case ${host_os} in #( darwin*) : if test "x${host_cpu}" = "xi386"; then : as_fn_append CFLAGS " -O1" else as_fn_append CFLAGS " -O2" fi ;; #( *) : as_fn_append CFLAGS " -O2" ;; esac ;; #( *) : case ${host_os} in #( aix*) : as_fn_append CFLAGS " -O3 -qstrict -qarch=auto -qtune=auto" ;; #( irix*) : LD="/usr/bin/ld -IPA" as_fn_append CFLAGS " -O3" ;; #( hpux*) : as_fn_append CFLAGS " -fast" ;; #( osf*) : as_fn_append CFLAGS " -fast -U_FASTMATH" ;; #( solaris*) : as_fn_append CFLAGS " -O" # test for 64 bit ptr here (see Solaris 64bit above) if test "x${enable_64}" = "xyes"; then : as_fn_append CFLAGS " -xtarget=ultra -xarch=v9" fi ;; #( linux*) : # Default optimisation for non-gcc compilers under Linux as_fn_append CFLAGS " -O2" ;; #( freebsd*) : as_fn_append CFLAGS " -O2" ;; #( *) : ;; esac ;; esac fi # Compiler warning settings: --enable-warnings, defines WARN_CFLAGS # Check whether --enable-warnings was given. 
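# (The feature switches handled from here on are ordinary configure
#  arguments; an illustrative invocation, combining several of them:
#    ./configure --enable-64 --enable-warnings --enable-devwarnings
#  Only --enable-warnings is acted on in the block immediately below.)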
if test "${enable_warnings+set}" = set; then : enableval=$enable_warnings; fi if test "x${enable_warnings}" = "xyes"; then : case ${CC} in #( gcc) : # -Wall priovides: # -Waddress # -Warray-bounds (only with -O2) # -Wc++0x-compat # -Wchar-subscripts # -Wenum-compare (in C/Objc; this is on by default in C++) # -Wimplicit-int (C and Objective-C only) # -Wimplicit-function-declaration (C and Objective-C only) # -Wcomment # -Wformat # -Wmain (only for C/ObjC and unless -ffreestanding) # -Wmissing-braces # -Wnonnull # -Wparentheses # -Wpointer-sign # -Wreorder # -Wreturn-type # -Wsequence-point # -Wsign-compare (only in C++) # -Wstrict-aliasing # -Wstrict-overflow=1 # -Wswitch # -Wtrigraphs # -Wuninitialized # -Wunknown-pragmas # -Wunused-function # -Wunused-label # -Wunused-value # -Wunused-variable # -Wvolatile-register-var WARN_CFLAGS="-Wall -fno-strict-aliasing" ;; #( *) : ;; esac fi # Compiler developer warning settings: --enable-devwarnings, # sets DEVWARN_CFLAGS # Check whether --enable-devwarnings was given. if test "${enable_devwarnings+set}" = set; then : enableval=$enable_devwarnings; fi if test "x${enable_devwarnings}" = "xyes"; then : case ${CC} in #( gcc) : # Only -Wstrict-prototypes and -Wmissing-prototypes are set in this # EMBASSY module. DEVWARN_CFLAGS="-Wstrict-prototypes -Wmissing-prototypes" # Diagnostic options for the GNU GCC compiler version 4.6.1. # http://gcc.gnu.org/onlinedocs/gcc-4.6.1/gcc/Warning-Options.html # # -Wextra: more warnings beyond what -Wall provides # -Wclobbered # -Wempty-body # -Wignored-qualifiers # -Wmissing-field-initializers # -Wmissing-parameter-type (C only) # -Wold-style-declaration (C only) # -Woverride-init # -Wsign-compare # -Wtype-limits # -Wuninitialized # -Wunused-parameter (only with -Wunused or -Wall) # -Wunused-but-set-parameter (only with -Wunused or -Wall) # AS_VAR_SET([DEVWARN_CFLAGS], ["-Wextra"]) # Warn if a function is declared or defined without specifying the # argument types. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wstrict-prototypes"]) # Warn if a global function is defined without a previous prototype # declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-prototypes"]) # Warn for obsolescent usages, according to the C Standard, # in a declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wold-style-definition"]) # Warn if a global function is defined without a previous declaration. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-declarations"]) # When compiling C, give string constants the type const char[length] # so that copying the address of one into a non-const char * pointer # will get a warning. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wwrite-strings"]) # Warn whenever a local variable or type declaration shadows another # variable, parameter, type, or class member (in C++), or whenever a # built-in function is shadowed. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wshadow"]) # Warn when a declaration is found after a statement in a block. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wdeclaration-after-statement"]) # Warn if an undefined identifier is evaluated in an `#if' directive. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wundef"]) # Warn about anything that depends on the "size of" a function type # or of void. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wpointer-arith"]) # Warn whenever a pointer is cast so as to remove a type qualifier # from the target type. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcast-qual"]) # Warn whenever a pointer is cast such that the required alignment # of the target is increased. 
# AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcast-align"]) # Warn whenever a function call is cast to a non-matching type. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wbad-function-cast"]) # Warn when a comparison between signed and unsigned values could # produce an incorrect result when the signed value is converted to # unsigned. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wsign-compare"]) # Warn if a structure's initializer has some fields missing. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-field-initializers"]) # An alias of the new option -Wsuggest-attribute=noreturn # Warn for cases where adding an attribute may be beneficial. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wmissing-noreturn"]) # Warn if an extern declaration is encountered within a function. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wnested-externs"]) # Warn if anything is declared more than once in the same scope, # even in cases where multiple declaration is valid and changes # nothing. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wredundant-decls"]) # Warn if the loop cannot be optimized because the compiler could not # assume anything on the bounds of the loop indices. # -Wunsafe-loop-optimizations objects to loops with increments more # than 1 because if the end is at INT_MAX it could run forever ... # rarely # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wunsafe-loop-optimizations"]) # Warn for implicit conversions that may alter a value. # -Wconversion is brain-damaged - complains about char arguments # every time # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wconversion"]) # Warn about certain constructs that behave differently in traditional # and ISO C. # -Wtraditional gives #elif and #error msgs # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wtraditional"]) # Warn if floating point values are used in equality comparisons. # -Wfloat-equal will not allow tests for values still 0.0 # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wfloat-equal"]) # This option is only active when -ftree-vrp is active # (default for -O2 and above). It warns about subscripts to arrays # that are always out of bounds. # -Warray-bounds gives false positives in gcc 4.6.0 # Disable rather than use a non-portable pragma # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wno-array-bounds"]) ;; #( icc) : # Diagnostic options for the Intel(R) C++ compiler version 11.1. # http://software.intel.com/en-us/articles/intel-c-compiler-professional-edition-for-linux-documentation/ # This option specifies the level of diagnostic messages to be # generated by the compiler. DEVWARN_CFLAGS="-w2" # This option determines whether a warning is issued if generated # code is not C++ ABI compliant. as_fn_append DEVWARN_CFLAGS " -Wabi" # This option tells the compiler to display errors, warnings, and # remarks. as_fn_append DEVWARN_CFLAGS " -Wall" # This option tells the compiler to display a shorter form of # diagnostic output. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wbrief"]) # This option warns if cast is used to override pointer type # qualifier as_fn_append DEVWARN_CFLAGS " -Wcast-qual" # This option tells the compiler to perform compile-time code # checking for certain code. as_fn_append DEVWARN_CFLAGS " -Wcheck" # This option determines whether a warning is issued when /* # appears in the middle of a /* */ comment. as_fn_append DEVWARN_CFLAGS " -Wcomment" # Set maximum number of template instantiation contexts shown in # diagnostic. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wcontext-limit=n"]) # This option enables warnings for implicit conversions that may # alter a value. 
as_fn_append DEVWARN_CFLAGS " -Wconversion" # This option determines whether warnings are issued for deprecated # features. as_fn_append DEVWARN_CFLAGS " -Wdeprecated" # This option enables warnings based on certain C++ programming # guidelines. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Weffc++"]) # This option changes all warnings to errors. # Alternate: -diag-error warn # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Werror"]) # This option changes all warnings and remarks to errors. # Alternate: -diag-error warn, remark # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Werror-all"]) # This option determines whether warnings are issued about extra # tokens at the end of preprocessor directives. as_fn_append DEVWARN_CFLAGS " -Wextra-tokens" # This option determines whether argument checking is enabled for # calls to printf, scanf, and so forth. as_fn_append DEVWARN_CFLAGS " -Wformat" # This option determines whether the compiler issues a warning when # the use of format functions may cause security problems. as_fn_append DEVWARN_CFLAGS " -Wformat-security" # This option enables diagnostics about what is inlined and what is # not inlined. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Winline"]) # This option determines whether a warning is issued if the return # type of main is not expected. as_fn_append DEVWARN_CFLAGS " -Wmain" # This option determines whether warnings are issued for global # functions and variables without prior declaration. as_fn_append DEVWARN_CFLAGS " -Wmissing-declarations" # Determines whether warnings are issued for missing prototypes. as_fn_append DEVWARN_CFLAGS " -Wmissing-prototypes" # This option enables warnings if a multicharacter constant # ('ABC') is used. as_fn_append DEVWARN_CFLAGS " -Wmultichar" # Issue a warning when a class appears to be polymorphic, # yet it declares a non-virtual one. # This option is supported in C++ only. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wnon-virtual-dtor"]) # This option warns about operations that could result in # integer overflow. as_fn_append DEVWARN_CFLAGS " -Woverflow" # This option tells the compiler to display diagnostics for 64-bit # porting. as_fn_append DEVWARN_CFLAGS " -Wp64" # Determines whether warnings are issued for questionable pointer # arithmetic. as_fn_append DEVWARN_CFLAGS " -Wpointer-arith" # his option determines whether a warning is issued about the # use of #pragma once. as_fn_append DEVWARN_CFLAGS " -Wpragma-once" # Issue a warning when the order of member initializers does not # match the order in which they must be executed. # This option is supported with C++ only. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wreorder"]) # This option determines whether warnings are issued when a function # uses the default int return type or when a return statement is # used in a void function. as_fn_append DEVWARN_CFLAGS " -Wreturn-type" # This option determines whether a warning is issued when a variable # declaration hides a previous declaration. as_fn_append DEVWARN_CFLAGS " -Wshadow" # This option warns for code that might violate the optimizer's # strict aliasing rules. Warnings are issued only when using # -fstrict-aliasing or -ansi-alias. # AS_VAR_APPEND([DEVWARN_CFLAGS], [" -Wstrict-aliasing"]) # This option determines whether warnings are issued for functions # declared or defined without specified argument types. as_fn_append DEVWARN_CFLAGS " -Wstrict-prototypes" # This option determines whether warnings are issued if any trigraphs # are encountered that might change the meaning of the program. 
as_fn_append DEVWARN_CFLAGS " -Wtrigraphs" # This option determines whether a warning is issued if a variable # is used before being initialized. as_fn_append DEVWARN_CFLAGS " -Wuninitialized" # This option determines whether a warning is issued if an unknown # #pragma directive is used. as_fn_append DEVWARN_CFLAGS " -Wunknown-pragmas" # This option determines whether a warning is issued if a declared # function is not used. as_fn_append DEVWARN_CFLAGS " -Wunused-function" # This option determines whether a warning is issued if a local or # non-constant static variable is unused after being declared. as_fn_append DEVWARN_CFLAGS " -Wunused-variable" # This option issues a diagnostic message if const char* is # converted to (non-const) char *. as_fn_append DEVWARN_CFLAGS " -Wwrite-strings" # Disable warning #981 operands are evaluated in unspecified order # http://software.intel.com/en-us/articles/cdiag981/ as_fn_append DEVWARN_CFLAGS " -diag-disable 981" ;; #( *) : ;; esac fi # Compiler extra developer warning settings: --enable-devextrawarnings, # appends DEVWARN_CFLAGS # Will only have an effect if --enable-devwarnings also given # Check whether --enable-devextrawarnings was given. if test "${enable_devextrawarnings+set}" = set; then : enableval=$enable_devextrawarnings; fi if test "x${enable_devwarnings}" = "xyes" && test "x${enable_devextrawarnings}" = "xyes"; then : case ${CC} in #( gcc) : # flags used by Ubuntu 8.10 to check open has 2/3 arguments etc. $as_echo "#define _FORTIFY_SOURCE 2" >>confdefs.h # compiler flags CPPFLAGS="-fstack-protector ${CPPFLAGS}" # warnings used by Ubuntu 8.10 # -Wall already includes: # -Wformat as_fn_append DEVWARN_CFLAGS " -Wformat-security -Wl,-z,relro" # -Wpadded means moving char to end of structs - but also flags # end of struct so need to add padding at end as_fn_append DEVWARN_CFLAGS " -Wpadded" ;; #( *) : ;; esac fi # Compile deprecated functions still used in the book text for 6.2.0 # Check whether --enable-buildbookdeprecated was given. if test "${enable_buildbookdeprecated+set}" = set; then : enableval=$enable_buildbookdeprecated; fi # Compile all deprecated functions # Check whether --enable-buildalldeprecated was given. if test "${enable_buildalldeprecated+set}" = set; then : enableval=$enable_buildalldeprecated; fi if test "x${enable_buildbookdeprecated}" = "xyes" || test "x${enable_buildalldeprecated}" = "xyes"; then : $as_echo "#define AJ_COMPILE_DEPRECATED_BOOK 1" >>confdefs.h fi if test "x${enable_buildalldeprecated}" = "xyes"; then : $as_echo "#define AJ_COMPILE_DEPRECATED 1" >>confdefs.h fi # Add extensions to Solaris for some reentrant functions case ${host_os} in #( solaris*) : as_fn_append CFLAGS " -D__EXTENSIONS__" ;; #( *) : ;; esac # Test whether --with-sgiabi given for IRIX (n32m3 n32m4 64m3 64m4) case ${host_os} in #( irix*) : case ${CC} in #( gcc) : ;; #( cc) : # # Handle SGI compiler flags # { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sgiabi" >&5 $as_echo_n "checking for sgiabi... " >&6; } # Check whether --with-sgiabi was given. 
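# Illustrative example only (not generated by Autoconf): an IRIX build with
# the SGI cc compiler using the new 32-bit ABI and MIPS4 instruction set
# could be configured as
#   ./configure --with-sgiabi=n32m4 CC=cc
# which the case below turns into "-n32 -mips4" in CFLAGS and a matching
# /usr/bin/ld -n32 -mips4 -IPA link command.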
if test "${with_sgiabi+set}" = set; then : withval=$with_sgiabi; if test "$withval" != no ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } case $host_os in irix*) if test "$withval" = n32m3 ; then CFLAGS="-n32 -mips3 $CFLAGS" LD="/usr/bin/ld -n32 -mips3 -IPA -L/usr/lib32" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib32 $LDFLAGS" fi fi if test "$withval" = n32m4 ; then CFLAGS="-n32 -mips4 $CFLAGS" LD="/usr/bin/ld -n32 -mips4 -IPA -L/usr/lib32" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib32 $LDFLAGS" fi fi if test "$withval" = 64m3 ; then CFLAGS="-64 -mips3 $CFLAGS" LD="/usr/bin/ld -64 -mips3 -IPA -L/usr/lib64" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib64 $LDFLAGS" fi fi if test "$withval" = 64m4 ; then CFLAGS="-64 -mips4 $CFLAGS" LD="/usr/bin/ld -64 -mips4 -IPA -L/usr/lib64" if test -d /usr/freeware ; then LDFLAGS="-L/usr/freeware/lib64 $LDFLAGS" fi fi ;; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi ;; #( *) : ;; esac ;; #( *) : ;; esac PCRE_MAJOR="7" PCRE_MINOR="9" PCRE_DATE="11-Apr-2009" PCRE_VERSION="${PCRE_MAJOR}.${PCRE_MINOR}" POSIX_MALLOC_THRESHOLD="-DPOSIX_MALLOC_THRESHOLD=10" PCRE_LIB_VERSION="0:1:0" PCRE_POSIXLIB_VERSION="0:0:0" # Checks for header files. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for X" >&5 $as_echo_n "checking for X... " >&6; } # Check whether --with-x was given. if test "${with_x+set}" = set; then : withval=$with_x; fi # $have_x is `yes', `no', `disabled', or empty when we do not yet know. if test "x$with_x" = xno; then # The user explicitly disabled X. have_x=disabled else case $x_includes,$x_libraries in #( *\'*) as_fn_error $? "cannot use X directory names containing '" "$LINENO" 5;; #( *,NONE | NONE,*) if ${ac_cv_have_x+:} false; then : $as_echo_n "(cached) " >&6 else # One or both of the vars are not set, and there is no cached value. ac_x_includes=no ac_x_libraries=no rm -f -r conftest.dir if mkdir conftest.dir; then cd conftest.dir cat >Imakefile <<'_ACEOF' incroot: @echo incroot='${INCROOT}' usrlibdir: @echo usrlibdir='${USRLIBDIR}' libdir: @echo libdir='${LIBDIR}' _ACEOF if (export CC; ${XMKMF-xmkmf}) >/dev/null 2>/dev/null && test -f Makefile; then # GNU make sometimes prints "make[1]: Entering ...", which would confuse us. for ac_var in incroot usrlibdir libdir; do eval "ac_im_$ac_var=\`\${MAKE-make} $ac_var 2>/dev/null | sed -n 's/^$ac_var=//p'\`" done # Open Windows xmkmf reportedly sets LIBDIR instead of USRLIBDIR. for ac_extension in a so sl dylib la dll; do if test ! -f "$ac_im_usrlibdir/libX11.$ac_extension" && test -f "$ac_im_libdir/libX11.$ac_extension"; then ac_im_usrlibdir=$ac_im_libdir; break fi done # Screen out bogus values from the imake configuration. They are # bogus both because they are the default anyway, and because # using them would break gcc on systems where it needs fixed includes. case $ac_im_incroot in /usr/include) ac_x_includes= ;; *) test -f "$ac_im_incroot/X11/Xos.h" && ac_x_includes=$ac_im_incroot;; esac case $ac_im_usrlibdir in /usr/lib | /usr/lib64 | /lib | /lib64) ;; *) test -d "$ac_im_usrlibdir" && ac_x_libraries=$ac_im_usrlibdir ;; esac fi cd .. rm -f -r conftest.dir fi # Standard set of common directories for X headers. # Check X11 before X11Rn because it is often a symlink to the current release. 
ac_x_header_dirs=' /usr/X11/include /usr/X11R7/include /usr/X11R6/include /usr/X11R5/include /usr/X11R4/include /usr/include/X11 /usr/include/X11R7 /usr/include/X11R6 /usr/include/X11R5 /usr/include/X11R4 /usr/local/X11/include /usr/local/X11R7/include /usr/local/X11R6/include /usr/local/X11R5/include /usr/local/X11R4/include /usr/local/include/X11 /usr/local/include/X11R7 /usr/local/include/X11R6 /usr/local/include/X11R5 /usr/local/include/X11R4 /usr/X386/include /usr/x386/include /usr/XFree86/include/X11 /usr/include /usr/local/include /usr/unsupported/include /usr/athena/include /usr/local/x11r5/include /usr/lpp/Xamples/include /usr/openwin/include /usr/openwin/share/include' if test "$ac_x_includes" = no; then # Guess where to find include files, by looking for Xlib.h. # First, try using that file with no special directory specified. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # We can compile using X headers with no special include directory. ac_x_includes= else for ac_dir in $ac_x_header_dirs; do if test -r "$ac_dir/X11/Xlib.h"; then ac_x_includes=$ac_dir break fi done fi rm -f conftest.err conftest.i conftest.$ac_ext fi # $ac_x_includes = no if test "$ac_x_libraries" = no; then # Check for the libraries. # See if we find them without any special options. # Don't add to $LIBS permanently. ac_save_LIBS=$LIBS LIBS="-lX11 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { XrmInitialize () ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : LIBS=$ac_save_LIBS # We can link X programs with no special library path. ac_x_libraries= else LIBS=$ac_save_LIBS for ac_dir in `$as_echo "$ac_x_includes $ac_x_header_dirs" | sed s/include/lib/g` do # Don't even attempt the hair of trying to link an X program! for ac_extension in a so sl dylib la dll; do if test -r "$ac_dir/libX11.$ac_extension"; then ac_x_libraries=$ac_dir break 2 fi done done fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi # $ac_x_libraries = no case $ac_x_includes,$ac_x_libraries in #( no,* | *,no | *\'*) # Didn't find X, or a directory has "'" in its name. ac_cv_have_x="have_x=no";; #( *) # Record where we found X for the cache. ac_cv_have_x="have_x=yes\ ac_x_includes='$ac_x_includes'\ ac_x_libraries='$ac_x_libraries'" esac fi ;; #( *) have_x=yes;; esac eval "$ac_cv_have_x" fi # $with_x != no if test "$have_x" != yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have_x" >&5 $as_echo "$have_x" >&6; } no_x=yes else # If each of the values was on the command line, it overrides each guess. test "x$x_includes" = xNONE && x_includes=$ac_x_includes test "x$x_libraries" = xNONE && x_libraries=$ac_x_libraries # Update the cache value to reflect the command line values. ac_cv_have_x="have_x=yes\ ac_x_includes='$x_includes'\ ac_x_libraries='$x_libraries'" { $as_echo "$as_me:${as_lineno-$LINENO}: result: libraries $x_libraries, headers $x_includes" >&5 $as_echo "libraries $x_libraries, headers $x_includes" >&6; } fi if test "$no_x" = yes; then # Not all programs may use this symbol, but it does not hurt to define it. $as_echo "#define X_DISPLAY_MISSING 1" >>confdefs.h X_CFLAGS= X_PRE_LIBS= X_LIBS= X_EXTRA_LIBS= else if test -n "$x_includes"; then X_CFLAGS="$X_CFLAGS -I$x_includes" fi # It would also be nice to do this for all -L options, not just this one. 
if test -n "$x_libraries"; then X_LIBS="$X_LIBS -L$x_libraries" # For Solaris; some versions of Sun CC require a space after -R and # others require no space. Words are not sufficient . . . . { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -R must be followed by a space" >&5 $as_echo_n "checking whether -R must be followed by a space... " >&6; } ac_xsave_LIBS=$LIBS; LIBS="$LIBS -R$x_libraries" ac_xsave_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } X_LIBS="$X_LIBS -R$x_libraries" else LIBS="$ac_xsave_LIBS -R $x_libraries" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } X_LIBS="$X_LIBS -R $x_libraries" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: neither works" >&5 $as_echo "neither works" >&6; } fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext ac_c_werror_flag=$ac_xsave_c_werror_flag LIBS=$ac_xsave_LIBS fi # Check for system-dependent libraries X programs must link with. # Do this before checking for the system-independent R6 libraries # (-lICE), since we may need -lsocket or whatever for X linking. if test "$ISC" = yes; then X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl_s -linet" else # Martyn Johnson says this is needed for Ultrix, if the X # libraries were built with DECnet support. And Karl Berry says # the Alpha needs dnet_stub (dnet does not exist). ac_xsave_LIBS="$LIBS"; LIBS="$LIBS $X_LIBS -lX11" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char XOpenDisplay (); int main () { return XOpenDisplay (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet" >&5 $as_echo_n "checking for dnet_ntoa in -ldnet... " >&6; } if ${ac_cv_lib_dnet_dnet_ntoa+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldnet $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dnet_ntoa (); int main () { return dnet_ntoa (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dnet_dnet_ntoa=yes else ac_cv_lib_dnet_dnet_ntoa=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_dnet_ntoa" >&5 $as_echo "$ac_cv_lib_dnet_dnet_ntoa" >&6; } if test "x$ac_cv_lib_dnet_dnet_ntoa" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet" fi if test $ac_cv_lib_dnet_dnet_ntoa = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet_stub" >&5 $as_echo_n "checking for dnet_ntoa in -ldnet_stub... 
" >&6; } if ${ac_cv_lib_dnet_stub_dnet_ntoa+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldnet_stub $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dnet_ntoa (); int main () { return dnet_ntoa (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dnet_stub_dnet_ntoa=yes else ac_cv_lib_dnet_stub_dnet_ntoa=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_stub_dnet_ntoa" >&5 $as_echo "$ac_cv_lib_dnet_stub_dnet_ntoa" >&6; } if test "x$ac_cv_lib_dnet_stub_dnet_ntoa" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet_stub" fi fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS="$ac_xsave_LIBS" # msh@cis.ufl.edu says -lnsl (and -lsocket) are needed for his 386/AT, # to get the SysV transport functions. # Chad R. Larson says the Pyramis MIS-ES running DC/OSx (SVR4) # needs -lnsl. # The nsl library prevents programs from opening the X display # on Irix 5.2, according to T.E. Dickey. # The functions gethostbyname, getservbyname, and inet_addr are # in -lbsd on LynxOS 3.0.1/i386, according to Lars Hecking. ac_fn_c_check_func "$LINENO" "gethostbyname" "ac_cv_func_gethostbyname" if test "x$ac_cv_func_gethostbyname" = xyes; then : fi if test $ac_cv_func_gethostbyname = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lnsl" >&5 $as_echo_n "checking for gethostbyname in -lnsl... " >&6; } if ${ac_cv_lib_nsl_gethostbyname+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lnsl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char gethostbyname (); int main () { return gethostbyname (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_nsl_gethostbyname=yes else ac_cv_lib_nsl_gethostbyname=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_nsl_gethostbyname" >&5 $as_echo "$ac_cv_lib_nsl_gethostbyname" >&6; } if test "x$ac_cv_lib_nsl_gethostbyname" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl" fi if test $ac_cv_lib_nsl_gethostbyname = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lbsd" >&5 $as_echo_n "checking for gethostbyname in -lbsd... " >&6; } if ${ac_cv_lib_bsd_gethostbyname+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lbsd $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char gethostbyname (); int main () { return gethostbyname (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_bsd_gethostbyname=yes else ac_cv_lib_bsd_gethostbyname=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bsd_gethostbyname" >&5 $as_echo "$ac_cv_lib_bsd_gethostbyname" >&6; } if test "x$ac_cv_lib_bsd_gethostbyname" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lbsd" fi fi fi # lieder@skyler.mavd.honeywell.com says without -lsocket, # socket/setsockopt and other routines are undefined under SCO ODT # 2.0. But -lsocket is broken on IRIX 5.2 (and is not necessary # on later versions), says Simon Leinen: it contains gethostby* # variants that don't use the name server (or something). -lsocket # must be given before -lnsl if both are needed. We assume that # if connect needs -lnsl, so does gethostbyname. ac_fn_c_check_func "$LINENO" "connect" "ac_cv_func_connect" if test "x$ac_cv_func_connect" = xyes; then : fi if test $ac_cv_func_connect = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for connect in -lsocket" >&5 $as_echo_n "checking for connect in -lsocket... " >&6; } if ${ac_cv_lib_socket_connect+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lsocket $X_EXTRA_LIBS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char connect (); int main () { return connect (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_socket_connect=yes else ac_cv_lib_socket_connect=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_socket_connect" >&5 $as_echo "$ac_cv_lib_socket_connect" >&6; } if test "x$ac_cv_lib_socket_connect" = xyes; then : X_EXTRA_LIBS="-lsocket $X_EXTRA_LIBS" fi fi # Guillermo Gomez says -lposix is necessary on A/UX. ac_fn_c_check_func "$LINENO" "remove" "ac_cv_func_remove" if test "x$ac_cv_func_remove" = xyes; then : fi if test $ac_cv_func_remove = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for remove in -lposix" >&5 $as_echo_n "checking for remove in -lposix... " >&6; } if ${ac_cv_lib_posix_remove+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lposix $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char remove (); int main () { return remove (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_posix_remove=yes else ac_cv_lib_posix_remove=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_posix_remove" >&5 $as_echo "$ac_cv_lib_posix_remove" >&6; } if test "x$ac_cv_lib_posix_remove" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lposix" fi fi # BSDI BSD/OS 2.1 needs -lipc for XOpenDisplay. 
ac_fn_c_check_func "$LINENO" "shmat" "ac_cv_func_shmat" if test "x$ac_cv_func_shmat" = xyes; then : fi if test $ac_cv_func_shmat = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shmat in -lipc" >&5 $as_echo_n "checking for shmat in -lipc... " >&6; } if ${ac_cv_lib_ipc_shmat+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lipc $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char shmat (); int main () { return shmat (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_ipc_shmat=yes else ac_cv_lib_ipc_shmat=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ipc_shmat" >&5 $as_echo "$ac_cv_lib_ipc_shmat" >&6; } if test "x$ac_cv_lib_ipc_shmat" = xyes; then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lipc" fi fi fi # Check for libraries that X11R6 Xt/Xaw programs need. ac_save_LDFLAGS=$LDFLAGS test -n "$x_libraries" && LDFLAGS="$LDFLAGS -L$x_libraries" # SM needs ICE to (dynamically) link under SunOS 4.x (so we have to # check for ICE first), but we must link in the order -lSM -lICE or # we get undefined symbols. So assume we have SM if we have ICE. # These have to be linked with before -lX11, unlike the other # libraries we check for below, so use a different variable. # John Interrante, Karl Berry { $as_echo "$as_me:${as_lineno-$LINENO}: checking for IceConnectionNumber in -lICE" >&5 $as_echo_n "checking for IceConnectionNumber in -lICE... " >&6; } if ${ac_cv_lib_ICE_IceConnectionNumber+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lICE $X_EXTRA_LIBS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char IceConnectionNumber (); int main () { return IceConnectionNumber (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_ICE_IceConnectionNumber=yes else ac_cv_lib_ICE_IceConnectionNumber=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ICE_IceConnectionNumber" >&5 $as_echo "$ac_cv_lib_ICE_IceConnectionNumber" >&6; } if test "x$ac_cv_lib_ICE_IceConnectionNumber" = xyes; then : X_PRE_LIBS="$X_PRE_LIBS -lSM -lICE" fi LDFLAGS=$ac_save_LDFLAGS fi ac_header_dirent=no for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do as_ac_Header=`$as_echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_hdr that defines DIR" >&5 $as_echo_n "checking for $ac_hdr that defines DIR... " >&6; } if eval \${$as_ac_Header+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include #include <$ac_hdr> int main () { if ((DIR *) 0) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : eval "$as_ac_Header=yes" else eval "$as_ac_Header=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$as_ac_Header { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_hdr" | $as_tr_cpp` 1 _ACEOF ac_header_dirent=$ac_hdr; break fi done # Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. if test $ac_header_dirent = dirent.h; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 $as_echo_n "checking for library containing opendir... " >&6; } if ${ac_cv_search_opendir+:} false; then : $as_echo_n "(cached) " >&6 else ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char opendir (); int main () { return opendir (); ; return 0; } _ACEOF for ac_lib in '' dir; do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO"; then : ac_cv_search_opendir=$ac_res fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext if ${ac_cv_search_opendir+:} false; then : break fi done if ${ac_cv_search_opendir+:} false; then : else ac_cv_search_opendir=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 $as_echo "$ac_cv_search_opendir" >&6; } ac_res=$ac_cv_search_opendir if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" fi else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 $as_echo_n "checking for library containing opendir... " >&6; } if ${ac_cv_search_opendir+:} false; then : $as_echo_n "(cached) " >&6 else ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char opendir (); int main () { return opendir (); ; return 0; } _ACEOF for ac_lib in '' x; do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO"; then : ac_cv_search_opendir=$ac_res fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext if ${ac_cv_search_opendir+:} false; then : break fi done if ${ac_cv_search_opendir+:} false; then : else ac_cv_search_opendir=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 $as_echo "$ac_cv_search_opendir" >&6; } ac_res=$ac_cv_search_opendir if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi for ac_header in unistd.h TargetConfig.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done # Checks for typedefs, structures, and compiler characteristics. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 $as_echo_n "checking whether byte ordering is bigendian... " >&6; } if ${ac_cv_c_bigendian+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_c_bigendian=unknown # See if we're dealing with a universal compiler. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef __APPLE_CC__ not a universal capable compiler #endif typedef int dummy; _ACEOF if ac_fn_c_try_compile "$LINENO"; then : # Check for potential -arch flags. It is not universal unless # there are at least two -arch flags with different values. ac_arch= ac_prev= for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do if test -n "$ac_prev"; then case $ac_word in i?86 | x86_64 | ppc | ppc64) if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then ac_arch=$ac_word else ac_cv_c_bigendian=universal break fi ;; esac ac_prev= elif test "x$ac_word" = "x-arch"; then ac_prev=arch fi done fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_c_bigendian = unknown; then # See if sys/param.h defines the BYTE_ORDER macro. 
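# If the compile-time probes below are inconclusive and the run-time test
# cannot execute (typically when cross-compiling), the cached result can be
# preset on the configure command line, as the error message further down
# advises (illustrative example):
#   ./configure ac_cv_c_bigendian=no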
cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main () { #if ! (defined BYTE_ORDER && defined BIG_ENDIAN \ && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \ && LITTLE_ENDIAN) bogus endian macros #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : # It does; now see whether it defined to BIG_ENDIAN or not. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main () { #if BYTE_ORDER != BIG_ENDIAN not big endian #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_bigendian=yes else ac_cv_c_bigendian=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi if test $ac_cv_c_bigendian = unknown; then # See if defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris). cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { #if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN) bogus endian macros #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : # It does; now see whether it defined to _BIG_ENDIAN or not. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { #ifndef _BIG_ENDIAN not big endian #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_bigendian=yes else ac_cv_c_bigendian=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi if test $ac_cv_c_bigendian = unknown; then # Compile a test program. if test "$cross_compiling" = yes; then : # Try to guess by grepping values from an object file. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ short int ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; short int ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; int use_ascii (int i) { return ascii_mm[i] + ascii_ii[i]; } short int ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; short int ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; int use_ebcdic (int i) { return ebcdic_mm[i] + ebcdic_ii[i]; } extern int foo; int main () { return use_ascii (foo) == use_ebcdic (foo); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then ac_cv_c_bigendian=yes fi if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then if test "$ac_cv_c_bigendian" = unknown; then ac_cv_c_bigendian=no else # finding both strings is unlikely to happen, but who knows? ac_cv_c_bigendian=unknown fi fi fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_includes_default int main () { /* Are we little or big endian? From Harbison&Steele. */ union { long int l; char c[sizeof (long int)]; } u; u.l = 1; return u.c[sizeof (long int) - 1] == 1; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_c_bigendian=no else ac_cv_c_bigendian=yes fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5 $as_echo "$ac_cv_c_bigendian" >&6; } case $ac_cv_c_bigendian in #( yes) $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h ;; #( no) ;; #( universal) $as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h ;; #( *) as_fn_error $? 
"unknown endianness presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 $as_echo_n "checking for an ANSI C-conforming const... " >&6; } if ${ac_cv_c_const+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __cplusplus /* Ultrix mips cc rejects this sort of thing. */ typedef int charset[2]; const charset cs = { 0, 0 }; /* SunOS 4.1.1 cc rejects this. */ char const *const *pcpcc; char **ppc; /* NEC SVR4.0.2 mips cc rejects this. */ struct point {int x, y;}; static struct point const zero = {0,0}; /* AIX XL C 1.02.0.0 rejects this. It does not let you subtract one const X* pointer from another in an arm of an if-expression whose if-part is not a constant expression */ const char *g = "string"; pcpcc = &g + (g ? g-g : 0); /* HPUX 7.0 cc rejects these. */ ++pcpcc; ppc = (char**) pcpcc; pcpcc = (char const *const *) ppc; { /* SCO 3.2v4 cc rejects this sort of thing. */ char tx; char *t = &tx; char const *s = 0 ? (char *) 0 : (char const *) 0; *t++ = 0; if (s) return 0; } { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ int x[] = {25, 17}; const int *foo = &x[0]; ++foo; } { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ typedef const int *iptr; iptr p = 0; ++p; } { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ struct s { int j; const int *ap[3]; } bx; struct s *b = &bx; b->j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; if (!foo) return 0; } return !cs[0] && !zero.x; #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_const=yes else ac_cv_c_const=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 $as_echo "$ac_cv_c_const" >&6; } if test $ac_cv_c_const = no; then $as_echo "#define const /**/" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 $as_echo_n "checking for inline... " >&6; } if ${ac_cv_c_inline+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_c_inline=no for ac_kw in inline __inline__ __inline; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef __cplusplus typedef int foo_t; static $ac_kw foo_t static_foo () {return 0; } $ac_kw foo_t foo () {return 0; } #endif _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_inline=$ac_kw fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext test "$ac_cv_c_inline" != no && break done fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 $as_echo "$ac_cv_c_inline" >&6; } case $ac_cv_c_inline in inline | yes) ;; *) case $ac_cv_c_inline in no) ac_val=;; *) ac_val=$ac_cv_c_inline;; esac cat >>confdefs.h <<_ACEOF #ifndef __cplusplus #define inline $ac_val #endif _ACEOF ;; esac ac_fn_c_check_type "$LINENO" "pid_t" "ac_cv_type_pid_t" "$ac_includes_default" if test "x$ac_cv_type_pid_t" = xyes; then : else cat >>confdefs.h <<_ACEOF #define pid_t int _ACEOF fi ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" if test "x$ac_cv_type_size_t" = xyes; then : else cat >>confdefs.h <<_ACEOF #define size_t unsigned int _ACEOF fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct tm is in sys/time.h or time.h" >&5 $as_echo_n "checking whether struct tm is in sys/time.h or time.h... 
" >&6; } if ${ac_cv_struct_tm+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main () { struct tm tm; int *p = &tm.tm_sec; return !p; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_struct_tm=time.h else ac_cv_struct_tm=sys/time.h fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_struct_tm" >&5 $as_echo "$ac_cv_struct_tm" >&6; } if test $ac_cv_struct_tm = sys/time.h; then $as_echo "#define TM_IN_SYS_TIME 1" >>confdefs.h fi # Checks for library functions. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether getpgrp requires zero arguments" >&5 $as_echo_n "checking whether getpgrp requires zero arguments... " >&6; } if ${ac_cv_func_getpgrp_void+:} false; then : $as_echo_n "(cached) " >&6 else # Use it with a single arg. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_includes_default int main () { getpgrp (0); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_func_getpgrp_void=no else ac_cv_func_getpgrp_void=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_getpgrp_void" >&5 $as_echo "$ac_cv_func_getpgrp_void" >&6; } if test $ac_cv_func_getpgrp_void = yes; then $as_echo "#define GETPGRP_VOID 1" >>confdefs.h fi for ac_func in strftime do : ac_fn_c_check_func "$LINENO" "strftime" "ac_cv_func_strftime" if test "x$ac_cv_func_strftime" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_STRFTIME 1 _ACEOF else # strftime is in -lintl on SCO UNIX. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for strftime in -lintl" >&5 $as_echo_n "checking for strftime in -lintl... " >&6; } if ${ac_cv_lib_intl_strftime+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lintl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char strftime (); int main () { return strftime (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_intl_strftime=yes else ac_cv_lib_intl_strftime=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_intl_strftime" >&5 $as_echo "$ac_cv_lib_intl_strftime" >&6; } if test "x$ac_cv_lib_intl_strftime" = xyes; then : $as_echo "#define HAVE_STRFTIME 1" >>confdefs.h LIBS="-lintl $LIBS" fi fi done for ac_header in vfork.h do : ac_fn_c_check_header_mongrel "$LINENO" "vfork.h" "ac_cv_header_vfork_h" "$ac_includes_default" if test "x$ac_cv_header_vfork_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_VFORK_H 1 _ACEOF fi done for ac_func in fork vfork do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done if test "x$ac_cv_func_fork" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working fork" >&5 $as_echo_n "checking for working fork... 
" >&6; } if ${ac_cv_func_fork_works+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_fork_works=cross else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_includes_default int main () { /* By Ruediger Kuhlmann. */ return fork () < 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_fork_works=yes else ac_cv_func_fork_works=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_fork_works" >&5 $as_echo "$ac_cv_func_fork_works" >&6; } else ac_cv_func_fork_works=$ac_cv_func_fork fi if test "x$ac_cv_func_fork_works" = xcross; then case $host in *-*-amigaos* | *-*-msdosdjgpp*) # Override, as these systems have only a dummy fork() stub ac_cv_func_fork_works=no ;; *) ac_cv_func_fork_works=yes ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: result $ac_cv_func_fork_works guessed because of cross compilation" >&5 $as_echo "$as_me: WARNING: result $ac_cv_func_fork_works guessed because of cross compilation" >&2;} fi ac_cv_func_vfork_works=$ac_cv_func_vfork if test "x$ac_cv_func_vfork" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working vfork" >&5 $as_echo_n "checking for working vfork... " >&6; } if ${ac_cv_func_vfork_works+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_vfork_works=cross else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Thanks to Paul Eggert for this test. */ $ac_includes_default #include #ifdef HAVE_VFORK_H # include #endif /* On some sparc systems, changes by the child to local and incoming argument registers are propagated back to the parent. The compiler is told about this with #include , but some compilers (e.g. gcc -O) don't grok . Test for this by using a static variable whose address is put into a register that is clobbered by the vfork. */ static void #ifdef __cplusplus sparc_address_test (int arg) # else sparc_address_test (arg) int arg; #endif { static pid_t child; if (!child) { child = vfork (); if (child < 0) { perror ("vfork"); _exit(2); } if (!child) { arg = getpid(); write(-1, "", 0); _exit (arg); } } } int main () { pid_t parent = getpid (); pid_t child; sparc_address_test (0); child = vfork (); if (child == 0) { /* Here is another test for sparc vfork register problems. This test uses lots of local variables, at least as many local variables as main has allocated so far including compiler temporaries. 4 locals are enough for gcc 1.40.3 on a Solaris 4.1.3 sparc, but we use 8 to be safe. A buggy compiler should reuse the register of parent for one of the local variables, since it will think that parent can't possibly be used any more in this routine. Assigning to the local variable will thus munge parent in the parent process. */ pid_t p = getpid(), p1 = getpid(), p2 = getpid(), p3 = getpid(), p4 = getpid(), p5 = getpid(), p6 = getpid(), p7 = getpid(); /* Convince the compiler that p..p7 are live; otherwise, it might use the same hardware register for all 8 local variables. */ if (p != p1 || p != p2 || p != p3 || p != p4 || p != p5 || p != p6 || p != p7) _exit(1); /* On some systems (e.g. IRIX 3.3), vfork doesn't separate parent from child file descriptors. If the child closes a descriptor before it execs or exits, this munges the parent's descriptor as well. Test for this by closing stdout in the child. 
*/ _exit(close(fileno(stdout)) != 0); } else { int status; struct stat st; while (wait(&status) != child) ; return ( /* Was there some problem with vforking? */ child < 0 /* Did the child fail? (This shouldn't happen.) */ || status /* Did the vfork/compiler bug occur? */ || parent != getpid() /* Did the file descriptor bug occur? */ || fstat(fileno(stdout), &st) != 0 ); } } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_vfork_works=yes else ac_cv_func_vfork_works=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_vfork_works" >&5 $as_echo "$ac_cv_func_vfork_works" >&6; } fi; if test "x$ac_cv_func_fork_works" = xcross; then ac_cv_func_vfork_works=$ac_cv_func_vfork { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: result $ac_cv_func_vfork_works guessed because of cross compilation" >&5 $as_echo "$as_me: WARNING: result $ac_cv_func_vfork_works guessed because of cross compilation" >&2;} fi if test "x$ac_cv_func_vfork_works" = xyes; then $as_echo "#define HAVE_WORKING_VFORK 1" >>confdefs.h else $as_echo "#define vfork fork" >>confdefs.h fi if test "x$ac_cv_func_fork_works" = xyes; then $as_echo "#define HAVE_WORKING_FORK 1" >>confdefs.h fi for ac_func in vprintf do : ac_fn_c_check_func "$LINENO" "vprintf" "ac_cv_func_vprintf" if test "x$ac_cv_func_vprintf" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_VPRINTF 1 _ACEOF ac_fn_c_check_func "$LINENO" "_doprnt" "ac_cv_func__doprnt" if test "x$ac_cv_func__doprnt" = xyes; then : $as_echo "#define HAVE_DOPRNT 1" >>confdefs.h fi fi done for ac_func in strdup strstr strchr erand48 memmove do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done if test "x${with_x}" != "xno"; then : CFLAGS="$CFLAGS $X_CFLAGS" case $host_os in irix*) XLIB="-lX11 $X_EXTRA_LIBS" ;; *) XLIB="$X_LIBS -lX11 $X_EXTRA_LIBS" ;; esac ac_fn_c_check_header_mongrel "$LINENO" "X11/Xlib.h" "ac_cv_header_X11_Xlib_h" "$ac_includes_default" if test "x$ac_cv_header_X11_Xlib_h" = xyes; then : $as_echo "#define PLD_xwin 1" >>confdefs.h else echo "" echo "X11 graphics have been selected but no X11 header files" echo "have been found." echo "" echo "This error usually happens on Linux/MacOSX distributions" echo "where the optional X11 development files have not been installed." echo "On Linux RPM systems this package is usually called something" echo "like xorg-x11-proto-devel whereas on Debian/Ubuntu it may" echo "be called x-dev. On MacOSX installation DVDs the X11 files" echo "can usually be found as an explicitly named optional" echo "installation." echo "" echo "After installing the X11 development files you should do a" echo "'make clean' and perform the configure stage again." echo "" echo "Alternatively, to install EMBOSS without X11 support, you can add" echo "the --without-x switch to the configure command." echo "" exit $? fi havexawh="1" ac_fn_c_check_header_mongrel "$LINENO" "X11/Xaw/Label.h" "ac_cv_header_X11_Xaw_Label_h" "$ac_includes_default" if test "x$ac_cv_header_X11_Xaw_Label_h" = xyes; then : else havexawh="0" fi if test "x${havexawh}" = "x0"; then : ### FIXME: Should be an error condition. 
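# One possible fix for the FIXME above (sketch only): report a hard error
# instead of exiting with the status of the preceding echo, e.g.
#   as_fn_error $? "You need to install the Xaw development files for your system" "$LINENO" 5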
{ $as_echo "$as_me:${as_lineno-$LINENO}: You need to install the Xaw development files for your system" >&5 $as_echo "$as_me: You need to install the Xaw development files for your system" >&6;} exit $? fi havexawlib="1" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for XawInitializeWidgetSet in -lXaw" >&5 $as_echo_n "checking for XawInitializeWidgetSet in -lXaw... " >&6; } if ${ac_cv_lib_Xaw_XawInitializeWidgetSet+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lXaw ${XLIB} $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char XawInitializeWidgetSet (); int main () { return XawInitializeWidgetSet (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_Xaw_XawInitializeWidgetSet=yes else ac_cv_lib_Xaw_XawInitializeWidgetSet=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_Xaw_XawInitializeWidgetSet" >&5 $as_echo "$ac_cv_lib_Xaw_XawInitializeWidgetSet" >&6; } if test "x$ac_cv_lib_Xaw_XawInitializeWidgetSet" = xyes; then : XLIB="$XLIB -lXaw" else havexawlib="0" fi if test "x${havexawlib}" = "x0"; then : ### FIXME: Should be an error condition. { $as_echo "$as_me:${as_lineno-$LINENO}: You need to install the Xaw library files for your system" >&5 $as_echo "$as_me: You need to install the Xaw library files for your system" >&6;} exit $? fi havextlib="1" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for XtToolkitInitialize in -lXt" >&5 $as_echo_n "checking for XtToolkitInitialize in -lXt... " >&6; } if ${ac_cv_lib_Xt_XtToolkitInitialize+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lXt ${XLIB} $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char XtToolkitInitialize (); int main () { return XtToolkitInitialize (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_Xt_XtToolkitInitialize=yes else ac_cv_lib_Xt_XtToolkitInitialize=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_Xt_XtToolkitInitialize" >&5 $as_echo "$ac_cv_lib_Xt_XtToolkitInitialize" >&6; } if test "x$ac_cv_lib_Xt_XtToolkitInitialize" = xyes; then : XLIB="$XLIB -lXt" else havextlib="0" fi if test "x${havextlib}" = "x0"; then : ### FIXME: Should be an error condition. { $as_echo "$as_me:${as_lineno-$LINENO}: You need to install the Xt library files for your system" >&5 $as_echo "$as_me: You need to install the Xt library files for your system" >&6;} exit $? fi ### FIXME: This is already defined in the Autoconf module libraries.m4 # AC_SUBST(XLIB) fi # Library checks. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for socket in -lc" >&5 $as_echo_n "checking for socket in -lc... " >&6; } if ${ac_cv_lib_c_socket+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lc $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char socket (); int main () { return socket (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_c_socket=yes else ac_cv_lib_c_socket=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_c_socket" >&5 $as_echo "$ac_cv_lib_c_socket" >&6; } if test "x$ac_cv_lib_c_socket" = xyes; then : LIBS="${LIBS}" else LIBS="${LIBS} -lsocket" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lm" >&5 $as_echo_n "checking for main in -lm... " >&6; } if ${ac_cv_lib_m_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_main=yes else ac_cv_lib_m_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_main" >&5 $as_echo "$ac_cv_lib_m_main" >&6; } if test "x$ac_cv_lib_m_main" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" fi # GD for FreeBSD requires libiconv case ${host_os} in #( freebsd*) : if test "x${with_pngdriver}" != "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -liconv" >&5 $as_echo_n "checking for main in -liconv... " >&6; } if ${ac_cv_lib_iconv_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-liconv $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_iconv_main=yes else ac_cv_lib_iconv_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_iconv_main" >&5 $as_echo "$ac_cv_lib_iconv_main" >&6; } if test "x$ac_cv_lib_iconv_main" = xyes; then : LIBS="${LIBS}" else LIBS="-liconv ${LIBS}" fi fi ;; #( *) : ;; esac if false; then AMPNG_TRUE= AMPNG_FALSE='#' else AMPNG_TRUE='#' AMPNG_FALSE= fi if false; then AMPDF_TRUE= AMPDF_FALSE='#' else AMPDF_TRUE='#' AMPDF_FALSE= fi # # Handle general setup e.g. documentation directory # { $as_echo "$as_me:${as_lineno-$LINENO}: checking if docroot is given" >&5 $as_echo_n "checking if docroot is given... " >&6; } # Check whether --with-docroot was given. if test "${with_docroot+set}" = set; then : withval=$with_docroot; if test "$withval" != no ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } CPPFLAGS="$CPPFLAGS -DDOC_ROOT=\\\"$withval\\\"" fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi # GCC profiling { $as_echo "$as_me:${as_lineno-$LINENO}: checking if gcc profiling is selected" >&5 $as_echo_n "checking if gcc profiling is selected... " >&6; } # Check whether --with-gccprofile was given. 
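# Illustrative example only: enable gcc profiling, which the branch below
# implements by appending "-g -pg" to CFLAGS and "-pg" to LDFLAGS:
#   ./configure --with-gccprofile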
if test "${with_gccprofile+set}" = set; then : withval=$with_gccprofile; if test "$withval" != no ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } CFLAGS="$CFLAGS -g -pg" LDFLAGS="$LDFLAGS -pg" fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi JAVA_CFLAGS="" JAVA_CPPFLAGS="" JAVA_LDFLAGS="" have_java="yes" auth_java="" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Java JNI" >&5 $as_echo_n "checking for Java JNI... " >&6; } # Check whether --with-java was given. if test "${with_java+set}" = set; then : withval=$with_java; { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${withval}" >&5 $as_echo "${withval}" >&6; } if test "x${withval}" = "xno"; then : have_java="no" fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } have_java="no" fi if test "x${have_java}" = "xyes"; then : # If specified, the Java JNI include directory has to exist. if test -d ${with_java}; then : JAVA_CPPFLAGS="-I${withval}" else have_java="no" as_fn_error $? "Java include directory ${withval} does not exist" "$LINENO" 5 fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Java JNI OS" >&5 $as_echo_n "checking for Java JNI OS... " >&6; } # Check whether --with-javaos was given. if test "${with_javaos+set}" = set; then : withval=$with_javaos; { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${withval}" >&5 $as_echo "${withval}" >&6; } if test "x${withval}" != "xno"; then : # If specified, the Java JNI OS include directory has to exist. if test "x${have_java}" = "xyes" && test -d ${withval}; then : as_fn_append JAVA_CPPFLAGS " -I${withval}" else have_java="no" as_fn_error $? "Java OS include directory ${withval} does not exist" "$LINENO" 5 fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi # Authorisation type { $as_echo "$as_me:${as_lineno-$LINENO}: checking for authorisation type" >&5 $as_echo_n "checking for authorisation type... " >&6; } # Check whether --with-auth was given. if test "${with_auth+set}" = set; then : withval=$with_auth; if test "x${withval}" != "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } case ${withval} in #( yes) : auth_java="PAM" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lpam" >&5 $as_echo_n "checking for main in -lpam... " >&6; } if ${ac_cv_lib_pam_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lpam $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_pam_main=yes else ac_cv_lib_pam_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pam_main" >&5 $as_echo "$ac_cv_lib_pam_main" >&6; } if test "x$ac_cv_lib_pam_main" = xyes; then : as_fn_append JAVA_LDFLAGS " -lpam" fi ;; #( pam) : auth_java="PAM" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lpam" >&5 $as_echo_n "checking for main in -lpam... " >&6; } if ${ac_cv_lib_pam_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lpam $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_pam_main=yes else ac_cv_lib_pam_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pam_main" >&5 $as_echo "$ac_cv_lib_pam_main" >&6; } if test "x$ac_cv_lib_pam_main" = xyes; then : as_fn_append JAVA_LDFLAGS " -lpam" fi ;; #( shadow) : auth_java="N_SHADOW" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lcrypt" >&5 $as_echo_n "checking for main in -lcrypt... " >&6; } if ${ac_cv_lib_crypt_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lcrypt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_crypt_main=yes else ac_cv_lib_crypt_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_crypt_main" >&5 $as_echo "$ac_cv_lib_crypt_main" >&6; } if test "x$ac_cv_lib_crypt_main" = xyes; then : as_fn_append JAVA_LDFLAGS " -lcrypt" fi ;; #( rshadow) : auth_java="R_SHADOW" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lcrypt" >&5 $as_echo_n "checking for main in -lcrypt... " >&6; } if ${ac_cv_lib_crypt_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lcrypt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_crypt_main=yes else ac_cv_lib_crypt_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_crypt_main" >&5 $as_echo "$ac_cv_lib_crypt_main" >&6; } if test "x$ac_cv_lib_crypt_main" = xyes; then : as_fn_append JAVA_LDFLAGS " -lcrypt" fi ;; #( noshadow) : auth_java="NO_SHADOW" ;; #( rnoshadow) : auth_java="RNO_SHADOW" ;; #( aixshadow) : auth_java="AIX_SHADOW" ;; #( hpuxshadow) : auth_java="HPUX_SHADOW" ;; #( *) : ;; esac else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test -n "${auth_java}"; then : as_fn_append JAVA_CPPFLAGS " -D${auth_java}" else as_fn_append JAVA_CPPFLAGS " -DNO_AUTH" fi # Threading type { $as_echo "$as_me:${as_lineno-$LINENO}: checking for threading type" >&5 $as_echo_n "checking for threading type... " >&6; } # Check whether --with-thread was given.
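# Illustrative mapping (comment only): the case statement below translates
# the value given to --with-thread into platform-specific JNI threading
# flags, for example:
#
#   ./configure --with-thread=linux
#       JAVA_CPPFLAGS += " -D_REENTRANT",              JAVA_LDFLAGS += " -lpthread"
#   ./configure --with-thread=freebsd
#       JAVA_CPPFLAGS += " -D_THREAD_SAFE",            JAVA_LDFLAGS += " -pthread"
#   ./configure --with-thread=solaris
#       JAVA_CPPFLAGS += " -D_POSIX_C_SOURCE=199506L", JAVA_LDFLAGS += " -lpthread"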
if test "${with_thread+set}" = set; then : withval=$with_thread; if test "x${withval}" != "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } case ${withval} in #( yes) : as_fn_append JAVA_CPPFLAGS " -D_REENTRANT" as_fn_append JAVA_LDFLAGS " -lpthread" # AS_VAR_APPEND([LIBS], [" -lpthread"]) ;; #( freebsd) : as_fn_append JAVA_CPPFLAGS " -D_THREAD_SAFE" as_fn_append JAVA_LDFLAGS " -pthread" # AS_VAR_APPEND([LIBS], [" -lc_r"]) ;; #( linux) : as_fn_append JAVA_CPPFLAGS " -D_REENTRANT" as_fn_append JAVA_LDFLAGS " -lpthread" # AS_VAR_APPEND([LIBS], [" -lpthread"]) ;; #( solaris) : as_fn_append JAVA_CPPFLAGS " -D_POSIX_C_SOURCE=199506L" as_fn_append JAVA_LDFLAGS " -lpthread" # AS_VAR_APPEND([LIBS], [" -lpthread"]) ;; #( macos) : # AS_VAR_APPEND([JAVA_CPPFLAGS], [""]) # AS_VAR_APPEND([JAVA_LDFLAGS], [" -lpthread"]) # AS_VAR_APPEND([LIBS], [" -lpthread"]) ;; #( hpux) : as_fn_append JAVA_CFLAGS " -Ae +z" as_fn_append JAVA CPPFLAGS " -DNATIVE -D_POSIX_C_SOURCE=199506L" as_fn_append JAVA_LDFLAGS " -lpthread" # AS_VAR_APPEND([LIBS], [" -lpthread"]) ;; #( irix) : # AS_VAR_APPEND([JAVA_CFLAGS], [""]) as_fn_append JAVA_LDFLAGS " -lpthread" as_fn_append LIBS " -lpthread" ;; #( aix) : as_fn_append JAVA_CPPFLAGS " -D_REENTRANT" as_fn_append JAVA_LDFLAGS " -lpthread" as_fn_append LIBS " -lpthread" ;; #( osf) : as_fn_append JAVA_CPPFLAGS " -D_REENTRANT -D_OSF_SOURCE" as_fn_append JAVA_LDFLAGS " -lpthread" as_fn_append LIBS " -lpthread" ;; #( *) : ;; esac else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi # Test for programs ant, jar, java and javac. if test "x${have_java}" = "xyes"; then : # Extract the first word of "ant", so it can be a program name with args. set dummy ant; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_ANT+:} false; then : $as_echo_n "(cached) " >&6 else case $ANT in [\\/]* | ?:[\\/]*) ac_cv_path_ANT="$ANT" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_ANT="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_ANT" && ac_cv_path_ANT="no" ;; esac fi ANT=$ac_cv_path_ANT if test -n "$ANT"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ANT" >&5 $as_echo "$ANT" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x${ANT}" = "xno"; then : have_java="no" fi # Extract the first word of "jar", so it can be a program name with args. set dummy jar; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_JAR+:} false; then : $as_echo_n "(cached) " >&6 else case $JAR in [\\/]* | ?:[\\/]*) ac_cv_path_JAR="$JAR" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_JAR="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_JAR" && ac_cv_path_JAR="no" ;; esac fi JAR=$ac_cv_path_JAR if test -n "$JAR"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAR" >&5 $as_echo "$JAR" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x${JAR}" = "xno"; then : have_java="no" fi # Extract the first word of "java", so it can be a program name with args. set dummy java; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_JAVA+:} false; then : $as_echo_n "(cached) " >&6 else case $JAVA in [\\/]* | ?:[\\/]*) ac_cv_path_JAVA="$JAVA" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_JAVA="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_JAVA" && ac_cv_path_JAVA="no" ;; esac fi JAVA=$ac_cv_path_JAVA if test -n "$JAVA"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVA" >&5 $as_echo "$JAVA" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x${JAVA}" = "xno"; then : have_java="no" fi # Extract the first word of "javac", so it can be a program name with args. set dummy javac; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_JAVAC+:} false; then : $as_echo_n "(cached) " >&6 else case $JAVAC in [\\/]* | ?:[\\/]*) ac_cv_path_JAVAC="$JAVAC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_JAVAC="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_JAVAC" && ac_cv_path_JAVAC="no" ;; esac fi JAVAC=$ac_cv_path_JAVAC if test -n "$JAVAC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVAC" >&5 $as_echo "$JAVAC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x${JAVAC}" = "xno"; then : have_java="no" fi fi if test "x${have_java}" = "xyes"; then : $as_echo "#define HAVE_JAVA 1" >>confdefs.h ### FIXME: Append -DDEBIAN for the moment. # Debian uses PAM service "ssh" instead of "login", see ajjava.c # This could use AC_DEFINE() if no better option was available. # Ultimately, this should be configurable via server configuration # files.
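# Sketch of the AC_DEFINE() alternative mentioned in the comment above, as it
# might appear in configure.ac (illustrative only; the generated code below
# instead appends -DDEBIAN to JAVA_CPPFLAGS):
#
#   if test -f /etc/debian_version; then
#     AC_DEFINE([DEBIAN], [1], [Define to 1 when building on a Debian system])
#   fi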
if test -f "/etc/debian_release" || test -f /etc/debian_version; then : as_fn_append JAVA_CPPFLAGS " -DDEBIAN" fi fi if test "x${have_java}" = "xyes"; then JAVA_BUILD_TRUE= JAVA_BUILD_FALSE='#' else JAVA_BUILD_TRUE='#' JAVA_BUILD_FALSE= fi # # Handle user hints # { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to look for pdf support" >&5 $as_echo_n "checking whether to look for pdf support... " >&6; } # Check whether --with-hpdf was given. if test "${with_hpdf+set}" = set; then : withval=$with_hpdf; if test "$withval" != no ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } ALT_HOME="$withval" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } ALT_HOME=/usr fi # # Locate hpdf # if test -d "${ALT_HOME}" then # # Keep a copy if it fails # ALT_LDFLAGS="$LDFLAGS" ALT_CPPFLAGS="$CPPFLAGS" # # Set # LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib" CPPFLAGS="$CPPFLAGS -I$ALT_HOME/include" # # Check for libharu in ALT_HOME # { $as_echo "$as_me:${as_lineno-$LINENO}: checking for HPDF_New in -lhpdf" >&5 $as_echo_n "checking for HPDF_New in -lhpdf... " >&6; } if ${ac_cv_lib_hpdf_HPDF_New+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lhpdf -L${ALT_HOME}/lib $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char HPDF_New (); int main () { return HPDF_New (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_hpdf_HPDF_New=yes else ac_cv_lib_hpdf_HPDF_New=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hpdf_HPDF_New" >&5 $as_echo "$ac_cv_lib_hpdf_HPDF_New" >&6; } if test "x$ac_cv_lib_hpdf_HPDF_New" = xyes; then : CHECK=1 else CHECK=0 fi # # # If everything found okay then proceed to include png driver in config. # if test $CHECK = "1" ; then LIBS="$LIBS -lhpdf" case $host_os in solaris*) LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac $as_echo "#define PLD_pdf 1" >>confdefs.h if true; then AMPDF_TRUE= AMPDF_FALSE='#' else AMPDF_TRUE='#' AMPDF_FALSE= fi echo PDF support found if test $ALT_HOME = "/usr" ; then LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" fi else # # If not okay then reset FLAGS. # if false; then AMPDF_TRUE= AMPDF_FALSE='#' else AMPDF_TRUE='#' AMPDF_FALSE= fi LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" echo "No pdf support (libhpdf) found." fi else if test $withval != "no"; then echo "Directory $ALT_HOME does not exist" exit 0 fi fi # # Handle user hints # { $as_echo "$as_me:${as_lineno-$LINENO}: checking if png driver is wanted" >&5 $as_echo_n "checking if png driver is wanted... " >&6; } # Check whether --with-pngdriver was given. 
if test "${with_pngdriver+set}" = set; then : withval=$with_pngdriver; if test "$withval" != no ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } ALT_HOME="$withval" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } ALT_HOME=/usr fi # # Locate png/gd/zlib, if wanted # if test -d "${ALT_HOME}" then # # Keep a copy if it fails # ALT_LDFLAGS="$LDFLAGS" ALT_CPPFLAGS="$CPPFLAGS" # # Set # LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib" CPPFLAGS="$CPPFLAGS -I$ALT_HOME/include" ICCHECK=0 case $host_os in solaris*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libiconv_close in -liconv" >&5 $as_echo_n "checking for libiconv_close in -liconv... " >&6; } if ${ac_cv_lib_iconv_libiconv_close+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-liconv -L${ALT_HOME}/lib -liconv $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char libiconv_close (); int main () { return libiconv_close (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_iconv_libiconv_close=yes else ac_cv_lib_iconv_libiconv_close=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_iconv_libiconv_close" >&5 $as_echo "$ac_cv_lib_iconv_libiconv_close" >&6; } if test "x$ac_cv_lib_iconv_libiconv_close" = xyes; then : ICCHECK=1 else ICCHECK=0 fi if test $ICCHECK = "1" ; then LDFLAGS="${LDFLAGS} -L${ALT_HOME}/lib -liconv" fi LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac # # Check for zlib in ALT_HOME # { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inflateEnd in -lz" >&5 $as_echo_n "checking for inflateEnd in -lz... " >&6; } if ${ac_cv_lib_z_inflateEnd+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lz -L${ALT_HOME}/lib -lz $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char inflateEnd (); int main () { return inflateEnd (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_z_inflateEnd=yes else ac_cv_lib_z_inflateEnd=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_inflateEnd" >&5 $as_echo "$ac_cv_lib_z_inflateEnd" >&6; } if test "x$ac_cv_lib_z_inflateEnd" = xyes; then : CHECK=1 else CHECK=0 fi # # # Check for png # if test $CHECK = "1" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for png_destroy_read_struct in -lpng" >&5 $as_echo_n "checking for png_destroy_read_struct in -lpng... " >&6; } if ${ac_cv_lib_png_png_destroy_read_struct+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lpng -L${ALT_HOME}/lib -lz $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. 
Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char png_destroy_read_struct (); int main () { return png_destroy_read_struct (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_png_png_destroy_read_struct=yes else ac_cv_lib_png_png_destroy_read_struct=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_png_png_destroy_read_struct" >&5 $as_echo "$ac_cv_lib_png_png_destroy_read_struct" >&6; } if test "x$ac_cv_lib_png_png_destroy_read_struct" = xyes; then : CHECK=1 else CHECK=0 fi fi # # Check for gd # if test $CHECK = "1"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gdImageCreateFromPng in -lgd" >&5 $as_echo_n "checking for gdImageCreateFromPng in -lgd... " >&6; } if ${ac_cv_lib_gd_gdImageCreateFromPng+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lgd -L${ALT_HOME}/lib -lgd -lpng -lz -lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char gdImageCreateFromPng (); int main () { return gdImageCreateFromPng (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_gd_gdImageCreateFromPng=yes else ac_cv_lib_gd_gdImageCreateFromPng=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gd_gdImageCreateFromPng" >&5 $as_echo "$ac_cv_lib_gd_gdImageCreateFromPng" >&6; } if test "x$ac_cv_lib_gd_gdImageCreateFromPng" = xyes; then : CHECK=1 else CHECK=0 fi if test $CHECK = "0"; then echo need to upgrade gd for png driver for plplot fi fi # # If everything found okay then proceed to include png driver in config. # if test $CHECK = "1" ; then LIBS="$LIBS -lgd -lpng -lz -lm" if test $ICCHECK = "1" ; then LIBS="$LIBS -liconv" fi case $host_os in solaris*) LDFLAGS="$LDFLAGS -R$ALT_HOME/lib" ;; esac $as_echo "#define PLD_png 1" >>confdefs.h if true; then AMPNG_TRUE= AMPNG_FALSE='#' else AMPNG_TRUE='#' AMPNG_FALSE= fi echo PNG libraries found if test $ALT_HOME = "/usr" ; then LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" fi else # # If not okay then reset FLAGS. # if false; then AMPNG_TRUE= AMPNG_FALSE='#' else AMPNG_TRUE='#' AMPNG_FALSE= fi LDFLAGS="$ALT_LDFLAGS" CPPFLAGS="$ALT_CPPFLAGS" echo No png driver will be made due to libraries missing/old. fi # echo PNG STUFF FOLLOWS!!! # echo CHECK = $CHECK # echo LIBS = $LIBS # echo LDFLAGS = $LDFLAGS # echo CPPFLAGS = $CPPFLAGS else if test $withval != "no"; then echo "Directory $ALT_HOME does not exist" exit 0 fi fi MYSQL_CFLAGS="" MYSQL_CPPFLAGS="" MYSQL_LDFLAGS="" MYSQL_CONFIG="" MYSQL_VERSION="" # Check whether --with-mysql was given. if test "${with_mysql+set}" = set; then : withval=$with_mysql; if test "x${withval}" = "xno"; then : want_mysql="no" elif test "x${withval}" = "xyes"; then : want_mysql="yes" else want_mysql="yes" MYSQL_CONFIG="${withval}" fi else want_mysql="yes" fi if test "x${want_mysql}" = "xyes"; then : if test -z "${MYSQL_CONFIG}" -o test; then : # Extract the first word of "mysql_config", so it can be a program name with args.
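# For reference, the MySQL detection below queries the standard mysql_config
# helper; the output shown here is only an example and will differ between
# installations:
#
#   mysql_config --cflags    # e.g. -I/usr/include/mysql
#   mysql_config --include   # e.g. -I/usr/include/mysql
#   mysql_config --libs      # e.g. -L/usr/lib -lmysqlclient
#   mysql_config --version   # e.g. 5.1.73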
set dummy mysql_config; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_MYSQL_CONFIG+:} false; then : $as_echo_n "(cached) " >&6 else case $MYSQL_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_MYSQL_CONFIG="$MYSQL_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_MYSQL_CONFIG="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_MYSQL_CONFIG" && ac_cv_path_MYSQL_CONFIG="no" ;; esac fi MYSQL_CONFIG=$ac_cv_path_MYSQL_CONFIG if test -n "$MYSQL_CONFIG"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MYSQL_CONFIG" >&5 $as_echo "$MYSQL_CONFIG" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test "x${MYSQL_CONFIG}" != "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for MySQL libraries" >&5 $as_echo_n "checking for MySQL libraries... " >&6; } MYSQL_CFLAGS="`${MYSQL_CONFIG} --cflags`" MYSQL_CPPFLAGS="`${MYSQL_CONFIG} --include`" MYSQL_LDFLAGS="`${MYSQL_CONFIG} --libs`" MYSQL_VERSION=`${MYSQL_CONFIG} --version` EMBCPPFLAGS="${CPPFLAGS}" EMBLDFLAGS="${LDFLAGS}" CPPFLAGS="${MYSQL_CPPFLAGS} ${EMBCPPFLAGS}" LDFLAGS="${MYSQL_LDFLAGS} ${EMBLDFLAGS}" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <stdio.h> #include "mysql.h" int main () { mysql_info(NULL) ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : havemysql="yes" else havemysql="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CPPFLAGS="${EMBCPPFLAGS}" LDFLAGS="${EMBLDFLAGS}" if test "x${havemysql}" = "xyes"; then : $as_echo "#define HAVE_MYSQL 1" >>confdefs.h found_mysql="yes" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else MYSQL_CFLAGS="" MYSQL_CPPFLAGS="" MYSQL_LDFLAGS="" found_mysql="no" { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else found_mysql="no" fi fi mysql_version_req= if test "x${found_mysql}" = "xyes" -a -n "${mysql_version_req}"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking if MySQL version is >= ${mysql_version_req}" >&5 $as_echo_n "checking if MySQL version is >= ${mysql_version_req}... 
" >&6; } mysql_version_req_major=`expr ${mysql_version_req} : '\([0-9]*\)'` mysql_version_req_minor=`expr ${mysql_version_req} : '[0-9]*\.\([0-9]*\)'` mysql_version_req_micro=`expr ${mysql_version_req} : '[0-9]*\.[0-9]*\.\([0-9]*\)'` if test "x${mysql_version_req_micro}" = "x"; then : mysql_version_req_micro="0" fi mysql_version_req_number=`expr ${mysql_version_req_major} \* 1000000 \ \+ ${mysql_version_req_minor} \* 1000 \ \+ ${mysql_version_req_micro}` mysql_version_major=`expr ${MYSQL_VERSION} : '\([0-9]*\)'` mysql_version_minor=`expr ${MYSQL_VERSION} : '[0-9]*\.\([0-9]*\)'` mysql_version_micro=`expr ${MYSQL_VERSION} : '[0-9]*\.[0-9]*\.\([0-9]*\)'` if test "x${mysql_version_micro}" = "x"; then : mysql_version_micro="0" fi mysql_version_number=`expr ${mysql_version_major} \* 1000000 \ \+ ${mysql_version_minor} \* 1000 \ \+ ${mysql_version_micro}` mysql_version_check=`expr ${mysql_version_number} \>\= ${mysql_version_req_number}` if test "x${mysql_version_check}" = "x1"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi POSTGRESQL_CFLAGS="" POSTGRESQL_CPPFLAGS="" POSTGRESQL_LDFLAGS="" POSTGRESQL_CONFIG="" POSTGRESQL_VERSION="" # Check whether --with-postgresql was given. if test "${with_postgresql+set}" = set; then : withval=$with_postgresql; if test "x${withval}" = "xno"; then : want_postgresql="no" elif test "x${withval}" = "xyes"; then : want_postgresql="yes" else want_postgresql="yes" POSTGRESQL_CONFIG="${withval}" fi else want_postgresql="yes" fi if test "x${want_postgresql}" = "xyes"; then : if test -z "${POSTGRESQL_CONFIG}" -o test; then : # Extract the first word of "pg_config", so it can be a program name with args. set dummy pg_config; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_path_POSTGRESQL_CONFIG+:} false; then : $as_echo_n "(cached) " >&6 else case $POSTGRESQL_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_POSTGRESQL_CONFIG="$POSTGRESQL_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_path_POSTGRESQL_CONFIG="$as_dir/$ac_word$ac_exec_ext" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_POSTGRESQL_CONFIG" && ac_cv_path_POSTGRESQL_CONFIG="no" ;; esac fi POSTGRESQL_CONFIG=$ac_cv_path_POSTGRESQL_CONFIG if test -n "$POSTGRESQL_CONFIG"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $POSTGRESQL_CONFIG" >&5 $as_echo "$POSTGRESQL_CONFIG" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test "x${POSTGRESQL_CONFIG}" != "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PostgreSQL libraries" >&5 $as_echo_n "checking for PostgreSQL libraries... 
" >&6; } POSTGRESQL_CFLAGS="-I`${POSTGRESQL_CONFIG} --includedir`" POSTGRESQL_CPPFLAGS="-I`${POSTGRESQL_CONFIG} --includedir`" POSTGRESQL_LDFLAGS="-L`${POSTGRESQL_CONFIG} --libdir` -lpq" POSTGRESQL_VERSION=`${POSTGRESQL_CONFIG} --version | sed -e 's#PostgreSQL ##'` EMBCPPFLAGS="${CPPFLAGS}" EMBLDFLAGS="${LDFLAGS}" CPPFLAGS="${POSTGRESQL_CPPFLAGS} ${EMBCPPFLAGS}" LDFLAGS="${POSTGRESQL_LDFLAGS} ${EMBLDFLAGS}" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include "libpq-fe.h" int main () { PQconnectdb(NULL) ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : havepostgresql="yes" else havepostgresql="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CPPFLAGS="${EMBCPPFLAGS}" LDFLAGS="${EMBLDFLAGS}" if test "x${havepostgresql}" = "xyes"; then : $as_echo "#define HAVE_POSTGRESQL 1" >>confdefs.h found_postgresql="yes" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else POSTGRESQL_CFLAGS="" POSTGRESQL_CPPFLAGS="" POSTGRESQL_LDFLAGS="" found_postgresql="no" { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else found_postgresql="no" fi fi postgresql_version_req= if test "x${found_postgresql}" = "xyes" -a -n "${postgresql_version_req}"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking if PostgreSQL version is >= ${postgresql_version_req}" >&5 $as_echo_n "checking if PostgreSQL version is >= ${postgresql_version_req}... " >&6; } postgresql_version_req_major=`expr ${postgresql_version_req} : '\([0-9]*\)'` postgresql_version_req_minor=`expr ${postgresql_version_req} : '[0-9]*\.\([0-9]*\)'` postgresql_version_req_micro=`expr ${postgresql_version_req} : '[0-9]*\.[0-9]*\.\([0-9]*\)'` if test "x${postgresql_version_req_micro}" = "x"; then : postgresql_version_req_micro="0" fi postgresql_version_req_number=`expr ${postgresql_version_req_major} \* 1000000 \ \+ ${postgresql_version_req_minor} \* 1000 \ \+ ${postgresql_version_req_micro}` postgresql_version_major=`expr ${POSTGRESQL_VERSION} : '\([0-9]*\)'` postgresql_version_minor=`expr ${POSTGRESQL_VERSION} : '[0-9]*\.\([0-9]*\)'` postgresql_version_micro=`expr ${POSTGRESQL_VERSION} : '[0-9]*\.[0-9]*\.\([0-9]*\)'` if test "x${postgresql_version_micro}" = "x"; then : postgresql_version_micro="0" fi postgresql_version_number=`expr ${postgresql_version_major} \* 1000000 \ \+ ${postgresql_version_minor} \* 1000 \ \+ ${postgresql_version_micro}` postgresql_version_check=`expr ${postgresql_version_number} \>\= ${postgresql_version_req_number}` if test "x${postgresql_version_check}" = "x1"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi locallink="no" embprefix="/usr/local" # Check whether --enable-localforce was given. if test "${enable_localforce+set}" = set; then : enableval=$enable_localforce; fi if test "x${enable_localforce}" = "xyes"; then : embprefix="/usr/local" fi if test "x${prefix}" = "xNONE"; then : if test "x${enable_localforce}" != "xyes"; then : locallink="yes" else locallink="no" embprefix="/usr/local" fi else embprefix="${prefix}" fi if test "x${locallink}" = "xyes"; then LOCALLINK_TRUE= LOCALLINK_FALSE='#' else LOCALLINK_TRUE='#' LOCALLINK_FALSE= fi # Enable debugging: --enable-debug, sets CFLAGS # Check whether --enable-debug was given. 
if test "${enable_debug+set}" = set; then : enableval=$enable_debug; fi if test "x${enable_debug}" = "xyes"; then : as_fn_append CFLAGS " -g" fi # Turn off irritating linker warnings in IRIX case ${host_os} in #( irix*) : CFLAGS="-Wl,-LD_MSG:off=85:off=84:off=16:off=134 ${CFLAGS}" ;; #( *) : ;; esac # Enable the large file interface: --enable-large, appends CPPFLAGS # Check whether --enable-large was given. if test "${enable_large+set}" = set; then : enableval=$enable_large; fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for large file support" >&5 $as_echo_n "checking for large file support... " >&6; } if test "x${enable_large}" = "xno"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } else case ${host_os} in #( linux*) : as_fn_append CPPFLAGS " -DAJ_LinuxLF" as_fn_append CPPFLAGS " -D_LARGEFILE_SOURCE" as_fn_append CPPFLAGS " -D_LARGEFILE64_SOURCE" as_fn_append CPPFLAGS " -D_FILE_OFFSET_BITS=64" ;; #( freebsd*) : as_fn_append CPPFLAGS " -DAJ_FreeBSDLF" ;; #( solaris*) : as_fn_append CPPFLAGS " -DAJ_SolarisLF" as_fn_append CPPFLAGS " -D_LARGEFILE_SOURCE" as_fn_append CPPFLAGS " -D_FILE_OFFSET_BITS=64" ;; #( osf*) : as_fn_append CPPFLAGS " -DAJ_OSF1LF" ;; #( irix*) : as_fn_append CPPFLAGS " -DAJ_IRIXLF" as_fn_append CPPFLAGS " -D_LARGEFILE64_SOURCE" ;; #( aix*) : as_fn_append CPPFLAGS " -DAJ_AIXLF" as_fn_append CPPFLAGS " -D_LARGE_FILES" ;; #( hpux*) : as_fn_append CPPFLAGS " -DAJ_HPUXLF" as_fn_append CPPFLAGS " -D_LARGEFILE_SOURCE" as_fn_append CPPFLAGS " -D_FILE_OFFSET_BITS=64" ;; #( darwin*) : as_fn_append CPPFLAGS " -DAJ_MACOSXLF" ;; #( *) : ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi # Enable libraries provided by the system rather than EMBOSS: # --enable-systemlibs, sets ESYSTEMLIBS # Check whether --enable-systemlibs was given. if test "${enable_systemlibs+set}" = set; then : enableval=$enable_systemlibs; fi if test "x${enable_systemlibs}" = "xyes"; then ESYSTEMLIBS_TRUE= ESYSTEMLIBS_FALSE='#' else ESYSTEMLIBS_TRUE='#' ESYSTEMLIBS_FALSE= fi # Enable the purify tool: --enable-purify, sets CC and LIBTOOL # Check whether --enable-purify was given. if test "${enable_purify+set}" = set; then : enableval=$enable_purify; fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for purify" >&5 $as_echo_n "checking for purify... " >&6; } if test "x${enable_purify}" = "xyes"; then : CC="purify --chain-length=20 -best-effort -windows=yes gcc -g" LIBTOOL="${LIBTOOL} --tag=CC" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x${CC}" = "xcc"; then case "${host}" in alpha*-dec-osf*) CFLAGS="${CFLAGS} -ieee";; esac fi if test "x${enable_purify}" = "xyes"; then PURIFY_TRUE= PURIFY_FALSE='#' else PURIFY_TRUE='#' PURIFY_FALSE= fi platform_cygwin="no" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cygwin" >&5 $as_echo_n "checking for cygwin... 
" >&6; } case "${host}" in *-*-mingw*|*-*-cygwin*) platform_cygwin="yes" ;; *) platform_cygwin="no" ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${platform_cygwin}" >&5 $as_echo "${platform_cygwin}" >&6; } if test "x${platform_cygwin}" = "xyes"; then ISCYGWIN_TRUE= ISCYGWIN_FALSE='#' else ISCYGWIN_TRUE='#' ISCYGWIN_FALSE= fi needajax="no" case ${host_os} in #( aix*) : if true; then ISAIXIA64_TRUE= ISAIXIA64_FALSE='#' else ISAIXIA64_TRUE='#' ISAIXIA64_FALSE= fi ;; #( *) : if false; then ISAIXIA64_TRUE= ISAIXIA64_FALSE='#' else ISAIXIA64_TRUE='#' ISAIXIA64_FALSE= fi ;; esac if test "x${enable_shared}" = "xyes"; then ISSHARED_TRUE= ISSHARED_FALSE='#' else ISSHARED_TRUE='#' ISSHARED_FALSE= fi case ${host_os} in #( aix*) : if test -d ajax/.libs; then : $as_echo "AIX ajax/.libs exists" else mkdir ajax/.libs fi case ${host_os} in #( aix5*) : needajax="no" ;; #( aix4.3.3*) : needajax="yes" ;; #( *) : needajax="no" ;; esac ;; #( *) : ;; esac if test "x${needajax}" = "xyes"; then NEEDAJAX_TRUE= NEEDAJAX_FALSE='#' else NEEDAJAX_TRUE='#' NEEDAJAX_FALSE= fi # HP-UX needs -lsec for shadow passwords case ${host_os} in #( hpux*) : as_fn_append LDFLAGS " -lsec" ;; #( *) : ;; esac # GNU mcheck functions: --enable-mcheck, defines HAVE_MCHECK # Check whether --enable-mcheck was given. if test "${enable_mcheck+set}" = set; then : enableval=$enable_mcheck; fi if test "x${enable_mcheck}" = "xyes"; then : for ac_func in mcheck do : ac_fn_c_check_func "$LINENO" "mcheck" "ac_cv_func_mcheck" if test "x$ac_cv_func_mcheck" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_MCHECK 1 _ACEOF fi done fi # Collect AJAX statistics: --enable-savestats, defines AJ_SAVESTATS # Check whether --enable-savestats was given. if test "${enable_savestats+set}" = set; then : enableval=$enable_savestats; fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for savestats" >&5 $as_echo_n "checking for savestats... " >&6; } if test "x${enable_savestats}" = "xyes"; then : $as_echo "#define AJ_SAVESTATS 1" >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi ac_config_files="$ac_config_files Makefile src/Makefile data/Makefile emboss_acd/Makefile emboss_doc/Makefile emboss_doc/html/Makefile emboss_doc/text/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs { $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 $as_echo_n "checking that generated files are newer than configure... " >&6; } if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 $as_echo "done" >&6; } if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' else am__EXEEXT_TRUE='#' am__EXEEXT_FALSE= fi if test -z "${AMPNG_TRUE}" && test -z "${AMPNG_FALSE}"; then as_fn_error $? "conditional \"AMPNG\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMPDF_TRUE}" && test -z "${AMPDF_FALSE}"; then as_fn_error $? "conditional \"AMPDF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${JAVA_BUILD_TRUE}" && test -z "${JAVA_BUILD_FALSE}"; then as_fn_error $? "conditional \"JAVA_BUILD\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMPDF_TRUE}" && test -z "${AMPDF_FALSE}"; then as_fn_error $? "conditional \"AMPDF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMPDF_TRUE}" && test -z "${AMPDF_FALSE}"; then as_fn_error $? "conditional \"AMPDF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMPNG_TRUE}" && test -z "${AMPNG_FALSE}"; then as_fn_error $? "conditional \"AMPNG\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMPNG_TRUE}" && test -z "${AMPNG_FALSE}"; then as_fn_error $? "conditional \"AMPNG\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${LOCALLINK_TRUE}" && test -z "${LOCALLINK_FALSE}"; then as_fn_error $? "conditional \"LOCALLINK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ESYSTEMLIBS_TRUE}" && test -z "${ESYSTEMLIBS_FALSE}"; then as_fn_error $? "conditional \"ESYSTEMLIBS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${PURIFY_TRUE}" && test -z "${PURIFY_FALSE}"; then as_fn_error $? "conditional \"PURIFY\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ISCYGWIN_TRUE}" && test -z "${ISCYGWIN_FALSE}"; then as_fn_error $? "conditional \"ISCYGWIN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ISAIXIA64_TRUE}" && test -z "${ISAIXIA64_FALSE}"; then as_fn_error $? "conditional \"ISAIXIA64\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ISAIXIA64_TRUE}" && test -z "${ISAIXIA64_FALSE}"; then as_fn_error $? "conditional \"ISAIXIA64\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ISSHARED_TRUE}" && test -z "${ISSHARED_FALSE}"; then as_fn_error $? "conditional \"ISSHARED\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${NEEDAJAX_TRUE}" && test -z "${NEEDAJAX_FALSE}"; then as_fn_error $? "conditional \"NEEDAJAX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. 
# Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. 
for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. ## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by PHYLIPNEW $as_me 3.69.650, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac case $ac_config_headers in *" "*) set x $ac_config_headers; shift; ac_config_headers=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. 
config_files="$ac_config_files" config_headers="$ac_config_headers" config_commands="$ac_config_commands" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Configuration commands: $config_commands Report bugs to . PHYLIPNEW home page: ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ PHYLIPNEW config.status 3.69.650 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Copyright (C) 2012 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' INSTALL='$INSTALL' MKDIR_P='$MKDIR_P' AWK='$AWK' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; --he | --h) # Conflict between --help and --header as_fn_error $? "ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." 
;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # # INIT-COMMANDS # AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' 
file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec='`$ECHO 
"$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' hardcode_into_libs='`$ECHO 
"$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`' hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`' predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`' postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`' predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`' postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`' LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`' reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`' reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`' old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`' GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`' archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`' module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`' allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED 
"$delay_single_quote_subst"`' hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`' hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`' hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`' hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`' inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`' link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`' always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`' export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`' exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`' include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`' prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`' postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`' file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`' hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`' predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`' postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`' predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`' postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`' LTCC='$LTCC' LTCFLAGS='$LTCFLAGS' compiler='$compiler_DEFAULT' # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } # Quote evaled strings. 
for var in SHELL \ ECHO \ PATH_SEPARATOR \ SED \ GREP \ EGREP \ FGREP \ LD \ NM \ LN_S \ lt_SP2NL \ lt_NL2SP \ reload_flag \ OBJDUMP \ deplibs_check_method \ file_magic_cmd \ file_magic_glob \ want_nocaseglob \ DLLTOOL \ sharedlib_from_linklib_cmd \ AR \ AR_FLAGS \ archiver_list_spec \ STRIP \ RANLIB \ CC \ CFLAGS \ compiler \ lt_cv_sys_global_symbol_pipe \ lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ nm_file_list_spec \ lt_prog_compiler_no_builtin_flag \ lt_prog_compiler_pic \ lt_prog_compiler_wl \ lt_prog_compiler_static \ lt_cv_prog_compiler_c_o \ need_locks \ MANIFEST_TOOL \ DSYMUTIL \ NMEDIT \ LIPO \ OTOOL \ OTOOL64 \ shrext_cmds \ export_dynamic_flag_spec \ whole_archive_flag_spec \ compiler_needs_object \ with_gnu_ld \ allow_undefined_flag \ no_undefined_flag \ hardcode_libdir_flag_spec \ hardcode_libdir_separator \ exclude_expsyms \ include_expsyms \ file_list_spec \ variables_saved_for_relink \ libname_spec \ library_names_spec \ soname_spec \ install_override_mode \ finish_eval \ old_striplib \ striplib \ compiler_lib_search_dirs \ predep_objects \ postdep_objects \ predeps \ postdeps \ compiler_lib_search_path \ LD_CXX \ reload_flag_CXX \ compiler_CXX \ lt_prog_compiler_no_builtin_flag_CXX \ lt_prog_compiler_pic_CXX \ lt_prog_compiler_wl_CXX \ lt_prog_compiler_static_CXX \ lt_cv_prog_compiler_c_o_CXX \ export_dynamic_flag_spec_CXX \ whole_archive_flag_spec_CXX \ compiler_needs_object_CXX \ with_gnu_ld_CXX \ allow_undefined_flag_CXX \ no_undefined_flag_CXX \ hardcode_libdir_flag_spec_CXX \ hardcode_libdir_separator_CXX \ exclude_expsyms_CXX \ include_expsyms_CXX \ file_list_spec_CXX \ compiler_lib_search_dirs_CXX \ predep_objects_CXX \ postdep_objects_CXX \ predeps_CXX \ postdeps_CXX \ compiler_lib_search_path_CXX; do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[\\\\\\\`\\"\\\$]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done # Double-quote double-evaled strings. for var in reload_cmds \ old_postinstall_cmds \ old_postuninstall_cmds \ old_archive_cmds \ extract_expsyms_cmds \ old_archive_from_new_cmds \ old_archive_from_expsyms_cmds \ archive_cmds \ archive_expsym_cmds \ module_cmds \ module_expsym_cmds \ export_symbols_cmds \ prelink_cmds \ postlink_cmds \ postinstall_cmds \ postuninstall_cmds \ finish_cmds \ sys_lib_search_path_spec \ sys_lib_dlsearch_path_spec \ reload_cmds_CXX \ old_archive_cmds_CXX \ old_archive_from_new_cmds_CXX \ old_archive_from_expsyms_cmds_CXX \ archive_cmds_CXX \ archive_expsym_cmds_CXX \ module_cmds_CXX \ module_expsym_cmds_CXX \ export_symbols_cmds_CXX \ prelink_cmds_CXX \ postlink_cmds_CXX; do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[\\\\\\\`\\"\\\$]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done ac_aux_dir='$ac_aux_dir' xsi_shell='$xsi_shell' lt_shell_append='$lt_shell_append' # See if we are running on zsh, and set the options which allow our # commands through without removal of \ escapes INIT. if test -n "\${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi PACKAGE='$PACKAGE' VERSION='$VERSION' TIMESTAMP='$TIMESTAMP' RM='$RM' ofile='$ofile' _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. 
for ac_config_target in $ac_config_targets do case $ac_config_target in "src/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/config.h" ;; "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; "data/Makefile") CONFIG_FILES="$CONFIG_FILES data/Makefile" ;; "emboss_acd/Makefile") CONFIG_FILES="$CONFIG_FILES emboss_acd/Makefile" ;; "emboss_doc/Makefile") CONFIG_FILES="$CONFIG_FILES emboss_doc/Makefile" ;; "emboss_doc/html/Makefile") CONFIG_FILES="$CONFIG_FILES emboss_doc/html/Makefile" ;; "emboss_doc/text/Makefile") CONFIG_FILES="$CONFIG_FILES emboss_doc/text/Makefile" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" # Set up the scripts for CONFIG_HEADERS section. # No need to generate them if there are no CONFIG_HEADERS. # This happens for instance with `./config.status Makefile'. if test -n "$CONFIG_HEADERS"; then cat >"$ac_tmp/defines.awk" <<\_ACAWK || BEGIN { _ACEOF # Transform confdefs.h into an awk script `defines.awk', embedded as # here-document in config.status, that substitutes the proper values into # config.h.in to produce config.h. # Create a delimiter string that does not exist in confdefs.h, to ease # handling of long lines. ac_delim='%!_!# ' for ac_last_try in false false :; do ac_tt=`sed -n "/$ac_delim/p" confdefs.h` if test -z "$ac_tt"; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done # For the awk script, D is an array of macro values keyed by name, # likewise P contains macro parameters if any. Preserve backslash # newline sequences. 
ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* sed -n ' s/.\{148\}/&'"$ac_delim"'/g t rset :rset s/^[ ]*#[ ]*define[ ][ ]*/ / t def d :def s/\\$// t bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3"/p s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p d :bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3\\\\\\n"\\/p t cont s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p t cont d :cont n s/.\{148\}/&'"$ac_delim"'/g t clear :clear s/\\$// t bsnlc s/["\\]/\\&/g; s/^/"/; s/$/"/p d :bsnlc s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p b cont ' >$CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 for (key in D) D_is_set[key] = 1 FS = "" } /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { line = \$ 0 split(line, arg, " ") if (arg[1] == "#") { defundef = arg[2] mac1 = arg[3] } else { defundef = substr(arg[1], 2) mac1 = arg[2] } split(mac1, mac2, "(") #) macro = mac2[1] prefix = substr(line, 1, index(line, defundef) - 1) if (D_is_set[macro]) { # Preserve the white space surrounding the "#". print prefix "define", macro P[macro] D[macro] next } else { # Replace #undef with comments. This is necessary, for example, # in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. if (defundef == "undef") { print "/*", prefix defundef, macro, "*/" next } } } { print } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 fi # test -n "$CONFIG_HEADERS" eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; esac ac_MKDIR_P=$MKDIR_P case $MKDIR_P in [\\/$]* | ?:[\\/]* ) ;; */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; esac _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t s&@MKDIR_P@&$ac_MKDIR_P&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; :H) # # CONFIG_HEADER # if test x"$ac_file" != x-; then { $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 $as_echo "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? "could not create -" "$LINENO" 5 fi # Compute "$ac_file"'s index in $config_headers. _am_arg="$ac_file" _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$_am_arg" : 'X\(//\)[^/]' \| \ X"$_am_arg" : 'X\(//\)$' \| \ X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$_am_arg" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'`/stamp-h$_am_stamp_count ;; :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 $as_echo "$as_me: executing $ac_file commands" >&6;} ;; esac case $ac_file$ac_mode in "depfiles":C) test x"$AMDEP_TRUE" != x"" || { # Autoconf 2.62 quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. 
# We used to match only the files named 'Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`$as_dirname -- "$mf" || $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$mf" : 'X\(//\)[^/]' \| \ X"$mf" : 'X\(//\)$' \| \ X"$mf" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running 'make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`$as_dirname -- "$file" || $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$file" : 'X\(//\)[^/]' \| \ X"$file" : 'X\(//\)$' \| \ X"$file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir=$dirpart/$fdir; as_fn_mkdir_p # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ;; "libtool":C) # See if we are running on zsh, and set the options which allow our # commands through without removal of \ escapes. if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi cfgfile="${ofile}T" trap "$RM \"$cfgfile\"; exit 1" 1 2 15 $RM "$cfgfile" cat <<_LT_EOF >> "$cfgfile" #! $SHELL # `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. # Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # NOTE: Changes made to this file will be lost: look at ltmain.sh. # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is part of GNU Libtool. # # GNU Libtool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. 
# # GNU Libtool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, or # obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # The names of the tagged configurations supported by this script. available_tags="CXX " # ### BEGIN LIBTOOL CONFIG # Which release of libtool.m4 was used? macro_version=$macro_version macro_revision=$macro_revision # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # What type of objects to build. pic_mode=$pic_mode # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # An echo program that protects backslashes. ECHO=$lt_ECHO # The PATH separator for the build system. PATH_SEPARATOR=$lt_PATH_SEPARATOR # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # A sed program that does not truncate output. SED=$lt_SED # Sed that helps us avoid accidentally triggering echo(1) options like -n. Xsed="\$SED -e 1s/^X//" # A grep program that handles long lines. GREP=$lt_GREP # An ERE matcher. EGREP=$lt_EGREP # A literal string matcher. FGREP=$lt_FGREP # A BSD- or MS-compatible name lister. NM=$lt_NM # Whether we need soft or hard links. LN_S=$lt_LN_S # What is the maximum length of a command? max_cmd_len=$max_cmd_len # Object file suffix (normally "o"). objext=$ac_objext # Executable file suffix (normally ""). exeext=$exeext # whether the shell understands "unset". lt_unset=$lt_unset # turn spaces into newlines. SP2NL=$lt_lt_SP2NL # turn newlines into spaces. NL2SP=$lt_lt_NL2SP # convert \$build file names to \$host format. to_host_file_cmd=$lt_cv_to_host_file_cmd # convert \$build files to toolchain format. to_tool_file_cmd=$lt_cv_to_tool_file_cmd # An object symbol dumper. OBJDUMP=$lt_OBJDUMP # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method = "file_magic". file_magic_cmd=$lt_file_magic_cmd # How to find potential files when deplibs_check_method = "file_magic". file_magic_glob=$lt_file_magic_glob # Find potential files using nocaseglob when deplibs_check_method = "file_magic". want_nocaseglob=$lt_want_nocaseglob # DLL creation program. DLLTOOL=$lt_DLLTOOL # Command to associate shared and link libraries. sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd # The archiver. AR=$lt_AR # Flags to create an archive. AR_FLAGS=$lt_AR_FLAGS # How to feed a file listing to the archiver. archiver_list_spec=$lt_archiver_list_spec # A symbol stripping program. STRIP=$lt_STRIP # Commands used to install an old-style archive. RANLIB=$lt_RANLIB old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Whether to use a lock for old archive extraction. lock_old_archive_extraction=$lock_old_archive_extraction # A C compiler. LTCC=$lt_CC # LTCC compiler flags. 
LTCFLAGS=$lt_CFLAGS # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration. global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair. global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # Transform the output of nm in a C name address pair when lib prefix is needed. global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix # Specify filename containing input files for \$NM. nm_file_list_spec=$lt_nm_file_list_spec # The root where to search for dependent libraries,and in which our libraries should be installed. lt_sysroot=$lt_sysroot # The name of the directory that contains temporary libtool files. objdir=$objdir # Used to examine libraries when file_magic_cmd begins with "file". MAGIC_CMD=$MAGIC_CMD # Must we lock files when doing compilation? need_locks=$lt_need_locks # Manifest tool. MANIFEST_TOOL=$lt_MANIFEST_TOOL # Tool to manipulate archived DWARF debug symbol files on Mac OS X. DSYMUTIL=$lt_DSYMUTIL # Tool to change global to local symbols on Mac OS X. NMEDIT=$lt_NMEDIT # Tool to manipulate fat objects and archives on Mac OS X. LIPO=$lt_LIPO # ldd/readelf like tool for Mach-O binaries on Mac OS X. OTOOL=$lt_OTOOL # ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. OTOOL64=$lt_OTOOL64 # Old archive suffix (normally "a"). libext=$libext # Shared library suffix (normally ".so"). shrext_cmds=$lt_shrext_cmds # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Variables whose values should be saved in libtool wrapper scripts and # restored at link time. variables_saved_for_relink=$lt_variables_saved_for_relink # Do we need the "lib" prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Library versioning type. version_type=$version_type # Shared library runtime path variable. runpath_var=$runpath_var # Shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Permission mode override for installation of shared libraries. install_override_mode=$lt_install_override_mode # Command to use after installation of a shared archive. postinstall_cmds=$lt_postinstall_cmds # Command to use after uninstallation of a shared archive. postuninstall_cmds=$lt_postuninstall_cmds # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # As "finish_cmds", except a single script fragment to be evaled but # not shown. finish_eval=$lt_finish_eval # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Compile-time system search path for libraries. sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries. sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Whether dlopen is supported. 
dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # The linker used to build libraries. LD=$lt_LD # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds # A language specific compiler. CC=$lt_compiler # Is the compiler the GNU compiler? with_gcc=$GCC # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds archive_expsym_cmds=$lt_archive_expsym_cmds # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds module_expsym_cmds=$lt_module_expsym_cmds # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \${shlibpath_var} if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. 
hardcode_shlibpath_var=$hardcode_shlibpath_var # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms # Symbols that must always be exported. include_expsyms=$lt_include_expsyms # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds # Specify filename containing input files. file_list_spec=$lt_file_list_spec # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects postdep_objects=$lt_postdep_objects predeps=$lt_predeps postdeps=$lt_postdeps # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path # ### END LIBTOOL CONFIG _LT_EOF case $host_os in aix3*) cat <<\_LT_EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi _LT_EOF ;; esac ltmain="$ac_aux_dir/ltmain.sh" # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? sed '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) if test x"$xsi_shell" = xyes; then sed -e '/^func_dirname ()$/,/^} # func_dirname /c\ func_dirname ()\ {\ \ case ${1} in\ \ */*) func_dirname_result="${1%/*}${2}" ;;\ \ * ) func_dirname_result="${3}" ;;\ \ esac\ } # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_basename ()$/,/^} # func_basename /c\ func_basename ()\ {\ \ func_basename_result="${1##*/}"\ } # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\ func_dirname_and_basename ()\ {\ \ case ${1} in\ \ */*) func_dirname_result="${1%/*}${2}" ;;\ \ * ) func_dirname_result="${3}" ;;\ \ esac\ \ func_basename_result="${1##*/}"\ } # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_stripname ()$/,/^} # func_stripname /c\ func_stripname ()\ {\ \ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\ \ # positional parameters, so assign one to ordinary parameter first.\ \ func_stripname_result=${3}\ \ func_stripname_result=${func_stripname_result#"${1}"}\ \ func_stripname_result=${func_stripname_result%"${2}"}\ } # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\ func_split_long_opt ()\ {\ \ func_split_long_opt_name=${1%%=*}\ \ func_split_long_opt_arg=${1#*=}\ } # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\ func_split_short_opt ()\ {\ \ func_split_short_opt_arg=${1#??}\ \ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\ } # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\ func_lo2o ()\ {\ \ case ${1} in\ \ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\ \ *) func_lo2o_result=${1} ;;\ \ esac\ } # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_xform ()$/,/^} # func_xform /c\ func_xform ()\ {\ func_xform_result=${1%.*}.lo\ } # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_arith ()$/,/^} # func_arith /c\ func_arith ()\ {\ func_arith_result=$(( $* ))\ } # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_len ()$/,/^} # func_len /c\ func_len ()\ {\ func_len_result=${#1}\ } # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: fi if test x"$lt_shell_append" = xyes; then sed -e '/^func_append ()$/,/^} # func_append /c\ func_append ()\ {\ eval "${1}+=\\${2}"\ } # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\ func_append_quoted ()\ {\ \ func_quote_for_eval "${2}"\ \ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\ } # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: # Save a `func_append' function call where possible by direct use of '+=' sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: else # Save a `func_append' function call even when '+=' is not available sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: fi if test x"$_lt_function_replace_fail" = x":"; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5 $as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;} fi mv -f "$cfgfile" "$ofile" || (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" cat <<_LT_EOF >> "$ofile" # ### BEGIN LIBTOOL TAG CONFIG: CXX # The linker used to build libraries. LD=$lt_LD_CXX # How to create reloadable object files. reload_flag=$lt_reload_flag_CXX reload_cmds=$lt_reload_cmds_CXX # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds_CXX # A language specific compiler. CC=$lt_compiler_CXX # Is the compiler the GNU compiler? with_gcc=$GCC_CXX # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_CXX # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_CXX # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_CXX # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_CXX # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object_CXX # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX # Create a temporary old-style archive to link instead of a shared archive. 
old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds_CXX archive_expsym_cmds=$lt_archive_expsym_cmds_CXX # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds_CXX module_expsym_cmds=$lt_module_expsym_cmds_CXX # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld_CXX # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_CXX # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_CXX # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct_CXX # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \${shlibpath_var} if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute_CXX # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L_CXX # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic_CXX # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath_CXX # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_CXX # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols_CXX # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_CXX # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_CXX # Symbols that must always be exported. include_expsyms=$lt_include_expsyms_CXX # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds_CXX # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds_CXX # Specify filename containing input files. file_list_spec=$lt_file_list_spec_CXX # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_CXX # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects_CXX postdep_objects=$lt_postdep_objects_CXX predeps=$lt_predeps_CXX postdeps=$lt_postdeps_CXX # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_CXX # ### END LIBTOOL TAG CONFIG: CXX _LT_EOF ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi