ape/0000755000176200001440000000000014727666602011037 5ustar liggesusersape/MD50000644000176200001440000004656014727666602011362 0ustar liggesuserseb723b61539feef013de476e68b5c50a *COPYING eca4e03e403fea54d2a91142a74ec350 *DESCRIPTION d13f723713b9fb894de8e672c8160501 *NAMESPACE 1f20f68370ca015e28fcf9101465365a *NEWS 4dc75ff83940a80aafe96f6a67eefebf *R/CADM.global.R 4f3a09f6a829d788f273399faaf40285 *R/CADM.post.R fde32bab7748bc8d1e77a006c08159b2 *R/CDF.birth.death.R 7a33f85f11ba69a1ced3d2c6761892da *R/Cheverud.R 76ce31d4fe9867fd23069c495997c8d7 *R/DNA.R 2187a1289b767066d1efe1ebbe7c3b0c *R/MPR.R 74b40c31934a023c64485898b0bd42bb *R/MoranI.R 4286c596e7b321de071ff04fc90afef9 *R/PGLS.R 30b1f0782318dc8ec3212edb4f03c89a *R/RcppExports.R 6be0924b9f043abaee0968de5cf62aa6 *R/SDM.R 315ae41ee77b323b07e63ff861359ae2 *R/SlowinskiGuyer.R 7c3d7fb622af580317c7c6068e20b360 *R/ace.R 4ce79cf3f3ff49bef989454d86d0c891 *R/additive.R 1d76af894213570636d36c5a671192b3 *R/alex.R 9fe874382f024a98f62a0ccfcd6d09ac *R/all.equal.phylo.R 2210c4621d33663e9cc786669edef03e *R/apetools.R cee73b77507cbca29a31b5f48a308a66 *R/as.bitsplits.R c94018d5e792c72e20ce84085b2df9e7 *R/as.matching.R 95c7e4a4f49bf7f953d6ebf47ac8efc0 *R/as.phylo.R 4a7cca180afded35bd832a33cba177ea *R/as.phylo.formula.R 6cbe1ddceb9bb766fb411016eb9de8a2 *R/balance.R 42b5e63a5cd592358378de9d6421faee *R/binaryPGLMM.R dca39eb98aad1da417a695d1888e8178 *R/bind.tree.R 62af6b44a6b901ffdea315613dbebe0a *R/biplot.pcoa.R 472509f4a9168b53118b1ce7b480094c *R/birthdeath.R 6211edae4ef6683dd741b77f50cf5e27 *R/branching.times.R 9a60d5b6ab2075653649ea2a75fcab04 *R/checkValidPhylo.R e43b5dec7eae6d4bf9371e50117bf6ed *R/cherry.R 56491406d404355aefeb79db3d6f52cd *R/chronoMPL.R ccbbb6b601f3c807a51b3854943b4115 *R/chronopl.R 66297b92d2a53e8aff81ce935aae6711 *R/chronos.R f916a0df46651d0a6640a735c54c84a1 *R/clustal.R 97a5676345af66cb619555fff740ecd1 *R/coalescent.intervals.R 40de373d36d31e762b7cf4c3d6d7ffc7 *R/collapse.singles.R 6470157961eb3be476bed7027843833f *R/collapsed.intervals.R 01e979242ba4667e48a80fd32c03f254 *R/compar.gee.R 89ce53eb1bb8c0fb36e95810cf7cd769 *R/compar.lynch.R 207154a3a9b9ebbe5d7c29995565dc82 *R/compar.ou.R be772ef6f74e80bbd9be8dcb0d0516be *R/comparePhylo.R 8d7c71929156744fd0e4fac370bf9456 *R/compute.brtime.R 561a616ff6b7bc134c3f987a695c3c6a *R/cophenetic.phylo.R 13f2dac84d7b8a1da3df0e0c11b4ab1c *R/cophyloplot.R d136bcb6eac7ea91811b64b3a4cf7572 *R/corphylo.R 1c3460b48ed1e772c40e5993cd663d43 *R/dbd.R 3822f0bb0a9ed4c8c19654e86ef34359 *R/def.R 93480a5b64e0d37f5450891629557615 *R/delta.plot.R dfd5bb35f1cb1fd9154d023e0e4cfc2b *R/dist.gene.R e846fb2b9885d020556fd91ba9953614 *R/dist.topo.R b28ced504fedeb7f991f7eba10ad06df *R/diversi.gof.R 8b2ec4004022afdc7e2cb42f2657b628 *R/diversi.time.R 40050a0e0971396dd13e9e4ab81b8115 *R/drop.tip.R cfe4e065d023985ed139143e87b763b1 *R/evonet.R fceafc86fae624fd9037403ad301d35a *R/ewLasso.R 9a8b2b1b7641fecfd58aafa4f0b2f5b2 *R/extract.popsize.R 5c29d3ee785da587f4ad5288ec36b76a *R/gammaStat.R 1a809a99fc3e09b43b9e68028bd56996 *R/howmanytrees.R 68d848281455c4c91e9b91f16170e2f7 *R/identify.phylo.R e3832873f0592f2e9a376a1c29e67093 *R/is.binary.tree.R 62aff88f7f01f382ec0bf82577a54ffa *R/is.compatible.R 20716d4216cea771efd752bd5a49e7b4 *R/is.monophyletic.R a7bd37de10eb3f5c02096dfb9a492307 *R/is.ultrametric.R 6c906b61a1e654e6699979f7f08656d0 *R/ladderize.R c6536c739750042a7c3b6bf34b6fef71 *R/lmorigin.R 8b04202ee3e37522317935a9aa18b875 *R/ltt.plot.R b4f6a07ce62b4ce96bf969874ec7f4c0 *R/makeLabel.R 0b9954800f2733a846a7765aee26fd41 *R/makeNodeLabel.R ddb96055d6308eb4d656f815aac13a02 *R/mantel.test.R d2c16632492bfafd2ee18f2fe3d3d64a *R/matexpo.R 9cceb0fd970de6764deeea78c14f76b6 *R/mcmc.popsize.R 65290ac9653c1fd777ef889cd1d92b0e *R/me.R 9f58cc8e04d340f201ed3ab729a1e647 *R/mrca.R eac60117b8225071d992999cf64965d1 *R/mst.R 0fa04f6f06673eee17d1ebe5e75340eb *R/multi2di.R 0850fdd19c01d37ac632fc308632a463 *R/mvr.R ffc4b4e4586d6ffde3a995e4852b65ef *R/nj.R c51e9e25c8074f3fa765371771e0c0e3 *R/njs.R e7e034c589d3ec7517533e831c8ff8be *R/node.dating.R a7ddef8e10025f9922a6528057ed9799 *R/nodelabels.R ae2aeb0e8aef7f8d4b19939ca61b3482 *R/nodepath.R 56650c63704a0b04e1d522751bece3c6 *R/parafit.R dd95923aac8ab849dd128104355529ce *R/pcoa.R c2357dbf0ca7198c58d97e08ad3d16c4 *R/phydataplot.R e71db002c66c277bfb57f6914ca143d4 *R/phymltest.R 615818acb88bc5c6d232b8b052506176 *R/pic.R 4d3a78fe87c663440dab48d266a66d74 *R/plot.phylo.R 6c744c311f1bc32ff85d639b75ec958f *R/plot.phyloExtra.R 5c12c911fc422f8f8d8e160ef79f312d *R/plot.popsize.R dd009da9120a99c30ca4169b61e5d195 *R/plotPhyloCoor.R 020684fdb44028c4858bb0d2eb5d3848 *R/print.lmorigin.R d0e8bd41d5acc217fdee3578adcf635b *R/print.parafit.R 756e90b3b32ddc0b07b2ad8dbe43d5aa *R/rTrait.R 110836450d6ce9c36c0a086b2730b8df *R/read.GenBank.R b13dfb8f455b1c9e74a364085f72dbce *R/read.caic.R 0a94085734afd8c8144969745e63e9a2 *R/read.dna.R 23f0c2804b21bc58ca30284f1c920bae *R/read.gff.R b483ce78d4918b0364121dad3235cfb3 *R/read.nexus.R f32f0efa7f1831327407314d5551223d *R/read.nexus.data.R 8b445387c7c86013b96cd1f9a574cddc *R/read.tree.R 0d5a267466528603ac433a8a52f7ca11 *R/reconstruct.R 14ed53981b43d3c24b44b737d0877bef *R/reorder.phylo.R d74ea5a3fd628874c76f4abd63fc59f1 *R/root.R f584366b32e7414c669714ba5b84951b *R/rotate.R d5abeb2b5165f3d9613392d0c8f03802 *R/rtree.R 4f1c0fa161136e96ba30bef34b6787ac *R/rtt.R 74b3c054294129b6a5618a0548d3f1a8 *R/scales.R c73717df856fd5c5d575b28e71071324 *R/skyline.R 71bb3f2dcbb807ace238a6ce2af25fe0 *R/skylineplot.R 9c7b02a4625099f715700fb868226b0f *R/speciesTree.R 97c4c3d7cb1606fe6d5519d817156bde *R/subtreeplot.R bcc8f1fc8363728caba82129412d9e31 *R/subtrees.R dea05c19df9a03075f75bbe49cc1bb76 *R/summary.phylo.R 8fbd1589f5d98d76b1154cffb8d4d1f5 *R/treePop.R b5081fca8758fe4458183c3e25e3e661 *R/triangMtd.R aca0e4f1c8aae416e5941adaaf47ba64 *R/unique.multiPhylo.R 732508f059efe8a61dec606f8735337c *R/varcomp.R a40ae9ad30c221d4ed14b90e8b406f93 *R/vcv.phylo.R 31b3bb1feed474692f07fcebe3a61ac7 *R/vcv2phylo.R c9adce0f3d0120434ca22f2bb2a0b0c8 *R/which.edge.R a091953ea941768033d85494e6457691 *R/write.dna.R a6660649caf2e2a28dfbc813f24efc3c *R/write.nexus.R a6d3d8d0fb844c0670d5674a1def2bd0 *R/write.nexus.data.R 9c8dd90c5e633a1b97023863f47efd1e *R/write.phyloXML.R 8393ff7c6b124798909b97e60a9af1b2 *R/write.tree.R 774ce72875903259aade5344f9a70aa4 *R/yule.R c8d3aa3fe64e75e61af07a1b11c74f3f *R/yule.time.R 1eb44ff9e5a036eb845faa1598ce5009 *R/zoom.R 3387c0d0c1f913f8471e1bb34bd2e516 *R/zzz.R 86cc44c74ad6bc4c8f4e623248e99c6c *README.md 76161b65639451c966be75488458b3c3 *build/partial.rdb c99acb8a85df550093b4d4721b5d4dc6 *build/vignette.rds db9083e8750aff839d5ebf3ed982f1f1 *data/HP.links.rda 9d9f9232839665422709ded1e541d038 *data/bird.families.rda a14a6df0f3a735ebc056065077788c90 *data/bird.orders.rda 100769335d485fc11e54b766753a9204 *data/carnivora.csv.gz 4eaf8cbaefa2e8f8d395a9b482ee9967 *data/chiroptera.rda 1c74c3b99d08b0e17eea3ec1065c12d2 *data/cynipids.rda 7fe760c2f3b4deba0554aae6138cb602 *data/gopher.D.rda a50de1a68c246297839c26d592191504 *data/hivtree.newick.rda 8d14f95319d0a5cdc8faa60a1d0085ce *data/hivtree.table.txt.gz 31be81fe3faca11f98d3e74c090bc59e *data/lice.D.rda 38edbd84a0a067322c40db8d71fb1289 *data/lmorigin.ex1.rda e3ce9e3444182fea2e65df2e150ea0db *data/lmorigin.ex2.rda ce7a56faebdf286fdf5ba6c8c3699a79 *data/mat3.RData e2d1339025ed901009bfed58dc6505ff *data/mat5M3ID.RData 101d0ab2e981b0987cde704a2dee1d8d *data/mat5Mrand.RData 39e4fece2bdc527d7a9d4d83d023a947 *data/woodmouse.rda a20dff855d5d35b8072413bd3c5fd126 *inst/CITATION 44e0e7e0eb4c9edcdbe731d15ad245d1 *inst/doc/DrawingPhylogenies.R 9d6935787463e165da55172f2d380fca *inst/doc/DrawingPhylogenies.Rnw 452ffd2d69a0cad8d0d312b8c61b10df *inst/doc/DrawingPhylogenies.pdf ae58e3a830ca118636ac7f12cee14cdd *inst/doc/MoranI.R dcd55bda01ae5c227823285f250a5adf *inst/doc/MoranI.Rnw 35de25b03fc3f54930ccaccfdb87fc2f *inst/doc/MoranI.pdf 6db8a8dd3ff8d7bb64ecb15b3d7e4ac3 *inst/doc/RandomTopologies.R 8b4fc32c71458880a760eb9cc4e018fc *inst/doc/RandomTopologies.Rnw 5fcc49f193a4c4e31a6207d43648e498 *inst/doc/RandomTopologies.pdf 96c03e13b985ac39c71a6558fda39aed *man/AAbin.Rd 7f7b284190c3b52358bd330ea58cdf8e *man/CADM.global.Rd c42fdaf2e7435a197fc9ab1a5824c67a *man/DNAbin.Rd d94f358593695b1713840df5a8c000ba *man/DNAbin2indel.Rd 45e47a857e1cba34b898ddd6e846510d *man/Initialize.corPhyl.Rd ab4685ee7e884b03d622fcf841f5da08 *man/LTT.Rd ab69dfa02f245501d05550e64e6a90a5 *man/MPR.Rd 1c59c53b21d33352ff2d45c97af02e1b *man/MoranI.Rd c7866d0fc32be91d9cfb6ab3e3fc1e1a *man/SDM.Rd bebccfd8695268f20d906e2a2c30706a *man/ace.Rd 1e45e292cea5a4f0154ffeafd67fd832 *man/add.scale.bar.Rd 78bc6e463d7708061f49312b6e412886 *man/additive.Rd 25a2859708b9a281a0d682e4376f3f53 *man/alex.Rd 2ff5d30c6fb1c5458643f1b3c09e76da *man/all.equal.DNAbin.Rd d69fcc8e2e02aff7cdd0decbbd892e40 *man/all.equal.phylo.Rd b9e6f622b239d6ef2614285f3a343ee3 *man/alview.Rd 46dcfadd6545d4a795833202c01b17ff *man/ape-internal.Rd a7274516292fbd249ef8e04d0b2a6c90 *man/ape-package.Rd f52b5e2d34295171a6da2359738c99d5 *man/apetools.Rd 5bba4ae4bfc66b613855cfc182d9b1bc *man/as.alignment.Rd fda9f1e51ff562e28dc79b6c0780ec1a *man/as.bitsplits.Rd 4f014cf2923e2eab6188acd48e8096fa *man/as.matching.Rd 84b636fd9928b77de91504121216da61 *man/as.phylo.Rd 697944c0dca01d11817401e1a57049d2 *man/as.phylo.formula.Rd b485c9432402006594c349a0e5929ec4 *man/axisPhylo.Rd ad514b163e70bfbc11dfd34a450799f8 *man/balance.Rd 1f7aab487d391940a07950332696915e *man/base.freq.Rd 524a1163eac56d1fc7f65bcd6c74a8d0 *man/bd.ext.Rd 54c4abc40d52ae2ac7b7cd2ffe9ca380 *man/bd.time.Rd f929bc1b6391c57a6b0099c4561fd7be *man/binaryPGLMM.Rd e999ac6ef275c168aad217dfec402266 *man/bind.tree.Rd 822558d4f7ee04257b2c903c53ec4344 *man/bionj.Rd 923169087dca05f046cebcac8946e136 *man/bird.families.Rd 0e41770e1e6d0b8d90c4cf51049213cb *man/bird.orders.Rd ef1c15d5d93410c21179997431112209 *man/birthdeath.Rd ad8fb2e5cdb596adbe5b90bc5c45ce12 *man/boot.phylo.Rd 5a64b90d3a6c7a8204946b00f45f4cfc *man/branching.times.Rd 99ffa532ab4397c374eaddd0f2ff8469 *man/c.phylo.Rd 6ddcfd41f33b85334a04f2c252a2f561 *man/carnivora.Rd a35edcfb2299e68b6378961c0755c4ea *man/checkAlignment.Rd c3f19bb492f50bfb1b4f1928a4671abd *man/checkLabel.Rd 5ff8c7e8fad519d978f166948c03059c *man/checkValidPhylo.Rd 64c3996ca6bcc97d0d2e2cf3361f8f71 *man/cherry.Rd 8745774298f92d8415b508c990976fa2 *man/chiroptera.Rd c68be7ff5a08664691df3e2813468490 *man/chronoMPL.Rd c1f01c6200b2f1e2901d45d40daae404 *man/chronopl.Rd 4c8dddb7a9a1c6a0c945c37ef7e40f31 *man/chronos.Rd 8d0fce8a8546e5cc2344ba24d527a791 *man/clustal.Rd 866af6e8d769b3d6972ef8e1ac849a12 *man/coalescent.intervals.Rd b114a09e0cb474323d5398ec4ee83d3c *man/collapse.singles.Rd bff5a7826f5a39767601e32ceb776247 *man/collapsed.intervals.Rd 683bf7d71e50f4ec5cd921073291eddd *man/compar.cheverud.Rd 4d8ee141d7b6b323ef5ee9446000ae32 *man/compar.gee.Rd 84078c1644fb841b9ed5ac7d17cba59b *man/compar.lynch.Rd 8b079bc165c375f823c40074ad9106c6 *man/compar.ou.Rd a49d26747890cef3b75bcc46b0b8b40b *man/comparePhylo.Rd a1d0980f32039aa2ca0a0b1b7a5591f8 *man/compute.brlen.Rd dbb4b5b1d5136bd32660699d9d4cc197 *man/compute.brtime.Rd 2673588811016427759ca9a1ef56c3e9 *man/consensus.Rd cad78cf5a7c43d4aa02bd2506440b8ce *man/cophenetic.phylo.Rd 1ca9ec0cb824468adc03884c940c7aa3 *man/cophyloplot.Rd 643275d7997dfcd82b1ff08057b3f1a7 *man/corBlomberg.Rd 445d8651a1977bb4831df4661a43e3bf *man/corBrownian.Rd 65d2063e328fb466d6126bbebec7d061 *man/corClasses.Rd dfff1f03f7cc26a06b49284bf223aae1 *man/corGrafen.Rd c07362b1c46059f78e603af68add6165 *man/corMartins.Rd 10242491bb412743df6756d544399c4c *man/corPagel.Rd e259ee771509883d3afe8eafd41e9cb1 *man/corphylo.Rd 0a61099bc0490d00f6622d049c54f6d7 *man/correlogram.formula.Rd c199605f9d353b303acad4896f9b39a5 *man/cynipids.Rd 34164e368efd0d5d961fe62e9ede75e8 *man/dbd.Rd c0763a70c4965a6b03df3e5be68e450d *man/def.Rd 207a7b94141275474c4b2f5e51c09511 *man/degree.Rd f9bb6cf986443f16c3caba084e337580 *man/del.gaps.Rd fb449c7b81f8abbc5eaacaa1ce7b77ea *man/delta.plot.Rd 960375a462ce93be3b5a07d14523279f *man/dist.dna.Rd 38011e81d28a120d88eead09e62c154a *man/dist.gene.Rd 3ed534c12cc724155bb9582767db27ad *man/dist.topo.Rd c7cc398115be066740ca4fb037394727 *man/diversi.gof.Rd d646ea0343999bd0e38e86dcf6c12018 *man/diversi.time.Rd da8898476bb15b627b34ee1093b9aeb4 *man/diversity.contrast.test.Rd cf3e0f6cbbc15a6a213b247e01961a64 *man/dnds.Rd 4f95c0e85f211245cc81bb0425fa5e8d *man/drop.tip.Rd 66537062a3d706b200f57791daa86102 *man/edges.Rd 3eb82a23c0ece8a095c8f472736f33a1 *man/evonet.Rd 9697317017dd41a68961522fb26b6222 *man/ewLasso.Rd b81094916d0dda1483e31da6681f3bc2 *man/fastme.Rd eea313e8ee32597b4cec120d23113642 *man/gammaStat.Rd f8edffddff0582a7ea357652a1af7aa1 *man/getAnnotationsGenBank.Rd ccf012737965d0de0b4ca2dd9367d9a6 *man/hivtree.Rd 942b38a17ab6ecfa126118370f59bdf6 *man/howmanytrees.Rd 86c49d080fdffd614d8056021e91cc55 *man/identify.phylo.Rd 45b9403b6c839a7566b42e216565fbe8 *man/image.DNAbin.Rd ce6b1d4cabb4e6f246b147e8ab520435 *man/is.binary.tree.Rd 18329c0c629c76de2bd4823aefecdfb5 *man/is.compatible.Rd d2de8fd9549ef01a1dddeb726dd77fcf *man/is.monophyletic.Rd ad9e7316219c3238b44b02d55c44b4d3 *man/is.ultrametric.Rd 17917c9fed877d1c9b2776e56c46e191 *man/kronoviz.Rd 4d8fee8d142528834332038e49bd2e65 *man/label2table.Rd 2afa6305e48e4c47a7d94d46401f85a3 *man/ladderize.Rd 472dc874a45f046c8f6ac7b394b6552f *man/latag2n.Rd 4b86460e4e5d993abf13c99b9744dbd6 *man/lmorigin.Rd e511f2fff7d9cf6dda5598255f9fd356 *man/ltt.plot.Rd 50eb320c835587c23f5a4a871f9fd8a9 *man/makeLabel.Rd 0bdee0c2fb881b653847a57a7ae2ee7d *man/makeNodeLabel.Rd 12e661774a33b431717195cf652ebe4c *man/mantel.test.Rd 97cf5ddb9352b0545ed225d16d750ffb *man/mat3.Rd f56f6f49c89c6bc850a7aee1bce5e0bd *man/mat5M3ID.Rd 0d8eb60696c80de3cc9cc85ba66373a5 *man/mat5Mrand.Rd 69ae0cb181240bb8ec168e69f1ba44bb *man/matexpo.Rd 63b52766bbe7ad0bd31aa5f04026b21a *man/mcconwaysims.test.Rd 58c13c9bb88f6d36eafeb62619cd970e *man/mcmc.popsize.Rd 5c5b3f307a46d0739d3979693ebb8db5 *man/mixedFontLabel.Rd ab32c16c1290a7a5057d743951f041b0 *man/mrca.Rd e43c99975336bdf600dcdf0bc93cf4e9 *man/mst.Rd 4eb505aab5154fc9bf729a058ec08a72 *man/multi2di.Rd 23a1f4459c13c5d9652816b71d5325c7 *man/multiphylo.Rd 24113582bd3f1363a2910778b0f0056c *man/mvr.Rd 3df9e16b8a09df3f1dba5c4327a635fc *man/nj.Rd b1db2e34c13cc343e4ecdd7ae2ca31aa *man/njs.Rd 66ab55c2ecf8de706b585863a9f20d74 *man/node.dating.Rd a589b4cc04505185dc9ef1817c7ae304 *man/node.depth.Rd 856927a8a59da8bffc5b7c71b0246cdc *man/nodelabels.Rd 447ae03684ff56a4a24932aec182acf3 *man/nodepath.Rd c2e2f35f4e233265c86b7967ec2d0630 *man/parafit.Rd 6714815a437ddde9e4c155595afe9276 *man/pcoa.Rd 0089077e3be11d37650331437e4a8a48 *man/phydataplot.Rd b7aa7ad34185b64fb4eb7ef717c82739 *man/phymltest.Rd 9c769e37126330e8243610d21300f9a8 *man/pic.Rd 0363aa3fa3e86160934e2359c8ac5323 *man/pic.ortho.Rd 265527313d479d3625df7680c7479cd1 *man/plot.correlogram.Rd 25b7c4ba15c7f9daa89f435268207ba4 *man/plot.phylo.Rd 896198fa3a0ce916346c8b507bf707bf *man/plot.phyloExtra.Rd d39058b543c60b3f2a2c22cd88325bb6 *man/plot.varcomp.Rd c6ff668831b791540e543df87d952ad8 *man/plotTreeTime.Rd b24438c42cea969302ec6ba61002426e *man/print.phylo.Rd 33969ca9cf96574407bb764b3c7c5c10 *man/rDNAbin.Rd 34d2d5a79ad69be395a5325d19c78fd9 *man/rTraitCont.Rd 40d93ec990611e624d5e1241118474ba *man/rTraitDisc.Rd 81f756fdf2ec4c968095595dece8338f *man/rTraitMult.Rd 9110df8f3f700c6ba643ebe58d93c94a *man/read.GenBank.Rd 1317e2519ff9b7c61b7e9c21c3b0b5bb *man/read.caic.Rd 9f7aeb56bf1a53a28823fbad943bfe51 *man/read.dna.Rd 4bd8a114f7ab4b9ffffd7fd4d1799b36 *man/read.gff.Rd fdc2393e70328ae27034c54bf8a847c7 *man/read.nexus.Rd 1ff6904a6d77fdb1a515cd4c04d4220e *man/read.nexus.data.Rd d102b3df70bd7a5f764bf2de665025c8 *man/read.tree.Rd e9c081ee9bb174f530be5cab9ec954e5 *man/reconstruct.Rd 29fd18c1d0729228307b82dc3ef70ede *man/reorder.phylo.Rd 23cb928f62f9c7103244fbd752ff9a81 *man/richness.yule.test.Rd 272d66917985780ed4ba250087765d7d *man/rlineage.Rd 60547251bffd73499f63c4de6b84a4cf *man/root.Rd 6b97ea2fd96bf948fca764deab3a6e76 *man/rotate.Rd 5d5f04688b95ccfa0f6c611f19c701d6 *man/rtree.Rd 2d8fe8b90578d7f9070a70032acf44b9 *man/rtt.Rd 2e343e95a46c16a34fe7a80fe0b6df7f *man/seg.sites.Rd f125fe172ee83b8bb4adaede4b4d3b43 *man/skyline.Rd bf851aa61b6310afa2ae1358c189dad7 *man/skylineplot.Rd e19ab3b2473f70bd47533b321bed84fa *man/slowinskiguyer.test.Rd 3e324bedd9f05850b904bf9dc66cbc24 *man/solveAmbiguousBases.Rd 201f06e5f36495dda446d523dbabf354 *man/speciesTree.Rd e6de61e19d1053602ee6a0123ba732b3 *man/stree.Rd 1f1309e2ec6327974952d546241a6024 *man/subtreeplot.Rd ef8aa9721275f20ddd38612cddf8300c *man/subtrees.Rd d684cc640acf041c6f1124e08426c8c8 *man/summary.phylo.Rd 587737eccd2f0e06192977e079d11645 *man/trans.Rd b6c871959a20eb198c02267c9ce7c74e *man/treePop.Rd 7749863f505076f2288c106eb465805e *man/trex.Rd 8d5cf8c4cc27211d87bbed954ca50bbb *man/triangMtd.Rd 9e7d047e16ff821f3b7254b92d027bf0 *man/unique.multiPhylo.Rd e0367dac321ec52f8c962416fb57305d *man/updateLabel.Rd 3fc83bd5ac2be01f581d0aa5a1038b80 *man/varCompPhylip.Rd 8be1d0b6dd12f066000dc70d31c9d831 *man/varcomp.Rd 5c459720196654a10cfff0390bffc14f *man/vcv.phylo.Rd f7ce7760e913c10f1cb7be05315b39fc *man/vcv2phylo.Rd a5d3cbf19df84d0f1e4e3a4650945cbf *man/weight.taxo.Rd f4f11cb67f304e5de80d4768508dd765 *man/where.Rd ef9658f5343bdcbfc3673c7f936934f5 *man/which.edge.Rd 9b83a148295dcfcb3994fdb6eaab0174 *man/woodmouse.Rd a0f59b5f53c0e8f5d45380cbaf71fb58 *man/write.dna.Rd 5863d8e9f91a9f8c5723aee5f69bb3e6 *man/write.nexus.Rd 9813a7c7a95bf532ffa1297d8369457c *man/write.nexus.data.Rd 68c0d139b9f014602ba370bd27933710 *man/write.phyloXML.Rd a549d6c95935c86edecbad3beac8aa1a *man/write.tree.Rd 2568c6529e40fae7effe88b6c73998a1 *man/yule.Rd 7df89ac6996c82b52c4c9b3734807de1 *man/yule.cov.Rd 8612123f3617699a8e29ddf0541dc9ee *man/yule.time.Rd a00006ae345bb9379312e81694de3885 *man/zoom.Rd 225b45505001f9311e1d76f3a880cd70 *src/BIONJ.c 2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars c948a9234ab872e4c076390eda80c465 *src/NNI.c e3213d06c9d8cfe2deec76bc43d7eace *src/RcppExports.cpp 122ba51b574f7f7acd5ea4b6256cea5f *src/SPR.c 9733c82cd67c4bd41aea44981f4ac8b8 *src/additive.c 075000b9bbb05d4d5dadb98062d38d93 *src/ape.c 7eaffd3d85df9a1e4460905e9ca5eced *src/ape.h 19b564aab62c464c092cdf2f0d5cf447 *src/bNNI.c 8e092de02c58cce3c85dc944665f1f03 *src/bionjs.c 1873b242cd0926effe77daca9a8e9a63 *src/bipartition.c 88dc54af812e1a381e81bea4b635fd9e *src/bitsplits.c 81e4ad60b418966cdd09e608e074bb28 *src/delta_plot.c d044c562d8637b2e61f73b23c7500955 *src/dist_dna.c 0a8b6db23d398b9ff13ecb4cede88a58 *src/dist_nodes.c 005ab69356525fbbc1b69950341308c2 *src/ewLasso.c afa27403972145d083d7f7f8b2536b98 *src/heap.c a284dc0d46dd1b091f725aa7c5fe0bf1 *src/mat_expo.c 6a0e35109dbc4fda76817d85e7f65b28 *src/me.c b2232146644d748cdd68153769619d5b *src/me.h cc4c695b4b001305b83658df8075850e *src/me_balanced.c cf2bdcf9eda24eb6b572584c0ab79fb4 *src/me_ols.c 8f25a7d4686c85b25862941c023e5974 *src/mvr.c f9e2d2d972065aa012820f7e191fa97d *src/mvrs.c 3b9a19c41ef0e1a63dd88bc72a5096b0 *src/nj.c c027ccb515c9f76c10ae390e60039a40 *src/njs.c 72e310102d7db22ae9b6794d153889ac *src/pic.c 9643db2c211ee54a978ac50f3909cd43 *src/plot_phylo.c e5d31418f365d5a7c2c243cb78d30a3b *src/prop_part.cpp aa8d9966da3b7e970879a49a40c05a07 *src/rTrait.c cea461f86f87000ed51808282d799600 *src/read_dna.c 295aeaaa825f6f403fb9d1aa94755c7e *src/reorder_Rcpp.cpp 77c49127a8c5b23efcfe90a0b29bd7ba *src/reorder_phylo.c c73f78b21f3cada4ebb5035ac866d0f3 *src/treePop.c 731a1b013f6ccf4ca261a8875da53d32 *src/tree_build.c 42c108c79f26d1940f94b98fcbe271e1 *src/tree_phylo.c b6b0929c7faa1b76a8898f12907aabd5 *src/triangMtd.c cca4da03778dfa7c87fa6bfb97aac13c *src/triangMtds.c 72e04107c752568641219bf57b5731a8 *src/ultrametric.c 9d6935787463e165da55172f2d380fca *vignettes/DrawingPhylogenies.Rnw dcd55bda01ae5c227823285f250a5adf *vignettes/MoranI.Rnw 8b4fc32c71458880a760eb9cc4e018fc *vignettes/RandomTopologies.Rnw 4fce0bc02d12ec1600c49de9c27d85ba *vignettes/ape.bib 31968dd62224a9e5ac7f9969af87f9f0 *vignettes/ape.sty ape/R/0000755000176200001440000000000014726075215011231 5ustar liggesusersape/R/root.R0000644000176200001440000003052514547653003012342 0ustar liggesusers## root.R (2024-11-01) ## Roots Phylogenetic Trees ## Copyright 2004-2024 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.rooted <- function(phy) UseMethod("is.rooted") .is.rooted_ape <- function(phy, ntips) { if (!is.null(phy$root.edge)) return(TRUE) tabulate(phy$edge[, 1])[ntips + 1] <= 2 } is.rooted.phylo <- function (phy) .is.rooted_ape(phy, length(phy$tip.label)) is.rooted.multiPhylo <- function(phy) { phy <- unclass(phy) labs <- attr(phy, "TipLabel") if (is.null(labs)) sapply(phy, is.rooted.phylo) else sapply(phy, .is.rooted_ape, ntips = length(labs)) } unroot <- function(phy, ...) UseMethod("unroot") .unroot_ape <- function(phy, n, collapse.singles = FALSE, keep.root.edge = FALSE) { if (collapse.singles) phy <- collapse.singles(phy) ## n: number of tips of phy N <- dim(phy$edge)[1] if (N < 3) stop("cannot unroot a tree with less than three edges.") dgr <- tabulate(phy$edge, n + phy$Nnode) if (all(dgr < 3)) stop("cannot unroot a tree where all nodes are singleton") ## delete first the root.edge (in case this is sufficient to ## unroot the tree, i.e. there is a multichotomy at the root) if (is.null(phy$root.edge)) { keep.root.edge <- FALSE } else { if (!keep.root.edge) phy$root.edge <- NULL } if (!.is.rooted_ape(phy, n)) return(phy) wbl <- !is.null(phy$edge.length) ROOT <- n + 1L basal <- dgr[ROOT] == 1 if (keep.root.edge) basal <- FALSE if (keep.root.edge) { ## add a terminal edge nds <- phy$edge > n ## increment node #s since we add a tip phy$edge[nds] <- phy$edge[nds] + 1L ROOT <- ROOT + 1L phy$edge <- rbind(phy$edge, c(ROOT, n + 1L)) if (wbl) phy$edge.length <- c(phy$edge.length, phy$root.edge) phy$root.edge <- NULL phy$tip.label <- c(phy$tip.label, "[ROOT]") n <- n + 1L N <- N + 1L } else { if (basal) { ## make the most basal edge terminal i <- which(phy$edge[, 1L] == ROOT) NEWROOT <- phy$edge[i, 2L] ROOT <- ROOT + 1L phy$edge[phy$edge == NEWROOT] <- ROOT n <- n + 1L phy$edge[i, 1L] <- ROOT phy$edge[i, 2L] <- n if (!is.null(phy$node.label)) { newlab <- phy$node.label[1L] phy$node.label <- phy$node.label[-1L] } else { newlab <- "[ROOT]" } phy$tip.label <- c(phy$tip.label, newlab) phy$Nnode <- phy$Nnode - 1L } } ### EDGEROOT[1]: the edge to be deleted ### EDGEROOT[2]: the target where to stick the edge to be deleted ### If the tree is in pruningwise (or postorder) order, then ### the last two edges are those connected to the root; the node ### situated in phy$edge[N - 2L, 1L] will be the new root... ophy <- attr(phy, "order") if (is.null(ophy) || !(ophy %in% c("cladewise", "postorder", "pruningwise", # fixed by KS "preorder"))) { # "preorder" is in TreeTools phy <- .reorder_ape(phy, "cladewise", FALSE, as.integer(n), 1L) ophy <- attr(phy, "order") } if (ophy != "cladewise") { NEWROOT <- phy$edge[N - 2L, 1L] EDGEROOT <- c(N, N - 1L) ## make sure EDGEROOT is ordered as described above: if (phy$edge[EDGEROOT[1L], 2L] != NEWROOT) EDGEROOT <- EDGEROOT[2:1] } else { ### ... otherwise, we remove one of the edges coming from ### the root, and eventually adding the branch length to ### the other one also coming from the root. ### In all cases, the node deleted is the 2nd one (numbered ### nb.tip+2 in 'edge'), so we simply need to renumber the ### nodes by adding 1, except the root (this remains the ### origin of the tree). EDGEROOT <- which(phy$edge[, 1L] == ROOT) ## make sure EDGEROOT is ordered as described above: if (phy$edge[EDGEROOT[1L], 2L] <= n) EDGEROOT <- EDGEROOT[2:1] NEWROOT <- phy$edge[EDGEROOT[1L], 2L] } phy$edge <- phy$edge[-EDGEROOT[1L], ] s <- phy$edge == NEWROOT # renumber the new root phy$edge[s] <- ROOT s <- phy$edge > NEWROOT # renumber all nodes greater than the new root phy$edge[s] <- phy$edge[s] - 1L if (wbl) { phy$edge.length[EDGEROOT[2L]] <- phy$edge.length[EDGEROOT[2L]] + phy$edge.length[EDGEROOT[1L]] phy$edge.length <- phy$edge.length[-EDGEROOT[1L]] } phy$Nnode <- phy$Nnode - 1L if (!is.null(phy$node.label)) { if (NEWROOT == n + 2L) phy$node.label <- phy$node.label[-1] else { lbs <- phy$node.label tmp <- lbs[NEWROOT - n] lbs <- lbs[-c(1, NEWROOT - n)] # fix by KS (2019-06-18) phy$node.label <- c(tmp, lbs) } } phy } unroot.phylo <- function(phy, collapse.singles = FALSE, keep.root.edge = FALSE, ...) .unroot_ape(phy, length(phy$tip.label), collapse.singles = collapse.singles, keep.root.edge = keep.root.edge) unroot.multiPhylo <- function(phy, collapse.singles = FALSE, keep.root.edge = FALSE, ...) { oc <- oldClass(phy) class(phy) <- NULL labs <- attr(phy, "TipLabel") if (is.null(labs)) phy <- lapply(phy, unroot.phylo, collapse.singles = collapse.singles, keep.root.edge = keep.root.edge) else { phy <- lapply(phy, .unroot_ape, n = length(labs), collapse.singles = collapse.singles, keep.root.edge = keep.root.edge) attr(phy, "TipLabel") <- labs } class(phy) <- oc phy } root <- function(phy, ...) UseMethod("root") root.phylo <- function(phy, outgroup, node = NULL, resolve.root = FALSE, interactive = FALSE, edgelabel = FALSE, ...) { if (!inherits(phy, "phylo")) stop('object not of class "phylo"') phy <- reorder(phy) n <- length(phy$tip.label) ROOT <- n + 1L if (interactive) { node <- identify(phy)$nodes cat("You have set resolve.root =", resolve.root, "\n") } ## added to solve some issues (2021-04-15): if (!interactive && is.null(node) && length(outgroup) > 1 && resolve.root) phy <- unroot(phy) ## -> the condition check should insure compatibility e1 <- phy$edge[, 1L] e2 <- phy$edge[, 2L] wbl <- !is.null(phy$edge.length) if (!is.null(node)) { if (node <= n) stop("incorrect node#: should be greater than the number of taxa") outgroup <- NULL newroot <- node } else { if (is.numeric(outgroup)) { if (any(outgroup > n)) stop("incorrect taxa#: should not be greater than the number of taxa") } if (is.character(outgroup)) { outgroup <- match(outgroup, phy$tip.label) if (anyNA(outgroup)) stop("specified outgroup not in labels of the tree") } if (length(outgroup) == n) return(phy) outgroup <- sort(outgroup) # used below ## First check that the outgroup is monophyletic, unless it has only one tip if (length(outgroup) > 1) { pp <- prop.part(phy) ingroup <- (1:n)[-outgroup] newroot <- 0L for (i in 2:phy$Nnode) { if (identical(pp[[i]], ingroup)) { ## inverted with the next if (... (2013-06-16) newroot <- e1[which(e2 == i + n)] break } if (identical(pp[[i]], outgroup)) { newroot <- i + n break } } if (!newroot) stop("the specified outgroup is not monophyletic") MRCA.outgroup <- i + n } else newroot <- e1[which(e2 == outgroup)] } N <- Nedge(phy) oldNnode <- phy$Nnode Nclade <- tabulate(e1)[ROOT] # degree of the root node ## if only 2 edges connect to the root, we have to fuse them: fuseRoot <- Nclade == 2 if (newroot == ROOT) { if (!resolve.root) return(phy) # else (resolve.root == TRUE) if (length(outgroup) > 1) outgroup <- MRCA.outgroup if (!is.null(node)) stop("ambiguous resolution of the root node: please specify an explicit outgroup") k <- which(e1 == ROOT) # find the basal edges if (length(k) > 2) { i <- which(e2 == outgroup) # outgroup is always of length 1 here j <- k[k != i] newnod <- oldNnode + n + 1L phy$edge[j, 1] <- newnod phy$edge <- rbind(c(ROOT, newnod), phy$edge) if (wbl) phy$edge.length <- c(0, phy$edge.length) phy$Nnode <- phy$Nnode + 1L } } else { phy$root.edge <- NULL # just in case INV <- logical(N) w <- which(e2 == newroot) anc <- e1[w] i <- w nod <- anc if (nod != ROOT) { INV[w] <- TRUE i <- w - 1L repeat { if (e2[i] == nod) { if (e1[i] == ROOT) break INV[i] <- TRUE nod <- e1[i] } i <- i - 1L } } ## we keep the edge leading to the old root if needed: if (!fuseRoot) INV[i] <- TRUE ## bind the other clades... if (fuseRoot) { # do we have to fuse the two basal edges? k <- which(e1 == ROOT) k <- if (k[2] > w) k[2] else k[1] phy$edge[k, 1] <- phy$edge[i, 2] if (wbl) phy$edge.length[k] <- phy$edge.length[k] + phy$edge.length[i] } if (fuseRoot) phy$Nnode <- oldNnode - 1L ## added after discussion with Jaime Huerta Cepas (2016-07-30): if (edgelabel) { phy$node.label[e1[INV] - n] <- phy$node.label[e2[INV] - n] phy$node.label[newroot - n] <- "" } phy$edge[INV, ] <- phy$edge[INV, 2:1] if (fuseRoot) { phy$edge <- phy$edge[-i, ] if (wbl) phy$edge.length <- phy$edge.length[-i] N <- N - 1L } if (resolve.root) { newnod <- oldNnode + n + 1L if (length(outgroup) == 1L) { wh <- which(phy$edge[, 2] == outgroup) #phy$edge[1] <- newnod k <- which(phy$edge[, 1] == newroot) # wh should be among k phy$edge[k[k != wh], 1] <- newnod o <- c((1:N)[-wh], wh) phy$edge <- rbind(c(newroot, newnod), phy$edge[o, ]) if (wbl) phy$edge.length <- c(0, phy$edge.length[o]) } else { wh <- which(phy$edge[, 1] == newroot) phy$edge[wh[-1], 1] <- newnod s1 <- 1:(wh[2] - 1) s2 <- wh[2]:N phy$edge <- rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ]) if (wbl) phy$edge.length <- c(phy$edge.length[s1], 0, phy$edge.length[s2]) } phy$Nnode <- phy$Nnode + 1L } } ## The block below renumbers the nodes so that they conform ## to the "phylo" format newNb <- integer(n + phy$Nnode) newNb[newroot] <- n + 1L sndcol <- phy$edge[, 2] > n newNb[sort(phy$edge[sndcol, 2])] <- n + 2:phy$Nnode phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] phy$edge[, 1] <- newNb[phy$edge[, 1]] if (!is.null(phy$node.label)) { newNb <- newNb[-(1:n)] if (fuseRoot) { newNb <- newNb[-1] phy$node.label <- phy$node.label[-1] } phy$node.label <- phy$node.label[order(newNb)] if (resolve.root) { phy$node.label[is.na(phy$node.label)] <- phy$node.label[1] phy$node.label[1] <- "Root" } } attr(phy, "order") <- NULL reorder.phylo(phy) } root.multiPhylo <- function(phy, outgroup, ...) { oc <- oldClass(phy) class(phy) <- NULL labs <- attr(phy, "TipLabel") if (!is.null(labs)) for (i in seq_along(phy)) phy[[i]]$tip.label <- labs phy <- lapply(phy, root.phylo, outgroup = outgroup, ...) if (!is.null(labs)) { for (i in seq_along(phy)) phy[[i]]$tip.label <- NULL attr(phy, "TipLabel") <- labs } class(phy) <- oc phy } ape/R/mvr.R0000644000176200001440000000313712465112403012151 0ustar liggesusers## mvr.R (2012-03-30) ## Minimum Variance Reduction ## Copyright 2011 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. mvr <- function(X, V) { if (is.matrix(X)) X <- as.dist(X) if (is.matrix(V)) V <- as.dist(V) if (any(is.na(X))) stop("missing values are not allowed in the distance matrix") if (any(is.na(V))) stop("missing values are not allowed in the variance matrix") N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_mvr, as.double(X), as.double(V), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), NAOK = TRUE) obj <- list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } mvrs <- function(X, V, fs = 15) { if (fs < 1) stop("argument 'fs' must be a non-zero positive integer") if (is.matrix(X)) X <- as.dist(X) if (is.matrix(V)) V <- as.dist(V) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_mvrs, as.double(X), as.double(V), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), as.integer(fs), NAOK = TRUE) obj <- list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } ape/R/compute.brtime.R0000644000176200001440000000323012465112403014274 0ustar liggesusers## compute.brtime.R (2012-03-02) ## Compute and Set Branching Times ## Copyright 2011-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. compute.brtime <- function(phy, method = "coalescent", force.positive = NULL) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') n <- length(phy$tip.label) m <- phy$Nnode N <- Nedge(phy) ## x: branching times (aka, node ages, depths, or heights) if (identical(method, "coalescent")) { # the default x <- 2 * rexp(m)/(as.double((m + 1):2) * as.double(m:1)) ## x <- 2 * rexp(n - 1)/(as.double(n:2) * as.double((n - 1):1)) if (is.null(force.positive)) force.positive <- TRUE } else if (is.numeric(method)) { x <- as.vector(method) if (length(x) != m) stop("number of branching times given is not equal to the number of nodes") if (is.null(force.positive)) force.positive <- FALSE } y <- c(rep(0, n), x) # for all nodes (terminal and internal) e1 <- phy$edge[, 1L] # local copies of the pointers e2 <- phy$edge[, 2L] # if (force.positive) { o <- .Call(seq_root2tip, phy$edge, n, m) list.nodes <- list(n + 1L) i <- 2L repeat { z <- sapply(o, "[", i) z <- unique(z[!(z <= n | is.na(z))]) if (!length(z)) break list.nodes[[i]] <- z i <- i + 1L } nodes <- unlist(lapply(list.nodes, function(x) x[sample(length(x))])) y[nodes] <- sort(x, decreasing = TRUE) } phy$edge.length <- y[e1] - y[e2] phy } ape/R/nj.R0000644000176200001440000000163514046211762011762 0ustar liggesusers## nj.R (2021-05-10) ## Neighbor-Joining Tree Estimation ## Copyright 2004-2021 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. nj <- function(X) { if (is.matrix(X)) X <- as.dist(X) if (anyNA(X)) stop("missing values are not allowed in the distance matrix\nConsider using njs()") if (any(is.infinite(X))) stop("infinite values are not allowed in the distance matrix") N <- as.integer(attr(X, "Size")) if (N < 3) stop("cannot build an NJ tree with less than 3 observations") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) DIST <- numeric(length(X)) DIST[] <- X[] obj <- .Call(C_nj, DIST, N) names(obj) <- c("edge", "edge.length") dim(obj[[1]]) <- c(2L * N - 3L, 2L) obj$tip.label <- labels obj$Nnode <- N - 2L class(obj) <- "phylo" reorder(obj) } ape/R/pcoa.R0000644000176200001440000001510014063053051012257 0ustar liggesuserspcoa <- function(D, correction="none", rn=NULL) # # Principal coordinate analysis (PCoA) of a square distance matrix D # with correction for negative eigenvalues. # # References: # Gower, J. C. 1966. Some distance properties of latent root and vector methods # used in multivariate analysis. Biometrika. 53: 325-338. # Gower, J. C. and P. Legendre. 1986. Metric and Euclidean properties of # dissimilarity coefficients. J. Classif. 3: 5-48. # Legendre, P. and L. Legendre. 1998. Numerical ecology, 2nd English edition. # Elsevier Science BV, Amsterdam. [PCoA: Section 9.2] # # Pierre Legendre, October 2007 { centre <- function(D,n) # Centre a square matrix D by matrix algebra # mat.cen = (I - 11'/n) D (I - 11'/n) { One <- matrix(1,n,n) mat <- diag(n) - One/n mat.cen <- mat %*% D %*% mat lowtri <- lower.tri(mat.cen) mat.cen[lowtri] <- t(mat.cen)[lowtri] mat.cen } bstick.def <- function (n, tot.var = 1, ...) # 'bstick.default' from vegan { res <- rev(cumsum(tot.var/n:1)/n) names(res) <- paste("Stick", seq(len = n), sep = "") return(res) } # ===== The PCoA function begins here ===== # Preliminary actions D <- as.matrix(D) n <- nrow(D) epsilon <- sqrt(.Machine$double.eps) if(length(rn)!=0) { names <- rn } else { names <- rownames(D) } CORRECTIONS <- c("none","lingoes","cailliez") correct <- pmatch(correction, CORRECTIONS) if(is.na(correct)) stop("Invalid correction method") # cat("Correction method =",correct,'\n') # Gower centring of matrix D # delta1 = (I - 11'/n) [-0.5 d^2] (I - 11'/n) delta1 <- centre((-0.5*D^2),n) trace <- sum(diag(delta1)) # Eigenvalue decomposition D.eig <- eigen(delta1) # Negative eigenvalues? min.eig <- min(D.eig$values) zero.eig <- which(abs(D.eig$values) < epsilon) D.eig$values[zero.eig] <- 0 # No negative eigenvalue if(min.eig > -epsilon) { # Curly 1 correct <- 1 eig <- D.eig$values k <- length(which(eig > epsilon)) rel.eig <- eig[1:k]/trace cum.eig <- cumsum(rel.eig) vectors <- sweep(D.eig$vectors[,1:k, drop = FALSE], 2, sqrt(eig[1:k]), FUN="*") bs <- bstick.def(k) cum.bs <- cumsum(bs) res <- data.frame(eig[1:k], rel.eig, bs, cum.eig, cum.bs) colnames(res) <- c("Eigenvalues","Relative_eig","Broken_stick","Cumul_eig","Cumul_br_stick") rownames(res) <- 1:nrow(res) rownames(vectors) <- names colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.") note <- paste("There were no negative eigenvalues. No correction was applied") out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace)) # Negative eigenvalues present } else { # Curly 1 k <- n eig <- D.eig$values rel.eig <- eig/trace rel.eig.cor <- (eig - min.eig)/(trace - (n-1)*min.eig) # Eq. 9.27 for a single dimension if (length(zero.eig)) # by Jesse Connell rel.eig.cor <- c(rel.eig.cor[-zero.eig[1]], 0) ## the previous line replaces: ## rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1]-1)], rel.eig.cor[(zero.eig[1]+1):n], 0) cum.eig.cor <- cumsum(rel.eig.cor) k2 <- length(which(eig > epsilon)) k3 <- length(which(rel.eig.cor > epsilon)) vectors <- sweep(D.eig$vectors[, 1:k2, drop = FALSE], 2, sqrt(eig[1:k2]), FUN="*") # Only the eigenvectors with positive eigenvalues are shown # Negative eigenvalues: three ways of handling the situation if((correct==2) | (correct==3)) { # Curly 2 if(correct == 2) { # Curly 3 # Lingoes correction: compute c1, then the corrected D c1 <- -min.eig note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -",c1,", except diagonal elements") D <- -0.5*(D^2 + 2*c1) # Cailliez correction: compute c2, then the corrected D } else if(correct == 3) { delta2 <- centre((-0.5*D),n) upper <- cbind(matrix(0,n,n), 2*delta1) lower <- cbind(-diag(n), -4*delta2) sp.matrix <- rbind(upper, lower) c2 <- max(Re(eigen(sp.matrix, symmetric=FALSE, only.values=TRUE)$values)) note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +",c2,")^2, except diagonal elements") D <- -0.5*(D + c2)^2 } # End curly 3 diag(D) <- 0 mat.cor <- centre(D,n) toto.cor <- eigen(mat.cor) trace.cor <- sum(diag(mat.cor)) # Negative eigenvalues present? min.eig.cor <- min(toto.cor$values) zero.eig.cor <- which((toto.cor$values < epsilon) & (toto.cor$values > -epsilon)) toto.cor$values[zero.eig.cor] <- 0 # No negative eigenvalue after correction: result OK if(min.eig.cor > -epsilon) { # Curly 4 eig.cor <- toto.cor$values rel.eig.cor <- eig.cor[1:k]/trace.cor cum.eig.cor <- cumsum(rel.eig.cor) k2 <- length(which(eig.cor > epsilon)) vectors.cor <- sweep(toto.cor$vectors[, 1:k2, drop = FALSE], 2, sqrt(eig.cor[1:k2]), FUN="*") rownames(vectors.cor) <- names colnames(vectors.cor) <- colnames(vectors.cor, do.NULL = FALSE, prefix = "Axis.") # bs <- broken.stick(k2)[,2] bs <- bstick.def(k2) bs <- c(bs, rep(0,(k-k2))) cum.bs <- cumsum(bs) # Negative eigenvalues still present after correction: incorrect result } else { if(correct == 2) cat("Problem! Negative eigenvalues are still present after Lingoes",'\n') if(correct == 3) cat("Problem! Negative eigenvalues are still present after Cailliez",'\n') rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA,n) vectors.cor <- matrix(NA,n,2) rownames(vectors.cor) <- names colnames(vectors.cor) <- colnames(vectors.cor, do.NULL = FALSE, prefix = "Axis.") } # End curly 4 res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor, bs, cum.eig.cor, cum.bs) colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig", "Broken_stick", "Cum_corr_eig", "Cum_br_stick") rownames(res) <- 1:nrow(res) rownames(vectors) <- names colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.") out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace, vectors.cor=vectors.cor, trace.cor=trace.cor)) } else { # Curly 2 note <- "No correction was applied to the negative eigenvalues" bs <- bstick.def(k3) bs <- c(bs, rep(0,(k-k3))) cum.bs <- cumsum(bs) res <- data.frame(eig[1:k], rel.eig, rel.eig.cor, bs, cum.eig.cor, cum.bs) colnames(res) <- c("Eigenvalues","Relative_eig","Rel_corr_eig","Broken_stick","Cum_corr_eig","Cumul_br_stick") rownames(res) <- 1:nrow(res) rownames(vectors) <- names colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.") out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace)) } # End curly 2: three ways of handling the situation } # End curly 1 class(out) <- "pcoa" out } # End of PCoA ape/R/phydataplot.R0000644000176200001440000001636513165160615013713 0ustar liggesusers## phydataplot.R (2017-10-04) ## Annotate Phylogenies ## Copyright 2014-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. polar2rect <- function(r, angle) list(x = r * cos(angle), y = r * sin(angle)) rect2polar <- function(x, y) list(r = sqrt(x^2 + y^2), angle = atan2(y, x)) .matchDataPhylo <- function(x, phy) { msg <- "'x' has no (row)names: data are assumed to be in the same order than the tips of the tree" labs <- phy$tip.label if (is.vector(x)) { # also for lists if (is.null(names(x))) warning(msg) else x <- x[labs] } else { if (is.null(rownames(x))) warning(msg) else x <- x[labs, ] } x } ring <- function(x, phy, style = "ring", offset = 1, ...) { style <- match.arg(style, c("ring", "segments", "arrows")) x <- .matchDataPhylo(x, phy) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) n <- lastPP$Ntip one2n <- seq_len(n) tmp <- rect2polar(lastPP$xx[one2n], lastPP$yy[one2n]) theta <- tmp$angle r0 <- max(tmp$r) + offset r1 <- r0 + x s0 <- polar2rect(rep.int(r0, 100L), seq(0, 2*pi, length.out = 100L)) s1 <- polar2rect(r1, theta) switch(style, ring = { if (length(x) < n) x <- rep_len(x, n) dx <- dim(x) if (is.null(dx)) dim(x) <- dx <- c(n, 1L) nc <- dx[2] col <- list(...)$col if (is.null(col)) col <- "grey" if (nc == 1) { col <- rep_len(col, n) } else { colvar <- col col <- rep(col[1], n) } iangle <- min(diff(sort(theta))) iangle2 <- iangle / 2 for (i in one2n) { R <- rep(r0, 100) THETA <- seq(theta[i] - iangle2, theta[i] + iangle2, length.out = 100) xy1 <- polar2rect(R, THETA) xy2 <- polar2rect(R + x[i, 1], THETA) polygon(c(xy1$x, rev(xy2$x)), c(xy1$y, rev(xy2$y)), col = col[i], border = NA) if (nc > 1) { for (j in 2:nc) { xy1 <- xy2 xy2 <- polar2rect(R + sum(x[i, 1:j]), THETA) polygon(c(xy1$x, rev(xy2$x)), c(xy1$y, rev(xy2$y)), col = colvar[j], border = NA) } } } ##polypath(c(s0$x, NA, s0$x), c(s0$y, NA, s1$y), rule = "evenodd", ## border = 1, col = "transparent") }, segments = { s0 <- polar2rect(rep.int(r0, n), theta) segments(s0$x, s0$y, s1$x, s1$y, ...) }, arrows = { s0 <- polar2rect(rep.int(r0, n), theta) fancyarrows(s0$x, s0$y, s1$x, s1$y, ...) }) } phydataplot <- function(x, phy, style = "bars", offset = 1, scaling = 1, continuous = FALSE, width = NULL, legend = "below", funcol = rainbow, ...) { style <- match.arg(style, c("bars", "segments", "image", "arrows", "boxplot", "dotchart", "mosaic")) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) circular <- if (lastPP$type %in% c("radial", "fan")) TRUE else FALSE n <- length(phy$tip.label) one2n <- seq_len(n) x <- .matchDataPhylo(x, phy) if (scaling != 1) x <- if (is.list(x)) lapply(x, "*", scaling) else scaling * x if (!circular) { if (lastPP$direction != "rightwards") stop("for the moment, only rightwards trees are supported") x0 <- max(lastPP$xx[one2n]) + offset if (style %in% c("bars", "segments", "arrows")) x1 <- x0 + x y1 <- lastPP$yy[one2n] if (style %in% c("bars", "image", "boxplot", "dotchart", "mosaic")) { o <- order(y1) x <- if (style == "image") x[o, o] else if (is.vector(x)) x[o] else x[o, ] } } else { if (style %in% c("image", "boxplot", "dotchart", "mosaic")) stop(paste(dQuote(style), "not implemented with circular trees")) } switch(style, bars = { if (circular) stop("style = \"bars\" not implemented with circular trees; see function 'ring'") if (!is.null(dim(x))) x <- t(x) barplot(x, width = 1, add = TRUE, horiz = TRUE, offset = x0, axes = FALSE, axisnames = FALSE, space = c(0.5, rep(0, n - 1)), ...) px <- pretty(c(0, x)) axis(1, px + x0, labels = px / scaling, line = 1) }, segments = { if (circular) ring(x, phy, style, offset, ...) else segments(x0, y1, x1, y1, ...) }, image = { if (inherits(x, "DNAbin")) stop('object of class "DNAbin" not supported: use type="mosaic"') x1 <- seq(x0, lastPP$x.lim[2], length.out = n) image(x1, y1[o], x, add = TRUE, ...) mtext(phy$tip.label[o], 1, 1, at = x1, font = lastPP$font, cex = lastPP$cex, col = lastPP$tip.color) }, arrows = { if (circular) ring(x, phy, style, offset, ...) else fancyarrows(rep(x0, length(y1)), y1, x1, y1, ...) }, boxplot = { if (is.matrix(x)) x <- t(x) o <- boxplot(x, plot = FALSE) mini <- min(o$stats) maxi <- max(o$stats) if (length(o$out)) { # in case there is no outlier mini <- min(o$out, mini) maxi <- max(o$out, maxi) } px <- pretty(c(mini, maxi)) x0 <- x0 - mini o$stats <- o$stats + x0 o$out <- o$out + x0 bxp(o, horizontal = TRUE, add = TRUE, axes = FALSE, ...) axis(1, px + x0, labels = px / scaling, line = 1) }, dotchart = { mini <- min(x) maxi <- max(x) x0 <- x0 - mini segments(mini + x0, one2n, maxi + x0, one2n, lty = 3, col = "gray") points(x + x0, 1:n, ...) px <- pretty(x) axis(1, px + x0, labels = px / scaling, line = 1) }, mosaic = { p <- ncol(x) if (is.null(p)) p <- 1L if (is.null(width)) { x1 <- lastPP$x.lim[2] width <- (x1 - x0)/p } else x1 <- x0 + width * p xx <- seq(x0, x1, width) xl <- rep(xx[-length(xx)], each = n) yb <- rep(one2n - 0.5, p) xr <- xl + width yt <- yb + 1 if (!is.null(labx <- colnames(x))) text(xx[-length(xx)] + width/2, max(yt), labx, adj = c(0.5, -0.5), xpd = TRUE) if (continuous) { nux <- if (is.logical(continuous)) 10 else continuous sq <- seq(min(x), max(x), length.out = nux + 1) x <- .bincode(x, sq, FALSE, TRUE) lgd <- paste0("[", sq[-length(sq)], "-", sq[-1], ")") } else { if (is.raw(x)) x <- toupper(as.character(x)) # for DNAbin objects nux <- length(ux <- sort(unique.default(x))) x <- match(x, ux) lgd <- as.character(ux) } co <- funcol(nux) conames <- names(co) if (!is.null(conames)) co <- co[lgd] rect(xl, yb, xr, yt, col = co[x], xpd = TRUE, ...) legend <- match.arg(legend, c("below", "side", "none")) if (legend != "none") { if (legend == "below") legend((x0 + x1)/2, -yinch(0.1), lgd, pch = 22, pt.bg = co, pt.cex = 2, bty = "n", xjust = 0.5, yjust = 0.5, horiz = TRUE, xpd = TRUE) else legend(x1, n, lgd, pch = 22, pt.bg = co, pt.cex = 2, bty = "n", yjust = 1, xpd = TRUE) } }) } ape/R/nodepath.R0000644000176200001440000000303012465112403013137 0ustar liggesusers## nodepath.R (2014-11-06) ## Find Paths of Nodes ## Copyright 2014 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. nodepath <- function(phy, from = NULL, to = NULL) { if (!inherits(phy, "phylo")) stop("object \"phy\" is not of class \"phylo\"") n <- length(phy$tip.label) m <- phy$Nnode root2tip <- .Call(seq_root2tip, phy$edge, n, m) if (is.null(from) || is.null(to)) return(root2tip) if (from < 1 || from > n + m) stop("'from' out of range") if (to < 1 || to > n + m) stop("'to' out of range") if (from == to) return(to) ## find the first occurrence of 'x' in the list root2tip foo <- function(x) { if (x <= n) return(x) # if x is a tip if (x == n + 1L) return(1L) # if x is the root i <- 1L repeat { if (any(root2tip[[i]] == x)) break i <- i + 1L } i } i <- foo(from) j <- foo(to) ## find path of nodes in a single vector 'seq' from root2tip findPath <- function(from, to, seq) { i <- which(seq == from) j <- which(seq == to) seq[i:j] } if (i == j) return(findPath(from, to, root2tip[[i]])) ## find the MRCA of 'from' and 'to' A <- root2tip[[i]] B <- root2tip[[j]] MRCA <- n + 1L # start from the root k <- 2L repeat { if (A[k] != B[k]) break MRCA <- A[k] k <- k + 1L } x <- findPath(MRCA, from, A) y <- findPath(MRCA, to, B) c(rev(x), y[-1]) } ape/R/as.bitsplits.R0000644000176200001440000000660414550501031013762 0ustar liggesusers## as.bitsplits.R (2024-01-13) ## Conversion Among Split Classes ## Copyright 2011-2024 Emmanuel Paradis, 2019 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. as.bitsplits <- function(x) UseMethod("as.bitsplits") as.bitsplits.prop.part <- function(x) { foo <- function(vect, RAWVECT) { res <- RAWVECT for (y in vect) { i <- ceiling(y/8) res[i] <- res[i] | as.raw(2^(8 - ((y - 1) %% 8) - 1)) } res } N <- length(x) # number of splits n <- length(x[[1]]) # number of tips nr <- ceiling(n/8) mat <- raw(N * nr) dim(mat) <- c(nr, N) RAWVECT <- raw(nr) for (i in 1:N) mat[, i] <- foo(x[[i]], RAWVECT) ## add the n trivial splits of size 1... : mat.bis <- raw(n * nr) dim(mat.bis) <- c(nr, n) for (i in 1:n) mat.bis[, i] <- foo(i, RAWVECT) ## ... drop the trivial split of size n... : mat <- cbind(mat.bis, mat[, -1, drop = FALSE]) ## ... update the split frequencies... : freq <- attr(x, "number") freq <- c(rep(freq[1L], n), freq[-1L]) ## ... and numbers: N <- N + n - 1L structure(list(matsplit = mat, labels = attr(x, "labels"), freq = freq), class = "bitsplits") } print.bitsplits <- function(x, ...) { n <- length(x$freq) cat("Object of class \"bitsplits\"\n") cat(" ", length(x$labels), "tips\n") cat(" ", n, "partition") if (n > 1) cat("s") cat("\n") } sort.bitsplits <- function(x, decreasing = FALSE, ...) { o <- order(x$freq, decreasing = decreasing) x$matsplit <- x$matsplit[, o] x$freq <- x$freq[o] x } as.prop.part <- function(x, ...) UseMethod("as.prop.part") as.prop.part.bitsplits <- function(x, include.trivial = FALSE, ...) { decodeBitsplits <- function(x) { f <- function(y) rev(rawToBits(y)) == as.raw(1) which(unlist(lapply(x, f))) } N <- ncol(x$matsplit) # nb of splits n <- length(x$labels) # nb of tips Nres <- if (include.trivial) N + 1L else N res <- vector("list", Nres) if (include.trivial) res[[1]] <- 1:n j <- if (include.trivial) 2L else 1L for (i in 1:N) { res[[j]] <- decodeBitsplits(x$matsplit[, i]) j <- j + 1L } attr(res, "number") <- if (include.trivial) c(N, x$freq) else x$freq attr(res, "labels") <- x$labels class(res) <- "prop.part" res } bitsplits <- function(x) { if (inherits(x, "phylo")) { x <- list(x) class(x) <- "multiPhylo" } else { if (!inherits(x, "multiPhylo")) stop('x is not of class "phylo" or "multiPhylo"') } if (any(is.rooted(x))) stop("bitsplits() accepts only unrooted trees") x <- .compressTipLabel(x) labs <- attr(x, "TipLabel") n <- length(labs) if (n > 46341) warning("Tree(s) with more than 46,341 tips: this is likely too large.") nr <- ceiling(n/8) ans <- .Call(bitsplits_multiPhylo, x, n, nr) nc <- ans[[3]] if (nc) { o <- ans[[1]][1:(nr * nc)] freq <- ans[[2]][1:nc] } else { o <- raw() freq <- integer() } dim(o) <- c(nr, nc) structure(list(matsplit = o, labels = labs, freq = freq), class = "bitsplits") } countBipartitions <- function(phy, X) { split <- bitsplits(phy) SPLIT <- bitsplits(X) .Call("CountBipartitionsFromSplits", split, SPLIT) } ape/R/plot.phylo.R0000644000176200001440000012111414720373441013460 0ustar liggesusers## plot.phylo.R (2024-11-23) ## Plot Phylogenies ## Copyright 2002-2024 Emmanuel Paradis, 2021 Martin Smith, 2022 Damien de Vienne, 2024 Klaus Schliep ## colouring of segments by MS ## tidy trees by DdV ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. plot.phylo <- function(x, type = "phylogram", use.edge.length = TRUE, node.pos = NULL, show.tip.label = TRUE, show.node.label = FALSE, edge.color = NULL, edge.width = NULL, edge.lty = NULL, node.color = NULL, node.width = NULL, node.lty = NULL, font = 3, cex = par("cex"), adj = NULL, srt = 0, no.margin = FALSE, root.edge = FALSE, label.offset = 0, underscore = FALSE, x.lim = NULL, y.lim = NULL, direction = "rightwards", lab4ut = NULL, tip.color = par("col"), plot = TRUE, rotate.tree = 0, open.angle = 0, node.depth = 1, align.tip.label = FALSE, ...) { Ntip <- length(x$tip.label) if (Ntip < 2) { warning("found fewer than 2 tips in the tree") return(NULL) } .nodeHeight <- function(edge, Nedge, yy) .C(node_height, as.integer(edge[, 1]), as.integer(edge[, 2]), as.integer(Nedge), as.double(yy))[[4]] .nodeDepth <- function(Ntip, Nnode, edge, Nedge, node.depth) .C(node_depth, as.integer(Ntip), as.integer(edge[, 1]), as.integer(edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), as.integer(node.depth))[[5]] .nodeDepthEdgelength <- function(Ntip, Nnode, edge, Nedge, edge.length) .C(node_depth_edgelength, as.integer(edge[, 1]), as.integer(edge[, 2]), as.integer(Nedge), as.double(edge.length), double(Ntip + Nnode))[[5]] Nedge <- dim(x$edge)[1] Nnode <- x$Nnode if (any(x$edge < 1) || any(x$edge > Ntip + Nnode)) stop("tree badly conformed; cannot plot. Check the edge matrix.") ROOT <- Ntip + 1 type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", "radial", "tidy")) direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) if (is.null(x$edge.length)) { use.edge.length <- FALSE } else { if (use.edge.length && type != "radial") { tmp <- sum(is.na(x$edge.length)) if (tmp) { warning(paste(tmp, "branch length(s) NA(s): branch lengths ignored in the plot")) use.edge.length <- FALSE } } } if (is.numeric(align.tip.label)) { align.tip.label.lty <- align.tip.label align.tip.label <- TRUE } else { # assumes is.logical(align.tip.labels) == TRUE if (align.tip.label) align.tip.label.lty <- 3 } if (align.tip.label) { if (type %in% c("unrooted", "radial") || !use.edge.length || is.ultrametric(x)) align.tip.label <- FALSE } ## the order of the last two conditions is important: if (type %in% c("unrooted", "radial") || !use.edge.length || is.null(x$root.edge) || !x$root.edge) root.edge <- FALSE phyloORclado <- type %in% c("phylogram", "cladogram", "tidy") horizontal <- direction %in% c("rightwards", "leftwards") ##tidy exception: if (type == "tidy" && any(x$edge.length < 0)) stop("cannot plot in tidy mode with negative branch lengths. Check 'edge.length' vector.") xe <- x$edge # to save if (phyloORclado) { ## we first compute the y-coordinates of the tips. phyOrder <- attr(x, "order") ## make sure the tree is in cladewise order: if (is.null(phyOrder) || phyOrder != "cladewise") { x <- reorder(x) # fix from Klaus Schliep (2007-06-16) if (!identical(x$edge, xe)) { ## modified from Li-San Wang's fix (2007-01-23): ereorder <- match(x$edge[, 2], xe[, 2]) if (length(edge.color) > 1) { edge.color <- rep(edge.color, length.out = Nedge) edge.color <- edge.color[ereorder] } if (length(edge.width) > 1) { edge.width <- rep(edge.width, length.out = Nedge) edge.width <- edge.width[ereorder] } if (length(edge.lty) > 1) { edge.lty <- rep(edge.lty, length.out = Nedge) edge.lty <- edge.lty[ereorder] } } } ### By contrats to ape (< 2.4), the arguments edge.color, etc., are ### not elongated before being passed to segments(), except if needed ### to be reordered yy <- numeric(Ntip + Nnode) TIPS <- x$edge[x$edge[, 2] <= Ntip, 2] yy[TIPS] <- 1:Ntip } ## TIDY ## Function to compute the size of labels ## for each tip in user coordinates getStringLengthbyTip <- function(x, lab, sin, cex) { s <- strwidth(lab, "inches", cex = cex) lim <- getLimit(x, lab, sin, cex) alp <- lim/sin s*alp } ### END TIDY ## Function to compute the axis limit ## x: vector of coordinates, must be positive (or at least the largest value) ## lab: vector of labels, length(x) == length(lab) ## sin: size of the device in inches getLimit <- function(x, lab, sin, cex) { s <- strwidth(lab, "inches", cex = cex) # width of the tip labels ## if at least one string is larger than the device, ## give 1/3 of the plot for the tip labels: if (any(s > sin)) return(1.5 * max(x)) Limit <- 0 while (any(x > Limit)) { i <- which.max(x) ## 'alp' is the conversion coeff from inches to user coordinates: alp <- x[i]/(sin - s[i]) Limit <- x[i] + alp*s[i] x <- x + alp*s } Limit } ## 'z' is the tree in postorder order used in calls to .C z <- reorder(x, order = "postorder") if (phyloORclado) { if (is.null(node.pos)) node.pos <- if (type == "cladogram" && !use.edge.length) 2 else 1 if (node.pos == 1) { yy <- .nodeHeight(z$edge, Nedge, yy) } else { ## node_height_clado requires the number of descendants ## for each node, so we compute `xx' at the same time ans <- .C(node_height_clado, as.integer(Ntip), as.integer(z$edge[, 1]), as.integer(z$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), as.double(yy)) xx <- ans[[5]] - 1 yy <- ans[[6]] } if (!use.edge.length) { if (node.pos != 2) xx <- .nodeDepth(Ntip, Nnode, z$edge, Nedge, node.depth) - 1 xx <- max(xx) - xx } else { xx <- .nodeDepthEdgelength(Ntip, Nnode, z$edge, Nedge, z$edge.length) } ## TIDY if (type == "tidy") { if (!show.tip.label) { yy <- tidy.xy(z$edge, Ntip, Nnode, xx, yy) } else { # we add to xx the size taken by labels, so that tidying considers labels xx.tips <- xx[1:Ntip] pin1 <- par("pin")[1] # width of the device in inches lab.strlength <- getStringLengthbyTip(xx.tips, x$tip.label, pin1, cex) #size of lab strings xx2 <- xx xx2[1:Ntip] <- xx2[1:Ntip] + lab.strlength yy <- tidy.xy(z$edge, Ntip, Nnode, xx2, yy) #compress taking labels into account } } ### END TIDY } else { twopi <- 2 * pi rotate.tree <- twopi * rotate.tree/360 if (type != "unrooted") { # for "fan" and "radial" trees (open.angle) ## if the tips are not in the same order in tip.label ## and in edge[, 2], we must reorder the angles: we ## use `xx' to store temporarily the angles TIPS <- x$edge[which(x$edge[, 2] <= Ntip), 2] xx <- seq(0, twopi * (1 - 1/Ntip) - twopi * open.angle/360, length.out = Ntip) theta <- double(Ntip) theta[TIPS] <- xx theta <- c(theta, numeric(Nnode)) } switch(type, "fan" = { theta <- .nodeHeight(z$edge, Nedge, theta) if (use.edge.length) { r <- .nodeDepthEdgelength(Ntip, Nnode, z$edge, Nedge, z$edge.length) } else { r <- .nodeDepth(Ntip, Nnode, z$edge, Nedge, node.depth) max_r <- max(r) r <- (max_r - r + 1) / max_r } theta <- theta + rotate.tree if (root.edge) r <- r + x$root.edge xx <- r * cos(theta) yy <- r * sin(theta) }, "unrooted" = { nb.sp <- .nodeDepth(Ntip, Nnode, z$edge, Nedge, node.depth) XY <- if (use.edge.length) unrooted.xy(Ntip, Nnode, z$edge, z$edge.length, nb.sp, rotate.tree) else unrooted.xy(Ntip, Nnode, z$edge, rep(1, Nedge), nb.sp, rotate.tree) ## rescale so that we have only positive values xx <- XY$M[, 1] - min(XY$M[, 1]) yy <- XY$M[, 2] - min(XY$M[, 2]) }, "radial" = { r <- .nodeDepth(Ntip, Nnode, z$edge, Nedge, node.depth) r[r == 1] <- 0 r <- 1 - r/Ntip theta <- .nodeHeight(z$edge, Nedge, theta) + rotate.tree xx <- r * cos(theta) yy <- r * sin(theta) }) } if (phyloORclado) { if (!horizontal) { tmp <- yy yy <- xx xx <- tmp - min(tmp) + 1 } if (root.edge) { if (direction == "rightwards") xx <- xx + x$root.edge if (direction == "upwards") yy <- yy + x$root.edge } } if (no.margin) par(mai = rep(0, 4)) if (show.tip.label) nchar.tip.label <- nchar(x$tip.label) max.yy <- max(yy) # if (is.null(x.lim)) { if (phyloORclado) { if (horizontal) { ## 1.04 comes from that we are using a regular axis system ## with 4% on both sides of the range of x: ## REMOVED (2017-06-14) xx.tips <- xx[1:Ntip]# * 1.04 if (show.tip.label) { pin1 <- par("pin")[1] # width of the device in inches tmp <- getLimit(xx.tips, x$tip.label, pin1, cex) tmp <- tmp + label.offset } else tmp <- max(xx.tips) x_lim <- c(0, tmp) if (direction == "leftwards") x_lim <- x_lim - (tmp - max(xx.tips)) } else { ### TIDY ## x.lim <- c(1, Ntip) # Not true anymore with tidy trees x_lim <- c(1, max(xx[1:Ntip])) # add offset? ### END TIDY } } else switch(type, "fan" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.018 * max.yy * cex) x_lim <- range(xx) + c(-offset, offset) } else x_lim <- range(xx) }, "unrooted" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.018 * max.yy * cex) x_lim <- c(0 - offset, max(xx) + offset) } else x_lim <- c(0, max(xx)) }, "radial" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.03 * cex) x_lim <- c(-1 - offset, 1 + offset) } else x_lim <- c(-1, 1) }) # } if (is.null(x.lim)){ x.lim <- x_lim } else if (length(x.lim) == 1) { x.lim <- c(0, x.lim) if (phyloORclado && !horizontal) x.lim[1] <- 1 if (type %in% c("fan", "unrooted") && show.tip.label) x.lim[1] <- -max(nchar.tip.label * 0.018 * max.yy * cex) if (type == "radial") x.lim[1] <- if (show.tip.label) -1 - max(nchar.tip.label * 0.03 * cex) else -1 } ## mirror the xx: if (phyloORclado && direction == "leftwards") xx <- x_lim[2] - xx # if (is.null(y.lim)) { if (phyloORclado) { if (horizontal) { ### TIDY ## y.lim <- c(1, Ntip) # Not true anymore with tidy trees y_lim <- c(1, max(yy[1:Ntip])) ### END TIDY } else { pin2 <- par("pin")[2] # height of the device in inches ## 1.04 comes from that we are using a regular axis system ## with 4% on both sides of the range of x: ## REMOVED (2017-06-14) yy.tips <- yy[1:Ntip]# * 1.04 if (show.tip.label) { tmp <- getLimit(yy.tips, x$tip.label, pin2, cex) tmp <- tmp + label.offset } else tmp <- max(yy.tips) y_lim <- c(0, tmp) if (direction == "downwards") y_lim <- y_lim - (tmp - max(yy.tips)) } } else switch(type, "fan" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.018 * max.yy * cex) y_lim <- c(min(yy) - offset, max.yy + offset) } else y_lim <- c(min(yy), max.yy) }, "unrooted" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.018 * max.yy * cex) y_lim <- c(0 - offset, max.yy + offset) } else y_lim <- c(0, max.yy) }, "radial" = { if (show.tip.label) { offset <- max(nchar.tip.label * 0.03 * cex) y_lim <- c(-1 - offset, 1 + offset) } else y_lim <- c(-1, 1) }) if (is.null(y.lim)) { y.lim <- y_lim } else if (length(y.lim) == 1) { y.lim <- c(0, y.lim) if (phyloORclado && horizontal) y.lim[1] <- 1 if (type %in% c("fan", "unrooted") && show.tip.label) y.lim[1] <- -max(nchar.tip.label * 0.018 * max.yy * cex) if (type == "radial") y.lim[1] <- if (show.tip.label) -1 - max(nchar.tip.label * 0.018 * max.yy * cex) else -1 } ## mirror the yy: if (phyloORclado && direction == "downwards") yy <- y_lim[2] - yy # fix by Klaus if (phyloORclado && root.edge) { if (direction == "leftwards") x.lim[2] <- x.lim[2] + x$root.edge if (direction == "downwards") y.lim[2] <- y.lim[2] + x$root.edge } asp <- if (type %in% c("fan", "radial", "unrooted")) 1 else NA # fixes by Klaus Schliep (2008-03-28 and 2010-08-12) plot.default(0, type = "n", xlim = x.lim, ylim = y.lim, xlab = "", ylab = "", axes = FALSE, asp = asp, ...) if (plot) { if (is.null(adj)) adj <- if (phyloORclado && direction == "leftwards") 1 else 0 if (phyloORclado && show.tip.label) { MAXSTRING <- max(strwidth(x$tip.label, cex = cex)) loy <- 0 if (direction == "rightwards") { lox <- label.offset + MAXSTRING * 1.05 * adj } if (direction == "leftwards") { lox <- -label.offset - MAXSTRING * 1.05 * (1 - adj) ##xx <- xx + MAXSTRING } if (!horizontal) { psr <- par("usr") MAXSTRING <- MAXSTRING * 1.09 * (psr[4] - psr[3])/(psr[2] - psr[1]) loy <- label.offset + MAXSTRING * 1.05 * adj lox <- 0 srt <- 90 + srt if (direction == "downwards") { loy <- -loy ##yy <- yy + MAXSTRING srt <- 180 + srt } } } if (type %in% c("phylogram", "tidy")) { phylogram.plot(x$edge, Ntip, Nnode, xx, yy, horizontal, edge.color, edge.width, edge.lty, node.color, node.width, node.lty) } else { if (is.null(edge.color)) { edge.color <- par('fg') } if (is.null(edge.width)) { edge.width <- par('lwd') } if (is.null(edge.lty)) { edge.lty <- par('lty') } if (type == "fan") { ereorder <- match(z$edge[, 2], x$edge[, 2]) if (length(edge.color) > 1) { edge.color <- rep_len(edge.color, Nedge) edge.color <- edge.color[ereorder] } if (length(edge.width) > 1) { edge.width <- rep_len(edge.width, Nedge) edge.width <- edge.width[ereorder] } if (length(edge.lty) > 1) { edge.lty <- rep_len(edge.lty, Nedge) edge.lty <- edge.lty[ereorder] } circular.plot(z$edge, Ntip, Nnode, xx, yy, theta, r, edge.color, edge.width, edge.lty) } else cladogram.plot(x$edge, xx, yy, edge.color, edge.width, edge.lty) } if (root.edge) { rootcol <- if (length(edge.color) == 1) edge.color else par("fg") rootw <- if (length(edge.width) == 1) edge.width else par("lwd") rootlty <- if (length(edge.lty) == 1) edge.lty else par("lty") if (type == "fan") { tmp <- polar2rect(x$root.edge, theta[ROOT]) segments(0, 0, tmp$x, tmp$y, col = rootcol, lwd = rootw, lty = rootlty) } else { switch(direction, "rightwards" = segments(0, yy[ROOT], x$root.edge, yy[ROOT], col = rootcol, lwd = rootw, lty = rootlty), "leftwards" = segments(xx[ROOT], yy[ROOT], xx[ROOT] + x$root.edge, yy[ROOT], col = rootcol, lwd = rootw, lty = rootlty), "upwards" = segments(xx[ROOT], 0, xx[ROOT], x$root.edge, col = rootcol, lwd = rootw, lty = rootlty), "downwards" = segments(xx[ROOT], yy[ROOT], xx[ROOT], yy[ROOT] + x$root.edge, col = rootcol, lwd = rootw, lty = rootlty)) } } if (show.tip.label) { if (is.expression(x$tip.label)) underscore <- TRUE if (!underscore) x$tip.label <- gsub("_", " ", x$tip.label) if (phyloORclado) { if (align.tip.label) { xx.tmp <- switch(direction, "rightwards" = max(xx[1:Ntip]), "leftwards" = min(xx[1:Ntip]), "upwards" = xx[1:Ntip], "downwards" = xx[1:Ntip]) yy.tmp <- switch(direction, "rightwards" = yy[1:Ntip], "leftwards" = yy[1:Ntip], "upwards" = max(yy[1:Ntip]), "downwards" = min(yy[1:Ntip])) segments(xx[1:Ntip], yy[1:Ntip], xx.tmp, yy.tmp, lty = align.tip.label.lty) } else { xx.tmp <- xx[1:Ntip] yy.tmp <- yy[1:Ntip] } text(xx.tmp + lox, yy.tmp + loy, x$tip.label, adj = adj, font = font, srt = srt, cex = cex, col = tip.color) } else { angle <- if (type == "unrooted") XY$axe else atan2(yy[1:Ntip], xx[1:Ntip]) # in radians lab4ut <- if (is.null(lab4ut)) { if (type == "unrooted") "horizontal" else "axial" } else match.arg(lab4ut, c("horizontal", "axial")) xx.tips <- xx[1:Ntip] yy.tips <- yy[1:Ntip] if (label.offset) { xx.tips <- xx.tips + label.offset * cos(angle) yy.tips <- yy.tips + label.offset * sin(angle) } if (lab4ut == "horizontal") { y.adj <- x.adj <- numeric(Ntip) sel <- abs(angle) > 0.75 * pi x.adj[sel] <- -strwidth(x$tip.label)[sel] * 1.05 sel <- abs(angle) > pi/4 & abs(angle) < 0.75 * pi x.adj[sel] <- -strwidth(x$tip.label)[sel] * (2 * abs(angle)[sel] / pi - 0.5) sel <- angle > pi / 4 & angle < 0.75 * pi y.adj[sel] <- strheight(x$tip.label)[sel] / 2 sel <- angle < -pi / 4 & angle > -0.75 * pi y.adj[sel] <- -strheight(x$tip.label)[sel] * 0.75 text(xx.tips + x.adj * cex, yy.tips + y.adj * cex, x$tip.label, adj = c(adj, 0), font = font, srt = srt, cex = cex, col = tip.color) } else { # if lab4ut == "axial" if (align.tip.label) { POL <- rect2polar(xx.tips, yy.tips) POL$r[] <- max(POL$r) REC <- polar2rect(POL$r, POL$angle) xx.tips <- REC$x yy.tips <- REC$y segments(xx[1:Ntip], yy[1:Ntip], xx.tips, yy.tips, lty = align.tip.label.lty) } if (type == "unrooted") { adj <- abs(angle) > pi/2 angle <- angle * 180/pi # switch to degrees angle[adj] <- angle[adj] - 180 adj <- as.numeric(adj) } else { s <- xx.tips < 0 angle <- angle * 180/pi angle[s] <- angle[s] + 180 adj <- as.numeric(s) } ## `srt' takes only a single value, so can't vectorize this: ## (and need to 'elongate' these vectors:) font <- rep(font, length.out = Ntip) tip.color <- rep(tip.color, length.out = Ntip) cex <- rep(cex, length.out = Ntip) for (i in 1:Ntip) text(xx.tips[i], yy.tips[i], x$tip.label[i], font = font[i], cex = cex[i], srt = angle[i], adj = adj[i], col = tip.color[i]) } } } if (show.node.label) text(xx[ROOT:length(xx)] + label.offset, yy[ROOT:length(yy)], x$node.label, adj = adj, font = font, srt = srt, cex = cex) } L <- list(type = type, use.edge.length = use.edge.length, node.pos = node.pos, node.depth = node.depth, show.tip.label = show.tip.label, show.node.label = show.node.label, font = font, cex = cex, adj = adj, srt = srt, no.margin = no.margin, label.offset = label.offset, x.lim = x.lim, y.lim = y.lim, direction = direction, tip.color = tip.color, Ntip = Ntip, Nnode = Nnode, root.time = x$root.time, align.tip.label = align.tip.label) assign("last_plot.phylo", c(L, list(edge = xe, xx = xx, yy = yy)), envir = .PlotPhyloEnv) invisible(L) } phylogram.plot <- function(edge, Ntip, Nnode, xx, yy, horizontal, edge.color = NULL, edge.width = NULL, edge.lty = NULL, node.color = NULL, node.width = NULL, node.lty = NULL) { nodes <- Ntip + seq_len(Nnode) if (!horizontal) { tmp <- yy yy <- xx xx <- tmp } ## un trait vertical a chaque noeud... x0v <- xx[nodes] y0v <- y1v <- numeric(Nnode) e1 <- edge[, 1] e2 <- edge[, 2] Nedge <- length(e1) ## store the index of each node in the 1st column of edge: NodeInEdge1 <- lapply(Ntip + seq_len(Nnode), function (j) which(e1 == j)) edgeChildren <- lapply(NodeInEdge1, function (nie) e2[nie]) yv <- vapply(edgeChildren, function (i) range(yy[i]), double(2)) y0v <- yv[1, ] y1v <- yv[2, ] ## ... et un trait horizontal partant de chaque tip et chaque noeud ## vers la racine x0h <- xx[e1] x1h <- xx[e2] y0h <- yy[e2] # Node and edge styling .one.style <- function (style) { list(h = rep_len(style, Nedge), v = rep_len(style, Ntip + Nnode)) } .edge.style <- function (node.style) { node.style <- rep_len(node.style, Ntip + Nnode) sapply(seq_len(Nedge), function (e) node.style[e2[e]]) } .node.style <- function (edge.style, fallback) { edge.style <- rep_len(edge.style, Nedge) c(character(Ntip), sapply(Ntip + seq_len(Nnode), function (n) { pendant.styles <- edge.style[e1 == n] if (length(unique(pendant.styles)) == 1L) { pendant.styles[1] } else { fallback } })) } .style <- function (edge.style, node.style, stylePar) { if (missing(edge.style) || is.null(edge.style)) { if (missing(node.style) || is.null(node.style)) { return(.one.style(par(stylePar))) } else { if (length(node.style) == 1L) { return(.one.style(node.style)) } else { return(list(h = .edge.style(node.style), v = rep_len(node.style, Ntip + Nnode))) } } } else if (missing(node.style) || is.null(node.style)) { if (length(edge.style) == 1L) { return(.one.style(edge.style)) } else { return(list(h = rep_len(edge.style, Nedge), v = .node.style(edge.style, par(stylePar)))) } } else { return(list(h = rep_len(edge.style, Nedge), v = rep_len(node.style, Ntip + Nnode))) } } .LtyToStr <- function (x) { if (is.numeric(x)) { c("blank", "solid", "dashed", "dotted", "dotdash", "longdash", "twodash")[x + 1L] } else { x } } colors <- .style(edge.color, node.color, 'fg') widths <- .style(edge.width, node.width, 'lwd') ltys <- .style(.LtyToStr(edge.lty), .LtyToStr(node.lty), 'lty') edge.color <- colors$h edge.width <- widths$h edge.lty <- ltys$h color.v <- colors$v[-seq_len(Ntip)] width.v <- widths$v[-seq_len(Ntip)] lty.v <- ltys$v[-seq_len(Ntip)] DF <- data.frame(edge.color, edge.width, edge.lty, stringsAsFactors = FALSE) DF <- DF[, c(is.null(node.color), is.null(node.width), is.null(node.lty)), drop = FALSE] for (i in seq_len(Nnode)) { br <- NodeInEdge1[[i]] if (length(br) == 2) { A <- br[1] B <- br[2] # We should draw a single line if at all possible, for the # appearance of dotted / dashed line styles. if (any(DF[A, ] != DF[B, ])) { ## add a new line: y0v <- c(y0v, y0v[i]) y1v <- c(y1v, yy[i + Ntip]) x0v <- c(x0v, x0v[i]) ## shorten the old line: y0v[i] <- yy[i + Ntip] if (is.null(node.color)) { # Half-lines may have different colours color.v[i] <- edge.color[B] color.v <- c(color.v, edge.color[A]) } else { # Use node colour for both half-lines color.v <- c(color.v, color.v[i]) } if (is.null(node.width)) { width.v[i] <- edge.width[B] width.v <- c(width.v, edge.width[A]) } else { width.v <- c(width.v, width.v[i]) } if (is.null(node.lty)) { lty.v[i] <- edge.lty[B] lty.v <- c(lty.v, edge.lty[A]) } else { lty.v <- c(lty.v, lty.v[i]) } } } } if (horizontal) { # draw horizontal lines segments(x0h, y0h, x1h, y0h, col = edge.color, lwd = edge.width, lty = edge.lty) # draw vertical lines segments(x0v, y0v, x0v, y1v, col = color.v, lwd = width.v, lty = lty.v) } else { # draws vertical lines segments(y0h, x0h, y0h, x1h, col = edge.color, lwd = edge.width, lty = edge.lty) # draws horizontal lines segments(y0v, x0v, y1v, x0v, col = color.v, lwd = width.v, lty = lty.v) } } cladogram.plot <- function(edge, xx, yy, edge.color, edge.width, edge.lty) segments(xx[edge[, 1]], yy[edge[, 1]], xx[edge[, 2]], yy[edge[, 2]], col = edge.color, lwd = edge.width, lty = edge.lty) circular.plot <- function(edge, Ntip, Nnode, xx, yy, theta, r, edge.color, edge.width, edge.lty) ### 'edge' must be in postorder order { r0 <- r[edge[, 1]] r1 <- r[edge[, 2]] theta0 <- theta[edge[, 2]] costheta0 <- cos(theta0) sintheta0 <- sin(theta0) x0 <- r0 * costheta0 y0 <- r0 * sintheta0 x1 <- r1 * costheta0 y1 <- r1 * sintheta0 segments(x0, y0, x1, y1, col = edge.color, lwd = edge.width, lty = edge.lty) tmp <- which(diff(edge[, 1]) != 0) start <- c(1, tmp + 1) Nedge <- dim(edge)[1] end <- c(tmp, Nedge) ## function dispatching the features to the arcs foo <- function(edge.feat, default) { if (length(edge.feat) == 1) return(as.list(rep(edge.feat, Nnode))) edge.feat <- rep(edge.feat, length.out = Nedge) feat.arc <- as.list(rep(default, Nnode)) for (k in 1:Nnode) { tmp <- edge.feat[start[k]] if (tmp == edge.feat[end[k]]) { # fix by Francois Michonneau (2015-07-24) feat.arc[[k]] <- tmp } else { if (nodedegree[k] == 2) feat.arc[[k]] <- rep(c(tmp, edge.feat[end[k]]), each = 50) } } feat.arc } nodedegree <- tabulate(edge[, 1L])[-seq_len(Ntip)] co <- foo(edge.color, par("fg")) lw <- foo(edge.width, par("lwd")) ly <- foo(edge.lty, par("lty")) for (k in 1:Nnode) { i <- start[k] j <- end[k] # make number of segments dependent on the angle n_segments <- abs(as.integer(2 + abs(theta[edge[j, 2]] - theta[edge[i, 2]]) / 0.03)) X <- rep(r[edge[i, 1]], n_segments) Y <- seq(theta[edge[i, 2]], theta[edge[j, 2]], length.out = n_segments) x <- X * cos(Y); y <- X * sin(Y) x0 <- x[-n_segments]; y0 <- y[-n_segments]; x1 <- x[-1]; y1 <- y[-1] segments(x0, y0, x1, y1, col = co[[k]], lwd = lw[[k]], lty = ly[[k]]) } } unrooted.xy <- function(Ntip, Nnode, edge, edge.length, nb.sp, rotate.tree) { foo <- function(node, ANGLE, AXIS) { ind <- which(edge[, 1] == node) sons <- edge[ind, 2] start <- AXIS - ANGLE/2 for (i in 1:length(sons)) { h <- edge.length[ind[i]] angle[sons[i]] <<- alpha <- ANGLE*nb.sp[sons[i]]/nb.sp[node] axis[sons[i]] <<- beta <- start + alpha/2 start <- start + alpha xx[sons[i]] <<- h*cos(beta) + xx[node] yy[sons[i]] <<- h*sin(beta) + yy[node] } for (i in sons) if (i > Ntip) foo(i, angle[i], axis[i]) } Nedge <- dim(edge)[1] yy <- xx <- numeric(Ntip + Nnode) ## `angle': the angle allocated to each node wrt their nb of tips ## `axis': the axis of each branch axis <- angle <- numeric(Ntip + Nnode) ## start with the root... foo(Ntip + 1L, 2*pi, 0 + rotate.tree) M <- cbind(xx, yy) axe <- axis[1:Ntip] # the axis of the terminal branches (for export) axeGTpi <- axe > pi ## make sure that the returned angles are in [-PI, +PI]: axe[axeGTpi] <- axe[axeGTpi] - 2*pi list(M = M, axe = axe) } node.depth <- function(phy, method = 1) { n <- length(phy$tip.label) m <- phy$Nnode N <- dim(phy$edge)[1] phy <- reorder(phy, order = "postorder") .C(node_depth, as.integer(n), as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), as.integer(N), double(n + m), as.integer(method))[[5]] } node.depth.edgelength <- function(phy) { n <- length(phy$tip.label) m <- phy$Nnode N <- dim(phy$edge)[1] phy <- reorder(phy, order = "postorder") .C(node_depth_edgelength, as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), as.integer(N), as.double(phy$edge.length), double(n + m))[[5]] } node.height <- function(phy, clado.style = FALSE) { n <- length(phy$tip.label) m <- phy$Nnode N <- dim(phy$edge)[1] phy <- reorder(phy) yy <- numeric(n + m) e2 <- phy$edge[, 2] yy[e2[e2 <= n]] <- 1:n phy <- reorder(phy, order = "postorder") e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] if (clado.style) .C(node_height_clado, as.integer(n), as.integer(e1), as.integer(e2), as.integer(N), double(n + m), as.double(yy))[[6]] else .C(node_height, as.integer(e1), as.integer(e2), as.integer(N), as.double(yy))[[4]] } plot.multiPhylo <- function(x, layout = 1, ...) { layout(matrix(1:layout, ceiling(sqrt(layout)), byrow = TRUE)) if (!devAskNewPage() && names(dev.cur()) %in% deviceIsInteractive()) { devAskNewPage(TRUE) on.exit(devAskNewPage(FALSE)) } for (i in seq_along(x)) plot(x[[i]], ...) } trex <- function(phy, title = TRUE, subbg = "lightyellow3", return.tree = FALSE, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) devmain <- dev.cur() # where the main tree is plotted restore <- function() { dev.set(devmain) assign("last_plot.phylo", lastPP, envir = .PlotPhyloEnv) } on.exit(restore()) NEW <- TRUE cat("Click close to a node. Right-click to exit.\n") repeat { x <- identify.phylo(phy, quiet = TRUE) if (is.null(x)) return(invisible(NULL)) else { x <- x$nodes if (is.null(x)) cat("Try again!\n") else { if (NEW) { dev.new() par(bg = subbg) devsub <- dev.cur() NEW <- FALSE } else dev.set(devsub) tr <- extract.clade(phy, x) plot(tr, ...) if (is.character(title)) title(title) else if (title) { tl <- if (is.null(phy$node.label)) paste("From node #", x, sep = "") else paste("From", phy$node.label[x - Ntip(phy)]) title(tl) } if (return.tree) return(tr) restore() } } } } kronoviz <- function(x, layout = length(x), horiz = TRUE, ..., direction = ifelse(horiz, "rightwards", "upwards"), side=2) { op <- par(no.readonly = TRUE) on.exit({ par(op) devAskNewPage(FALSE) }) par(mar = rep(0.5, 4), oma = rep(2, 4)) direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) horiz <- ifelse(direction %in% c("rightwards", "leftwards"), TRUE, FALSE) rts <- sapply(x, function(x) branching.times(x)[1]) maxrts <- max(rts) lim <- cbind(rts - maxrts, rts) if (direction %in% c("leftwards", "downwards")) { lim[,1] <- 0 lim[,2] <- maxrts } Ntree <- length(x) Ntips <- sapply(x, Ntip) if (horiz) { nrow <- layout w <- 1 h <- Ntips } else { nrow <- 1 w <- Ntips h <- 1 } layout(matrix(1:layout, nrow), widths = w, heights = h) if (layout < Ntree && !devAskNewPage() && interactive()) { devAskNewPage(TRUE) } if (horiz) { for (i in 1:Ntree){ plot(x[[i]], x.lim = lim[i, ], direction = direction, ...) if(i == 1 && 1 %in% side) axisPhylo(side=3) } } else { for (i in 1:Ntree){ plot(x[[i]], y.lim = lim[i, ], direction = direction, ...) if(i == 1 && 1 %in% side) axisPhylo(side=2) } } if (2 %in% side) axisPhylo(if (horiz) 1 else 4) invisible(x) } tidy.xy <- function(edge, Ntip, Nnode, xx, yy) { yynew <- yy # will be updated to get the new y coordinates after tidying ## initialrange<-diff(range(yy)) #for computing compression. Remove ? oedge <- edge[match(seq_len(Ntip + Nnode), edge[, 2]), 1] # ordered edges segofnodes <- data.frame(x1 = xx[oedge], y1 = yy, x2 = xx, y2 = yy) # segment associated to each node postordernodes <- edge[,2] nodes <- c(postordernodes[order(xx[postordernodes], decreasing = TRUE)], Ntip + 1) # ensures equal x values to not lead to erroneuos postoder of nodes GetContourPairsFromSegments <- function(seg, which) { if (nrow(seg) > 1) { #solves issue with branch length = 0 allx <- sort(unique(c(seg$x1, seg$x2))) newx2 <- allx[2:length(allx)] if (which == "top") { newy2i <- sapply(newx2, function(cx, se) which(cx > se$x1 & cx <= se$x2)[which.max(se$y1[which(cx > se$x1 & cx <= se$x2)])], se = seg) } if (which == "bottom") { newy2i <- sapply(newx2, function(cx, se) which(cx > se$x1 & cx <= se$x2)[which.min(se$y1[which(cx > se$x1 & cx <= se$x2)])], se = seg) } newx1 <- allx[1:(length(allx) - 1)] newy1i <- newy2i ## we simplify segments by merging thsoe on same horiz (bout a bout) where2mergei <- which((newy1i[2:length(newy1i)] - newy2i[1:(length(newy1i) - 1)]) == 0) if (length(where2mergei) > 0) { newx1 <- newx1[-(where2mergei + 1)] newy1i <- newy1i[-(where2mergei + 1)] newx2 <- newx2[-(where2mergei)] newy2i <- newy2i[-(where2mergei)] } newy1ok <- seg$y1[newy1i] newy2ok <- newy1ok newseg <- data.frame(x1 = newx1, y1 = newy1ok, x2 = newx2, y2 = newy2ok) } else { newseg <- seg } newseg } GetMinDistBetweenContours <- function(topcontour, bottomcontour) { ## efficient way to compare top and bottom contour by only looking ## at necessary pairs (see original publiction) d <- NULL topi <- 1 boti <- 1 while((topi <= nrow(topcontour)) & (boti <= nrow(bottomcontour))) { d <- c(d, bottomcontour[boti, ]$y1 - topcontour[topi, ]$y1) if (bottomcontour[boti, ]$x2 < topcontour[topi, ]$x2) boti <- boti + 1 else topi <- topi + 1 } min(d) } N <- list() # will contain all info for each node. for (n in nodes) { N[[n]] <- list() childs <- edge[edge[, 1] == n, 2] childs.ord <- childs[order(yy[childs])] # childs ordered by y values desc <- c(childs, unlist(lapply(N[childs], function(x) x$desc))) diffiny <- 0 if (n > Ntip) { # we are in a node oldyofcurrentnode <- yynew[n] for (nn in 2:length(childs.ord)) { top <- N[[childs.ord[nn - 1]]]$segtop bot <- N[[childs.ord[nn]]]$segbottom mindist <- GetMinDistBetweenContours(top, bot) if (mindist != 1) { # There is room for tidying or untidy up if branches are tangled mod <- mindist - 1 N[[childs.ord[nn]]]$segbottom[, c(2, 4)] <- N[[childs.ord[nn]]]$segbottom[, c(2, 4)] - mod N[[childs.ord[nn]]]$segtop[, c(2, 4)] <- N[[childs.ord[nn]]]$segtop[, c(2, 4)] - mod yynew[c(childs.ord[nn], N[[childs.ord[nn]]]$desc)] <- yynew[c(childs.ord[nn], N[[childs.ord[nn]]]$desc)] - mod } } newyofcurrentnode <- mean(range(yynew[childs.ord])) yynew[n] <- newyofcurrentnode diffiny <- oldyofcurrentnode - newyofcurrentnode } descseg <- segofnodes[n, ] descseg[, c(2, 4)] <- descseg[, c(2, 4)] - diffiny segtop.pre <- rbind(descseg, do.call(rbind, lapply(N[childs], function(x) x$segtop))) segtop <- GetContourPairsFromSegments(segtop.pre, "top") segbottom.pre <- rbind(descseg, do.call(rbind, lapply(N[childs], function(x) x$segbottom))) segbottom <- GetContourPairsFromSegments(segbottom.pre, "bottom") N[[n]]$childs <- childs.ord N[[n]]$desc <- desc N[[n]]$segtop <- segtop N[[n]]$segbottom <- segbottom } yynew <- yynew - (min(yynew) - 1) ## so that min(y)=1 always ## finalrange <- diff(range(yynew)) #for computing compression. Remove? ## compression <- ((initialrange-finalrange)/initialrange)*100 # Remove? ## print(paste("Compression: ", round(compression,2),"%", sep="")) #Remove? yynew } ape/R/matexpo.R0000644000176200001440000000066612465112403013026 0ustar liggesusers## ladderize.R (2007-10-08) ## Matrix Exponential ## Copyright 2007 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. matexpo <- function(x) { if (!is.matrix(x)) stop('"x" must be a matrix') nr <- dim(x)[1] if (nr != dim(x)[2]) stop('"x" must be a square matrix') ans <- .C(mat_expo, as.double(x), as.integer(nr))[[1]] dim(ans) <- c(nr, nr) ans } ape/R/RcppExports.R0000644000176200001440000000063213135300116013626 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 bipartition2 <- function(orig, nTips) { .Call(`_ape_bipartition2`, orig, nTips) } prop_part2 <- function(trees, nTips) { .Call(`_ape_prop_part2`, trees, nTips) } reorderRcpp <- function(orig, nTips, root, order) { .Call(`_ape_reorderRcpp`, orig, nTips, root, order) } ape/R/read.nexus.data.R0000644000176200001440000001606513714465044014347 0ustar liggesusers## by KS replace.single.quotes <- function(x, start = 1L) { z <- unlist(gregexpr("'", x)) if (length(z) %% 2) { #warning("wrong number of single quotes around labels") warning(paste0("odd number of single quotes (", length(z), "): label(s) unchanged")) return(x) } i <- 1 while (i < length(z)) { tmp <- substr(x, z[i], z[i + 1]) substr(x, z[i], z[i + 1]) <- gsub("\\s+", "_", tmp) i <- i + 2 } gsub("'", "", x) } "read.nexus.data" <- function (file) { # Simplified NEXUS data parser. # # Version: 09/13/2006 01:01:59 PM CEST # (modified by EP 2011-06-01 and by TG 2019-06-25) # # By: Johan Nylander, nylander @ scs.fsu.edu # # WARNING: This is parser reads a restricted nexus format, # see README for details. # # Argument (x) is a nexus formatted data file. # # Returns (Value) a list of data sequences each made of a single # vector of mode character where each element is a character. # # TODO: Error checking, gap/missing, find.datatype, etc. #------------------------------------------------------------------ "find.ntax" <- function (x) { for (i in 1:NROW(x)) { if(any(f <- grep("\\bntax", x[i], ignore.case = TRUE))) { ntax <- as.numeric(sub("(.+?)(ntax\\s*\\=\\s*)(\\d+)(.+)", "\\3", x[i], perl = TRUE, ignore.case = TRUE)) break } } ntax } "find.nchar" <- function (x) { for (i in 1:NROW(x)) { if(any(f <- grep("\\bnchar", x[i], ignore.case = TRUE))) { nchar <- as.numeric(sub("(.+?)(nchar\\s*\\=\\s*)(\\d+)(.+)", "\\3", x[i], perl = TRUE, ignore.case = TRUE)) break } } nchar } "find.matrix.line" <- function (x) { for (i in 1:NROW(x)) { if(any(f <- grep("\\bmatrix\\b", x[i], ignore.case = TRUE))) { matrix.line <- as.numeric(i) break } } matrix.line } "trim.whitespace" <- function (x) { gsub("\\s+", "", x) } "trim.semicolon" <- function (x) { gsub(";", "", x) } #TG: Added get polymorphism function "get.polymorphism" <- function (x) { ## Detect polymorphism function is.poly.start <- function(x) {return("(" == x || "{" == x)} is.poly.end <- function(x) {return(")" == x || "}" == x)} ## Position increment position <- 1 ## Check which position contains a polymorphism while(position <= length(x)) { ## Check whether the position is polymorphic if(is.poly.start(x[position])){ ## Find the polymorphism end poly_end <- position + 1 while(!is.poly.end(x[poly_end])) { poly_end <- poly_end + 1 if(is.poly.start(x[poly_end]) || poly_end > length(x)) { stop("missing closing bracket for a polymorphism at position ", position) } } ## Replace the position by what's in the middle of the polymorphism x[position] <- paste0(x[(position+1):(poly_end-1)], collapse = "/") ## Remove the polymorphism x <- x[-c((position+1):poly_end)] } ## Increment the position position <- position + 1 } return(x) } X <- scan(file = file, what = character(), sep = "\n", quiet = TRUE, comment.char = "[", strip.white = TRUE) ntax <- find.ntax(X) nchar <- find.nchar(X) matrix.line <- find.matrix.line(X) start.reading <- matrix.line + 1 Obj <- list() length(Obj) <- ntax i <- 1 pos <- 0 tot.nchar <- 0 tot.ntax <- 0 ## by KS single.quotes <- grepl("'", X) if (any(single.quotes)) { to.replace <- which(single.quotes) for (j in to.replace) { X[[j]] <- replace.single.quotes(X[[j]]) } } for (j in start.reading:NROW(X)) { Xj <- trim.semicolon(X[j]) if(Xj == "") { break } if(any(jtmp <- grep("\\bend\\b", X[j], perl = TRUE, ignore.case = TRUE))) { break } ts <- unlist(strsplit(Xj, "(?<=\\S)(\\s+)(?=\\S)", perl = TRUE)) if (length(ts) > 2) { stop("nexus parser does not handle spaces in sequences or taxon names (ts>2)") } if (length(ts) !=2) { stop("nexus parser failed to read the sequences (ts!=2)") } Seq <- trim.whitespace(ts[2]) Name <- trim.whitespace(ts[1]) nAME <- paste(c("\\b", Name, "\\b"), collapse = "") if (any(l <- grep(nAME, names(Obj)))) { tsp <- strsplit(Seq, NULL)[[1]] #TG: Convert polymorphisms if(any("(" %in% tsp || "{" %in% tsp)) { tsp <- get.polymorphism(tsp) } for (k in 1:length(tsp)) { p <- k + pos Obj[[l]][p] <- tsp[k] chars.done <- k } } else { names(Obj)[i] <- Name tsp <- strsplit(Seq, NULL)[[1]] #TG: Convert polymorphisms if(any("(" %in% tsp || "{" %in% tsp)) { tsp <- get.polymorphism(tsp) } for (k in 1:length(tsp)) { p <- k + pos Obj[[i]][p] <- tsp[k] chars.done <- k } } tot.ntax <- tot.ntax + 1 if (tot.ntax == ntax) { i <- 1 tot.ntax <- 0 tot.nchar <- tot.nchar + chars.done if (tot.nchar == nchar*ntax) { print("ntot was more than nchar*ntax") break } pos <- tot.nchar } else { i <- i + 1 } } if (tot.ntax != 0) { cat("ntax:",ntax,"differ from actual number of taxa in file?\n") stop("nexus parser did not read names correctly (tot.ntax!=0)") } for (i in 1:length(Obj)) { if (length(Obj[[i]]) != nchar) { cat(names(Obj[i]),"has",length(Obj[[i]]),"characters\n") stop("nchar differ from sequence length (length(Obj[[i]])!=nchar)") } } Obj <- lapply(Obj, tolower) Obj } ## by KS: nexus2DNAbin <- function(x) { bs <- as.raw(._bs_) cs <- ._cs_ fun <- function(x) { res <- as.raw(0) for (i in x) res <- res | bs[match(i, cs)] res <- res & bs[15] # bs[15] == n if (!(res %in% bs)) return(cs[17]) # return(?) cs[match(res, bs)] } y <- unique(unlist(x)) y <- tolower(y) y <- y[is.na(match(y, cs))] if (length(y)) { tmp <- strsplit(y, "/") repl <- sapply(tmp, fun) for (i in seq_along(x)) { for (j in seq_along(y)) { x[[i]] <- gsub(y[j], repl[j], x[[i]]) } } } as.DNAbin(x) } ape/R/is.binary.tree.R0000644000176200001440000000222414370370026014201 0ustar liggesusers## is.binary.tree.R (2023-02-07) ## Test for Binary Tree ## Copyright 2016-2023 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.binary <- function(phy) UseMethod("is.binary") is.binary.phylo <- function(phy) { n <- length(phy$tip.label) m <- phy$Nnode dgr <- tabulate(phy$edge, n + m) ref <- c(rep.int(1L, n), rep.int(3L, m)) ## the root is assumed to be numbered n+1 if (.is.rooted_ape(phy, n)) ref[n + 1L] <- 2L ## can use identical() as long as tabulate() returns integers identical(dgr, ref) } is.binary.tree <- function(phy) { message("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\nis.binary.tree() is deprecated; using is.binary() instead.\n\nis.binary.tree() will be removed soon: see ?is.binary and update your code.\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") is.binary(phy) } is.binary.multiPhylo <- function(phy) { phy <- unclass(phy) n <- length(attr(phy, "TipLabel")) if (n) n - sapply(phy, "[[", "Nnode") + is.rooted.multiPhylo(phy) == 2 else sapply(phy, is.binary.phylo) } ape/R/cherry.R0000644000176200001440000000374712465112403012650 0ustar liggesusers## cherry.R (2009-05-10) ## Number of Cherries and Null Models of Trees ## Copyright 2002-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. cherry <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') n <- length(phy$tip.label) nb.node <- phy$Nnode if (nb.node != n - 1) stop('"phy" is not fully dichotomous') if (n < 4) stop("not enough tips in your phylogeny for this analysis") cherry <- sum(tabulate(phy$edge[, 1][phy$edge[, 2] <= n]) == 2) small.n <- n < 20 if (small.n) { P.yule <- f.cherry.yule(n, cherry) P.uniform <- f.cherry.uniform(n, cherry) } else { P.yule <- 2*(1 - pnorm(abs(cherry - n/3)/sqrt(2*n/45))) mu.unif <- n*(n - 1)/(2*(2*n - 5)) sigma2.unif <- n*(n - 1)*(n - 4)*(n - 5)/(2*(2*n - 5)^2 * (2*n -7)) P.uniform <- 2*(1 - pnorm(abs(cherry - mu.unif)/sqrt(sigma2.unif))) } cat("\nAnalysis of the Number of Cherries in a Tree\n\n") cat("Phylogenetic tree:", deparse(substitute(phy)), "\n") cat("Number of tips:", n, "\n") cat("Number of cherries:", cherry, "\n\n") cat("Null hypothesis: Yule model\n") cat(" P-value =", round(P.yule, 4), "\n\n") cat("Null hypothesis: uniform model\n") cat(" P-value =", round(P.uniform, 4), "\n\n") if (!small.n) cat("(P-values were computed using normal approximations)\n") } f.cherry.yule <- function(n, k) { if (k == 0 || k > floor(n/2)) 0 else if (n == 4) if (k == 1) 2/3 else if (k == 2) 1/3 else 0 else (1 - 2*(k - 1)/(n - 1))*f.cherry.yule(n - 1, k - 1) + 2*k/(n - 1)*f.cherry.yule(n - 1, k) } f.cherry.uniform <- function(n, k) { if (k == 0 || k > floor(n/2)) 0 else if (n == 4) if (k == 1) 4/5 else if (k == 2) 1/5 else 0 else if (k == 1) 0 else (gamma(n + 1)*gamma(n - 2 + 1)*gamma(n - 4 + 1) * 2^(n-2*k)) / (gamma(n - 2*k + 1)*gamma(2*n - 4 + 1)*gamma(k + 1)*gamma(k - 2 + 1)) } ape/R/evonet.R0000644000176200001440000001555114533611500012650 0ustar liggesusers## evonet.R (2017-07-28) ## Evolutionary Networks ## Copyright 2011-2012 Emmanuel Paradis, 2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. evonet <- function(phy, from, to = NULL) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo".') if (!is.rooted(phy)) warning("the tree is unrooted") x <- phy if (is.null(to)) { if (is.data.frame(from)) from <- as.matrix(from) if (!is.matrix(from)) stop("'from' must be a matrix or a data frame if 'to' is not given") if (ncol(from) > 2) { warning("'from' has more than two columns: only the first two will be used.") ret <- from[, 1:2] } else if (ncol(from) < 2) { stop("'from' must have at least two columns") } else ret <- from } else { from <- as.vector(from) to <- as.vector(to) if (length(from) != length(to)) stop("'from' and 'to' not of the same length after coercing as vectors") ret <- cbind(from, to) } ## check that values are not out of range: storage.mode(ret) <- "integer" if (any(is.na(ret))) stop("some values are NA's after coercing as integers") if (any(ret < 0) || any(ret > Ntip(phy) + phy$Nnode)) stop("some values are out of range") x$reticulation <- ret class(x) <- c("evonet", "phylo") x } as.phylo.evonet <- function(x, ...) { x$reticulation <- NULL class(x) <- "phylo" x } plot.evonet <- function(x, col = "blue", lty = 1, lwd = 1, alpha = 0.5, arrows = 0, arrow.type = "classical", ...) { ## changed 5/24/17 by Klaus plot.phylo(x, ...) edges(x$reticulation[, 1], x$reticulation[, 2], col = rgb(t(col2rgb(col)), alpha = 255 * alpha, maxColorValue = 255), lty = lty, lwd = lwd, arrows = arrows, type = arrow.type) } as.networx.evonet <- function(x, weight = NA, ...) { if (any(x$reticulation <= Ntip(x))) stop("some tips are involved in reticulations: cannot convert to \"networx\"") x <- reorder(x, "postorder") ned <- Nedge(x) nrt <- nrow(x$reticulation) x$edge <- rbind(x$edge, x$reticulation) colnames(x$edge) <- c("oldNodes", "newNodes") x$reticulation <- NULL x$edge.length <- c(x$edge.length, rep(weight, length.out = nrt)) x$split <- c(1:ned, 1:nrt) class(x) <- c("networx", "phylo") x } as.network.evonet <- function(x, directed = TRUE, ...) { class(x) <- NULL x$edge <- rbind(x$edge, x$reticulation) as.network.phylo(x, directed = directed, ...) } as.igraph.evonet <- function(x, directed = TRUE, use.labels = TRUE, ...) { class(x) <- NULL x$edge <- rbind(x$edge, x$reticulation) ## added check by Klaus (2017-05-26) if (use.labels) { if (!is.null(x$node.label)){ tmp <- nchar(x$node.label) if (any(tmp == 0)){ newLabel <- paste0("number", 1:x$Nnode) x$node.label[tmp == 0] <- newLabel[tmp == 0] } } if (any(duplicated(c(x$tip.label, x$node.label)))) stop("Duplicated labels!") } as.igraph.phylo(x, directed = directed, use.labels = use.labels, ...) } print.evonet <- function(x, ...) { nr <- nrow(x$reticulation) cat("\n Evolutionary network with", nr, "reticulation") if (nr > 1) cat("s") cat("\n\n --- Base tree ---") print.phylo(as.phylo(x)) } ## new stuff by Klaus (2017-05-26) reorder.evonet <- function(x, order = "cladewise", index.only = FALSE, ...) { reticulation <- x$reticulation y <- reorder(as.phylo(x), order = order, index.only = index.only, ...) if (index.only) return(y) y$reticulation <- reticulation class(y) <- c("evonet", "phylo") y } as.evonet <- function(x, ...) { if (inherits(x, "evonet")) return(x) UseMethod("as.evonet") } as.evonet.phylo <- function(x, ...) { pos <- grep("#", x$tip.label) ind <- match(pos, x$edge[, 2]) reticulation <- x$edge[ind, , drop = FALSE] edge <- x$edge[-ind, , drop = FALSE] nTips <- as.integer(length(x$tip.label)) reticulation[, 2] <- as.integer(match(x$tip.label[pos], x$node.label) + nTips) for (i in sort(pos, TRUE)) { edge[edge > i ] <- edge[edge > i] - 1L reticulation[reticulation > i] <- reticulation[reticulation > i] - 1L } x$tip.label <- x$tip.label[-pos] nTips <- as.integer(length(x$tip.label)) nn <- length(unique(edge[,1])) if(nn < x$Nnode){ ne <- as.integer( x$Nnode - nn ) edge[edge > nTips] <- edge[edge > nTips] + ne reticulation[reticulation > nTips] <- reticulation[reticulation > nTips] +ne z <- logical(max(edge)) z[edge[, 2]] <- TRUE z[seq_len(nTips)] <- FALSE z[edge[, 1]] <- FALSE pos2 <- which(z) k <- 1 for (i in sort(pos2, TRUE)) { nTips <- as.integer( nTips + 1L ) edge[edge==i] <- nTips reticulation[reticulation == i] <- nTips edge[edge > i] <- edge[edge > i] - 1L reticulation[reticulation > i] <- reticulation[reticulation > i] - 1L } x$Nnode <- nn x$node.label <- NULL x$tip.label <- c(x$tip.label , rep("", ne)) } x$edge <- edge x$reticulation <- reticulation if (!is.null(x$edge.length)) x$edge.length <- x$edge.length[-ind] class(x) <- c("evonet", "phylo") x } ## requires new version of clado.build and tree.build read.evonet <- function(file = "", text = NULL, comment.char = "", ...) { x <- read.tree(file = file, text = text, comment.char = comment.char, ...) as.evonet.phylo(x) } .evonet2phylo <- function(x) { nTips <- as.integer(length(x$tip.label)) if (!is.null(x$edge.length)) { nd <- node.depth.edgelength(x) x$edge.length <- c(x$edge.length, nd[x$reticulation[, 2]] - nd[x$reticulation[, 1]]) } if (!is.null(x$node.label)) x$tip.label <- c(x$tip.label, x$node.label[x$reticulation[, 2] - nTips]) else { newLabels <- paste0("#H", x$reticulation[, 2]) x$tip.label <- c(x$tip.label, newLabels) x$node.label <- rep("", x$Nnode) ind <- which((x$reticulation[, 2] > nTips) & !duplicated(x$reticulation[, 2])) x$node.label[x$reticulation[ind, 2] - nTips] <- newLabels[ind] } nrets <- as.integer(nrow(x$reticulation)) x$edge[x$edge > nTips] <- x$edge[x$edge > nTips] + nrets x$reticulation[, 1] <- x$reticulation[, 1] + nrets x$reticulation[, 2] <- nTips + (1L:nrets) x$edge <- rbind(x$edge, x$reticulation) x$reticulation <- NULL attr(x, "order") <- NULL class(x) <- "phylo" x } write.evonet <- function(x, file = "", ...) { x <- .evonet2phylo(x) write.tree(x, file = file, ...) } Nedge.evonet <- function(phy) dim(phy$edge)[1] + dim(phy$reticulation)[1] ape/R/PGLS.R0000644000176200001440000002116014254570156012121 0ustar liggesusers## PGLS.R (2022-06-22) ## Phylogenetic Generalized Least Squares ## Copyright 2004-2021 Julien Dutheil, and 2006-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. corBrownian <- function(value = 1, phy, form = ~1) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') attr(value, "formula") <- form attr(value, "fixed") <- TRUE attr(value, "tree") <- phy class(value) <- c("corBrownian", "corPhyl", "corStruct") value } corMartins <- function(value, phy, form = ~1, fixed = FALSE) { if (length(value) > 1) stop("only one parameter is allowed") if (value < 0) stop("the parameter alpha must be positive") if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') attr(value, "formula") <- form attr(value, "fixed") <- fixed attr(value, "tree") <- phy class(value) <- c("corMartins", "corPhyl", "corStruct") value } corGrafen <- function(value, phy, form = ~1, fixed = FALSE) { if (length(value) > 1) stop("only one parameter is allowed") if (value < 0) stop("parameter rho must be positive") value <- log(value) # Optimization under constraint, use exponential transform. if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') attr(value, "formula") <- form attr(value, "fixed") <- fixed attr(value, "tree") <- phy class(value) <- c("corGrafen", "corPhyl", "corStruct") value } Initialize.corPhyl <- function(object, data, ...) { ## The same as in Initialize corStruct: form <- formula(object) if (getCovariateFormula(object) == ~1) { warning("No covariate specified, species will be taken as ordered in the data frame. To avoid this message, specify a covariate containing the species names with the 'form' argument.") } ## Obtaining the group information, if any if (is.null(getGroupsFormula(form))) { attr(object, "Dim") <- Dim(object, as.factor(rep(1, nrow(data)))) } else { # no groups attr(object, "groups") <- getGroups(object, form, data = data) attr(object, "Dim") <- Dim(object, attr(object, "groups")) } ## Obtaining the covariate(s) attr(object, "covariate") <- getCovariate(object, data = data) object } corMatrix.corBrownian <- function(object, covariate = getCovariate(object), corr = TRUE, ...) { if (!("corBrownian" %in% class(object))) stop('object is not of class "corBrownian"') if (data.class(covariate) == "list") { as.list(lapply(covariate, function(el) corMatrix(object, covariate = el))) } else { tree <- attr(object, "tree") mat <- vcv.phylo(tree, corr = corr) if (formula(object) == ~1) return(mat) # added by EP (2022-06-22) covariate <- as.character(covariate) mat[covariate, covariate] } } corMatrix.corMartins <- function(object, covariate = getCovariate(object), corr = TRUE, ...) { if (!("corMartins" %in% class(object))) stop('object is not of class "corMartins"') if (data.class(covariate) == "list") { as.list(lapply(covariate, function(el) corMatrix(object, covariate = el))) } else { tree <- attr(object, "tree") dist <- cophenetic.phylo(tree) mat <- exp(-object[1] * dist) if (corr) mat <- cov2cor(mat) if (formula(object) == ~1) return(mat) # added by EP (2022-06-22) covariate <- as.character(covariate) mat[covariate, covariate] } } corMatrix.corGrafen <- function(object, covariate = getCovariate(object), corr = TRUE, ...) { if (!("corGrafen" %in% class(object))) stop('object is not of class "corGrafen"') if (data.class(covariate) == "list") { as.list(lapply(covariate, function(el) corMatrix(object, covariate = el))) } else { tree <- compute.brlen(attr(object, "tree"), method = "Grafen", power = exp(object[1])) mat <- vcv.phylo(tree, corr = corr) if (formula(object) == ~1) return(mat) # added by EP (2022-06-22) covariate <- as.character(covariate) mat[covariate, covariate] } } coef.corBrownian <- function(object, unconstrained = TRUE, ...) { if (!("corBrownian" %in% class(object))) stop('object is not of class "corBrownian"') numeric(0) } coef.corMartins <- function(object, unconstrained = TRUE, ...) { if (!("corMartins" %in% class(object))) stop('object is not of class "corMartins"') if (unconstrained) { if (attr(object, "fixed")) { return(numeric(0)) } else { return(as.vector(object)) } } aux <- as.vector(object) names(aux) <- "alpha" aux } coef.corGrafen <- function(object, unconstrained = TRUE, ...) { if (!("corGrafen" %in% class(object))) stop('object is not of class "corGrafen"') if (unconstrained) { if (attr(object, "fixed")) { return(numeric(0)) } else { return(as.vector(object)) } } aux <- exp(as.vector(object)) names(aux) <- "rho" aux } ## changed by EP (2006-10-12): compute.brlen <- function(phy, method = "Grafen", power = 1, ...) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') Ntip <- length(phy$tip.label) Nnode <- phy$Nnode Nedge <- dim(phy$edge)[1] if (is.numeric(method)) { phy$edge.length <- rep(method, length.out = Nedge) return(phy) } if (is.function(method)) { phy$edge.length <- method(Nedge, ...) return(phy) } if (is.character(method)) { # == "Grafen" tr <- reorder(phy, "postorder") xx <- .C(node_depth, as.integer(Ntip), as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), 1L)[[5]] - 1 m <- Ntip - 1 phy$edge.length <- (xx[phy$edge[, 1]]/m)^power - (xx[phy$edge[, 2]]/m)^power return(phy) } } ## by EP: corPagel <- function(value, phy, form = ~1, fixed = FALSE) { if (value < 0 || value > 1) stop("the value of lambda must be between 0 and 1.") if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') attr(value, "formula") <- form attr(value, "fixed") <- fixed attr(value, "tree") <- phy class(value) <- c("corPagel", "corPhyl", "corStruct") value } corMatrix.corPagel <- function(object, covariate = getCovariate(object), corr = TRUE, ...) { if (!("corPagel" %in% class(object))) stop('object is not of class "corPagel"') if (data.class(covariate) == "list") { as.list(lapply(covariate, function(el) corMatrix(object, covariate = el))) } else { mat <- vcv.phylo(attr(object, "tree"), corr = corr) tmp <- diag(mat) mat <- object[1]*mat diag(mat) <- tmp if (formula(object) == ~1) return(mat) # added by EP (2022-06-22) covariate <- as.character(covariate) mat[covariate, covariate] } } coef.corPagel <- function(object, unconstrained = TRUE, ...) { if (unconstrained) { if (attr(object, "fixed")) return(numeric(0)) else return(object[1]) } aux <- object[1] names(aux) <- "lambda" aux } corBlomberg <- function(value, phy, form = ~1, fixed = FALSE) { if (value <= 0) stop("the value of g must be greater than 0.") if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') attr(value, "formula") <- form attr(value, "fixed") <- fixed attr(value, "tree") <- phy class(value) <- c("corBlomberg", "corPhyl", "corStruct") value } corMatrix.corBlomberg <- function(object, covariate = getCovariate(object), corr = TRUE, ...) { if (object[1] <= 0) stop("the optimization has reached a value <= 0 for parameter 'g': probably need to set 'fixed = TRUE' in corBlomberg().") if (data.class(covariate) == "list") { as.list(lapply(covariate, function(el) corMatrix(object, covariate = el))) } else { phy <- attr(object, "tree") d <- (dist.nodes(phy)[length(phy$tip.label) + 1, ])^(1/object[1]) phy$edge.length <- d[phy$edge[, 2]] - d[phy$edge[, 1]] mat <- vcv.phylo(phy, corr = corr) if (formula(object) == ~1) return(mat) # added by EP (2022-06-22) covariate <- as.character(covariate) mat[covariate, covariate] } } coef.corBlomberg <- function(object, unconstrained = TRUE, ...) { if (unconstrained) { if (attr(object, "fixed")) return(numeric(0)) else return(object[1]) } aux <- object[1] names(aux) <- "g" aux } ape/R/comparePhylo.R0000644000176200001440000001546314562660362014030 0ustar liggesusers## comparePhylo.R (2024-02-13) ## Compare Two "phylo" Objects ## Copyright 2018-2024 Emmanuel Paradis, 2021-2024 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. comparePhylo <- function(x, y, plot = FALSE, force.rooted = FALSE, use.edge.length = FALSE, commons = TRUE, location = "bottomleft", ...) { tree1 <- deparse(substitute(x)) tree2 <- deparse(substitute(y)) res <- list() msg <- paste("=> Comparing", tree1, "with", tree2) res$messages <- msg n1 <- Ntip(x) n2 <- Ntip(y) tmp <- if (n1 == n2) paste("Both trees have the same number of tips:", n1) else paste("Trees have different numbers of tips:", n1, "and", n2) msg <- c(msg, tmp) tips1 <- x$tip.label tips2 <- y$tip.label tips12 <- match(tips1, tips2) tips21 <- match(tips2, tips1) tmp <- is.na(tips12) if (any(tmp)) msg <- c(msg, paste("Tips in", tree1, "not in", tree2, ":", paste(tips1[tmp], collapse = ", "))) tmp2 <- is.na(tips21) if (any(tmp2)) msg <- c(msg, paste("Tips in", tree2, "not in", tree1, ":", paste(tips2[tmp2], collapse = ", "))) sameTips <- FALSE if (!sum(tmp, tmp2)) { msg <- c(msg, "Both trees have the same tip labels") sameTips <- TRUE } m1 <- Nnode(x) m2 <- Nnode(y) tmp <- if (m1 == m2) paste("Both trees have the same number of nodes:", m1) else paste("Trees have different numbers of nodes:", m1, "and", m2) msg <- c(msg, tmp) rooted1 <- is.rooted(x) rooted2 <- is.rooted(y) tmp <- if (rooted1) { if (rooted2) "Both trees are rooted" else paste(tree1, "is rooted,", tree2, "is unrooted") } else { if (rooted2) paste(tree1, "is unrooted,", tree2, "is rooted") else "Both trees are unrooted" } msg <- c(msg, tmp) ultra1 <- ultra2 <- FALSE if (!is.null(x$edge.length)) ultra1 <- is.ultrametric(x) if (!is.null(y$edge.length)) ultra2 <- is.ultrametric(y) tmp <- if (ultra1) { if (ultra2) "Both trees are ultrametric" else paste(tree1, "is ultrametric,", tree2, "is not") } else { if (ultra2) paste(tree1, "is not ultrametric,", tree2, "is ultrametric") else "Both trees are not ultrametric" } msg <- c(msg, tmp) if (rooted1 && rooted2 || force.rooted) { key1 <- makeNodeLabel(x, "md5sum")$node.label key2 <- makeNodeLabel(y, "md5sum")$node.label mk12 <- match(key1, key2) mk21 <- match(key2, key1) if (any(tmp <- is.na(mk12))) { nk <- sum(tmp) msg <- c(msg, paste(nk, if (nk == 1) "clade" else "clades", "in", tree1, "not in", tree2)) } if (plot) { def.par <- par(no.readonly = TRUE) layout(matrix(1:2, 1, 2)) plot(x, use.edge.length = use.edge.length, main = tree1, ...) nodelabels(node = which(tmp) + n1, pch = 19, col = "blue", cex = 2) legend(location, legend = paste("Clade absent in", tree2), pch = 19, col = "blue") } if (any(tmp <- is.na(mk21))) { nk <- sum(tmp) msg <- c(msg, paste(nk, if (nk == 1) "clade" else "clades", "in", tree2, "not in", tree1)) } if (plot) { plot(y, use.edge.length = use.edge.length, main = tree2, ...) nodelabels(node = which(tmp) + n2, pch = 19, col = "red", cex = 2) legend(location, legend = paste("Clade absent in", tree1), pch = 19, col = "red") par(def.par) } nodes1 <- which(!is.na(mk12)) nodes2 <- mk12[!is.na(mk12)] if (ultra1 && ultra2) { bt1 <- branching.times(x) bt2 <- branching.times(y) BT <- data.frame(paste0(bt1[nodes1], " (", nodes1 + n1, ")"), paste0(bt2[nodes2], " (", nodes2 + n2, ")")) names(BT) <- c(tree1, tree2) res$BT <- BT msg <- c(msg, "Branching times of clades in common between both trees: see ..$BT (node number in parentheses)") } if (!is.null(nl1 <- x$node.label) && !is.null(nl2 <- y$node.label)) { NODES <- data.frame(paste0(nl1[nodes1], " (", nodes1 + n1, ")"), paste0(nl2[nodes2], " (", nodes2 + n2, ")")) names(NODES) <- c(tree1, tree2) res$NODES <- NODES msg <- c(msg, "Node labels of clades in common between both trees: see ..$NODES (node number in parentheses)") } } if (sameTips) { TR <- .compressTipLabel(c(x, y)) TR <- root(TR, attr(TR, "TipLabel")[1]) pp <- prop.part(TR) common.splits <- which(attr(pp, "number") == 2L) ncs <- length(common.splits) tmp <- if (ncs) paste(ncs, if (ncs == 1) "split" else "splits", "in common") else "No split in common" msg <- c(msg, tmp) if (plot) { def.par <- par(no.readonly = TRUE) co <- "black"#rgb(0, 0, 1, 0.7) layout(matrix(1:2, 1, 2)) edgecol1 <- rep("black", Nedge(x)) edgew1 <- rep(1, Nedge(x)) edgecol2 <- rep("black", Nedge(y)) edgew2 <- rep(1, Nedge(y)) if (ncs) { pp1 <- SHORTwise(prop.part(TR[[1]])) pp2 <- SHORTwise(prop.part(TR[[2]])) if (commons) { k1 <- which(!is.na(match(pp1, pp2)) & lengths(pp1) > 1) k2 <- which(!is.na(match(pp2, pp1)) & lengths(pp2) > 1) } else { k1 <- which(is.na(match(pp1, pp2)) & lengths(pp1) > 1) k2 <- which(is.na(match(pp2, pp1)) & lengths(pp2) > 1) } e1 <- match(k1 + n1, TR[[1]]$edge[, 2]) e2 <- match(k2 + n2, TR[[2]]$edge[, 2]) edgecol1[e1] <- edgecol2[e2] <- co edgew1[e1] <- edgew2[e2] <- 5 } text4leg <- if (commons) "Split present in both trees" else "Split specific to each tree" plot(TR[[1]], "u", use.edge.length = use.edge.length, edge.color = edgecol1, edge.width = edgew1, main = tree1, ...) legend(location, legend = text4leg, lty = 1, col = "black", lwd = 5, xpd = NA) plot(TR[[2]], "u", use.edge.length = use.edge.length, edge.color = edgecol2, edge.width = edgew2, main = tree2, ...) par(def.par) } } res$messages <- paste0(msg, ".") class(res) <- "comparePhylo" res } print.comparePhylo <- function(x, ...) { cat(x$messages, sep = "\n") cat("\n") x$messages <- class(x) <- NULL if (length(x)) print.default(x) } ape/R/subtrees.R0000644000176200001440000000207612465112403013202 0ustar liggesusers## subtrees.R (2008-04-14) ## All subtrees of a Phylogenetic Tree ## Copyright 2008 Damien de Vienne ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. subtrees<-function(tree, wait = FALSE) { N.tip<-Ntip (tree) N.node<-Nnode(tree) limit<-N.tip+N.node sub<-list(N.node) u<-0 for (k in (N.tip+1):limit) { u<-u+1 if (wait==TRUE) cat("wait... Node",u,"out of", N.node, "treated\n") fils<-NULL pere<-res <- k repeat { for (i in 1: length(pere)) fils<-c(fils, tree$edge[,2][tree$edge[,1]==pere[i]]) res<-c(res, fils) pere<-fils fils<-NULL if (length(pere)==0) break } len<-res[res>N.tip] if (u==1) { tree2<-tree len<-(N.tip+1):limit } else { len.tip<-res[res 1) { x <- mclapply(x, foo, ntip = ntip, mc.cores = mc.cores) bar <- function(i) { y <- x[[i]] m1 <- nnode[i] res_sub <- numeric(n - i) for (j in (i + 1):n) { z <- x[[j]] res_sub[j - i] <- m1 + nnode[j] - 2 * sum(z %in% y) } res_sub } res_list <- mclapply(1:(n - 1), bar, mc.cores = mc.cores) res <- unlist(res_list) } else { x <- lapply(x, foo, ntip = ntip) k <- 0L res <- numeric(n * (n - 1) /2) for (i in 1:(n - 1)) { y <- x[[i]] m1 <- nnode[i] for (j in (i + 1):n) { z <- x[[j]] k <- k + 1L res[k] <- m1 + nnode[j] - 2 * sum(z %in% y) } } } } else { # method == "score" NTIP <- Ntip(x) x <- unroot(x) fooscore <- function(phy) { if (is.null(phy$edge.length)) stop("trees must have branch lengths for the branch score distance.") ntip <- length(phy$tip.label) phy <- reorder.phylo(phy, "postorder") bp <- bipartition2(phy$edge, ntip) lapply(bp, function(x) sort(phy$tip.label[x])) } if (mc.cores > 1) { BP <- mclapply(x, fooscore, mc.cores = mc.cores) bar <- function(i) { tr <- x[[i]] bp <- BP[[i]] nx <- NTIP[i] res_sub <- numeric(n - i) for (j in (i + 1):n) res_sub[j - i] <- .dist.topo.score(tr, x[[j]], nx, NTIP[j], bp, BP[[j]]) res_sub } res_list <- mclapply(1:(n - 1), bar, mc.cores = mc.cores) res <- unlist(res_list) } else { BP <- lapply(x, fooscore) k <- 0L res <- numeric(n * (n - 1) /2) for (i in 1:(n - 1)) { tr <- x[[i]] bp <- BP[[i]] nx <- NTIP[i] for (j in (i + 1):n) { k <- k + 1L res[k] <- .dist.topo.score(tr, x[[j]], nx, NTIP[j], bp, BP[[j]]) } } } } attr(res, "Size") <- n attr(res, "Labels") <- nms attr(res, "Diag") <- attr(res, "Upper") <- FALSE attr(res, "method") <- method class(res) <- "dist" res } .dist.topo.score <- function(x, y, nx, ny, bp1, bp2) { ## ny <- length(y$tip.label) # fix by Otto Cordero ## fix by Tim Wallstrom: bp2.comp <- lapply(bp2, function(a) sort(y$tip.label[is.na(match(y$tip.label, a))])) ## End q1 <- length(bp1) q2 <- length(bp2) xe2 <- x$edge[, 2] ye2 <- y$edge[, 2] xel <- x$edge.length yel <- y$edge.length dT <- 0 found1 <- FALSE found2 <- logical(q2) found2[1] <- TRUE for (i in 2:q1) { for (j in 2:q2) { if (identical(bp1[[i]], bp2[[j]]) || identical(bp1[[i]], bp2.comp[[j]])) { dT <- dT + (xel[which(xe2 == nx + i)] - yel[which(ye2 == ny + j)])^2 found1 <- found2[j] <- TRUE break } } if (found1) found1 <- FALSE else dT <- dT + (xel[which(xe2 == nx + i)])^2 } if (!all(found2)) dT <- dT + sum((yel[ye2 %in% (ny + which(!found2))])^2) sqrt(dT) } .compressTipLabel <- function(x, ref = NULL) { ## 'x' is a list of objects of class "phylo" possibly with no class if (!is.null(attr(x, "TipLabel"))) return(x) if (is.null(ref)) ref <- x[[1]]$tip.label n <- length(ref) if (length(unique(ref)) != n) stop("some tip labels are duplicated in tree no. 1") ## serious improvement by Joseph W. Brown! relabel <- function (y) { label <- y$tip.label if (!identical(label, ref)) { if (length(label) != length(ref)) stop("one tree has a different number of tips") ilab <- match(label, ref) if (any(is.na(ilab))) stop("one tree has different tip labels") ie <- match(1:n, y$edge[, 2]) y$edge[ie, 2] <- ilab } y$tip.label <- NULL y } x <- unclass(x) # another killer improvement by Tucson's hackathon (1/2/2013) x <- lapply(x, relabel) attr(x, "TipLabel") <- ref class(x) <- "multiPhylo" x } prop.part <- function(..., check.labels = TRUE) { obj <- .getTreesFromDotdotdot(...) ntree <- length(obj) if (ntree == 1) check.labels <- FALSE if (check.labels) obj <- .compressTipLabel(obj) # fix by Klaus Schliep (2011-02-21) class(obj) <- NULL # fix by Klaus Schliep (2014-03-06) for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer" class(obj) <- "multiPhylo" obj <- reorder(obj, "postorder") # the following line should not be necessary any more # obj <- .uncompressTipLabel(obj) # fix a bug (2010-11-18) nTips <- length(obj[[1]]$tip.label) clades <- prop_part2(obj, nTips) attr(clades, "labels") <- obj[[1]]$tip.label clades } print.prop.part <- function(x, ...) { if (is.null(attr(x, "labels"))) { for (i in 1:length(x)) { cat("==>", attr(x, "number")[i], "time(s):") print(x[[i]], quote = FALSE) } } else { for (i in 1:length(attr(x, "labels"))) cat(i, ": ", attr(x, "labels")[i], "\n", sep = "") cat("\n") for (i in 1:length(x)) { cat("==>", attr(x, "number")[i], "time(s):") print(x[[i]], quote = FALSE) } } } summary.prop.part <- function(object, ...) attr(object, "number") plot.prop.part <- function(x, barcol = "blue", leftmar = 4, col = "red", ...) { if (is.null(attr(x, "labels"))) stop("cannot plot this partition object; see ?prop.part for details.") L <- length(x) n <- length(attr(x, "labels")) layout(matrix(1:2, 2, 1), heights = c(1, 3)) par(mar = c(0.1, leftmar, 0.1, 0.1)) one2L <- seq_len(L) plot(one2L - 0.5, attr(x, "number"), type = "h", col = barcol, xlim = c(0, L), xaxs = "i", xlab = "", ylab = "Frequency", xaxt = "n", bty = "n", ...) M <- matrix(0L, L, n) for (i in one2L) M[i, x[[i]]] <- 1L image.default(one2L, 1:n, M, col = c("white", col), xlab = "", ylab = "", yaxt = "n") mtext(attr(x, "labels"), side = 2, at = 1:n, las = 1) } ### by Klaus (2016-03-23): prop.clades <- function(phy, ..., part = NULL, rooted = FALSE) { if (is.null(part)) { obj <- .getTreesFromDotdotdot(...) ## avoid double counting of edges if trees are rooted if (!rooted) obj <- lapply(obj, unroot) part <- prop.part(obj, check.labels = TRUE) } LABS <- attr(part, "labels") if (!identical(phy$tip.label, LABS)) { i <- match(phy$tip.label, LABS) j <- match(seq_len(Ntip(phy)), phy$edge[, 2]) phy$edge[j, 2] <- i phy$tip.label <- LABS } bp <- prop.part(phy) if (!rooted) { ## avoid messing up the order and length if phy is rooted in some cases bp <- SHORTwise(bp) part <- postprocess.prop.part(part, "SHORTwise") } pos <- match(bp, part) tmp <- which(!is.na(pos)) n <- rep(NA_real_, phy$Nnode) n[tmp] <- attr(part, "number")[pos[tmp]] n } boot.phylo <- function(phy, x, FUN, B = 100, block = 1, trees = FALSE, quiet = FALSE, rooted = is.rooted(phy), jumble = TRUE, mc.cores = 1) { if (is.null(dim(x)) || length(dim(x)) != 2) stop("the data 'x' must have two dimensions (e.g., a matrix or a data frame)") if (anyDuplicated(rownames(x))) stop("some labels are duplicated in the data: you won't be able to analyse tree bipartitions") boot.tree <- vector("list", B) y <- nc <- ncol(x) nr <- nrow(x) if (nr < 4 && !trees) { warning("not enough rows in 'x' to compute bootstrap values.\nSet 'trees = TRUE' if you want to get the bootstrap trees") return(integer()) } if (block > 1) { a <- seq(1, nc - 1, block) b <- seq(block, nc, block) y <- mapply(":", a, b, SIMPLIFY = FALSE) getBootstrapIndices <- function() unlist(sample(y, replace = TRUE)) } else getBootstrapIndices <- function() sample.int(y, replace = TRUE) if (!quiet) { prefix <- "\rRunning bootstraps: " suffix <- paste("/", B) updateProgress <- function(i) cat(prefix, i, suffix) } if (mc.cores == 1) { for (i in 1:B) { boot.samp <- x[, getBootstrapIndices()] if (jumble) boot.samp <- boot.samp[sample.int(nr), ] boot.tree[[i]] <- FUN(boot.samp) if (!quiet && !(i %% 100)) updateProgress(i) } } else { if (!quiet) cat("Running parallel bootstraps...") foo <- function(i) { boot.samp <- x[, getBootstrapIndices()] if (jumble) boot.samp <- boot.samp[sample.int(nr), ] FUN(boot.samp) } boot.tree <- mclapply(1:B, foo, mc.cores = mc.cores) if (!quiet) cat(" done.") } if (nr < 4 && trees) return(list(BP = integer(), trees = boot.tree)) if (!quiet) cat("\nCalculating bootstrap values...") ## sort labels after mixed them up if (jumble) { boot.tree <- .compressTipLabel(boot.tree, ref = phy$tip.label) boot.tree <- .uncompressTipLabel(boot.tree) boot.tree <- unclass(boot.tree) # otherwise countBipartitions crashes } class(boot.tree) <- "multiPhylo" if (rooted) { pp <- prop.part(boot.tree) ans <- prop.clades(phy, part = pp, rooted = rooted) } else { phy <- reorder(phy, "postorder") ints <- phy$edge[, 2] > Ntip(phy) ans <- countBipartitions(phy, boot.tree) ans <- c(NA_integer_, ans[order(phy$edge[ints, 2])]) } if (!quiet) cat(" done.\n") if (trees) ans <- list(BP = ans, trees = boot.tree) ans } ### The next function transforms an object of class "prop.part" so ### that the vectors which are identical in terms of splits are aggregated. ### For instance if n = 5 tips, 1:2 and 3:5 actually represent the same ### split though they are different clades. The aggregation is done ### arbitrarily. ### The call to SHORTwise() insures that all splits are the shortest ones. ### The call to ONEwise() insures that all splits include the first tip. ### (rewritten by Klaus) postprocess.prop.part <- function(x, method = "ONEwise") { w <- attr(x, "number") labels <- attr(x, "labels") method <- match.arg(toupper(method), c("ONEWISE", "SHORTWISE")) FUN <- switch(method, "ONEWISE" = ONEwise, "SHORTWISE" = SHORTwise) x <- FUN(x) drop <- duplicated(x) if (any(drop)) { ind1 <- match(x[drop], x) ind2 <- which(drop) for (i in seq_along(ind2)) w[ind1[i]] <- w[ind1[i]] + w[ind2[i]] x <- x[!drop] w <- w[!drop] } attr(x, "number") <- w attr(x, "labels") <- labels class(x) <- "prop.part" x } ### This function changes an object of class "prop.part" so that they ### all include the first tip. For instance if n = 5 tips, 3:5 is ### changed to 1:2. ONEwise <- function(x) { nTips <- length(attr(x, "labels")) v <- seq_len(nTips) l <- lengths(x) == 0 if (any(l)) x[l] <- list(v) for (i in which(!l)) { y <- x[[i]] if (y[1] != 1) x[[i]] <- v[-y] } x } ### This function changes an object of class "prop.part" so that they ### all include the shorter part of the partition. ### For instance if n = 5 tips, 1:3 is changed to 4:5. In case n is even, e.g. ### n = 6 similar to ONEwise. SHORTwise <- function(x) { ## ensures the next line should also work for splits objects from phangorn nTips <- length(attr(x, "labels")) v <- seq_len(nTips) l <- lengths(x) lv <- nTips / 2 for (i in which(l >= lv)) { y <- x[[i]] if (l[i] > lv) { x[[i]] <- v[-y] } else { # (l[i] == lv) only possible alternative if (y[1] != 1) x[[i]] <- v[-y] } } x } consensus <- function(..., p = 1, check.labels = TRUE, rooted = FALSE) { foo <- function(ic, node) { ## ic: index of 'pp' ## node: node number in the final tree pool <- pp[[ic]] if (ic < m) { for (j in (ic + 1):m) { wh <- match(pp[[j]], pool) if (!any(is.na(wh))) { edge[pos, 1] <<- node pool <- pool[-wh] edge[pos, 2] <<- nextnode <<- nextnode + 1L pos <<- pos + 1L foo(j, nextnode) } } } size <- length(pool) if (size) { ind <- pos:(pos + size - 1) edge[ind, 1] <<- node edge[ind, 2] <<- pool pos <<- pos + size } } obj <- .getTreesFromDotdotdot(...) if (!is.null(attr(obj, "TipLabel"))) labels <- attr(obj, "TipLabel") else { labels <- obj[[1]]$tip.label if (check.labels) obj <- .compressTipLabel(obj) } if(!rooted) obj <- root(obj, 1) ntree <- length(obj) ## Get all observed partitions and their frequencies: pp <- prop.part(obj, check.labels = FALSE) if (!rooted) { pp <- postprocess.prop.part(pp, "SHORTwise") pp[[1]] <- seq_along(labels) } ## Drop the partitions whose frequency is less than 'p': if (p == 0.5) p <- 0.5000001 # avoid incompatible splits bs <- attr(pp, "number") sel <- bs >= p * ntree pp <- pp[sel] bs <- bs[sel] lens <- lengths(pp) if (length(drop <- which(lens == 1))) { pp <- pp[-drop] lens <- lens[-drop] } ## Get the order of the remaining partitions by decreasing size: ind <- order(lens, decreasing = TRUE) pp <- pp[ind] bs <- bs[ind] n <- length(labels) m <- length(pp) edge <- matrix(0L, n + m - 1, 2) if (m == 1) { edge[, 1] <- n + 1L edge[, 2] <- 1:n } else { nextnode <- n + 1L pos <- 1L foo(1, nextnode) } res <- structure(list(edge = edge, tip.label = labels, Nnode = m), class = "phylo") res <- reorder(res) node.label <- prop.clades(res, obj, rooted=rooted)/ntree res$node.label <- node.label res } ape/R/reorder.phylo.R0000644000176200001440000000614714456450026014155 0ustar liggesusers## reorder.phylo.R (2017-07-28) ## Internal Reordering of Trees ## Copyright 2006-2017 Emmanuel Paradis, 2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .reorder_ape <- function(x, order, index.only, nb.tip, io) { nb.edge <- dim(x$edge)[1] if (!is.null(attr(x, "order"))) if (attr(x, "order") == order) if (index.only) return(1:nb.edge) else return(x) nb.node <- x$Nnode if (nb.node == 1) if (index.only) return(1:nb.edge) else return(x) if (io == 3) { x <- reorder(x) neworder <- .C(neworder_pruningwise, as.integer(nb.tip), as.integer(nb.node), as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), as.integer(nb.edge), integer(nb.edge))[[6]] } else { neworder <- reorderRcpp(x$edge, as.integer(nb.tip), as.integer(nb.tip + 1L), io) } if (index.only) return(neworder) x$edge <- x$edge[neworder, ] if (!is.null(x$edge.length)) x$edge.length <- x$edge.length[neworder] attr(x, "order") <- order x } reorder.phylo <- function(x, order = "cladewise", index.only = FALSE, ...) { ORDER <- c("cladewise", "postorder", "pruningwise") io <- pmatch(order, ORDER) if (is.na(io)) stop("ambiguous order") order <- ORDER[io] n <- length(x$tip.label) if (n < 2) { x } else { .reorder_ape(x, order, index.only, n, io) } } reorder.multiPhylo <- function(x, order = "cladewise", ...) { ORDER <- c("cladewise", "postorder", "pruningwise") io <- pmatch(order, ORDER) if (is.na(io)) stop("ambiguous order") order <- ORDER[io] oc <- oldClass(x) class(x) <- NULL labs <- attr(x, "TipLabel") x <- if (is.null(labs)) lapply(x, reorder.phylo, order = order) else lapply(x, .reorder_ape, order = order, index.only = FALSE, nb.tip = length(labs), io = io) if (!is.null(labs)) attr(x, "TipLabel") <- labs class(x) <- oc x } cladewise <- function(x) reorder(x, "cladewise", index.only = TRUE) postorder <- function(x) reorder(x, "postorder", index.only = TRUE) rotateConstr <- function(phy, constraint) { D <- match(phy$tip.label, constraint) n <- Ntip(phy) P <- c(as.list(1:n), prop.part(phy)) e1 <- phy$edge[, 1L] e2 <- phy$edge[, 2L] foo <- function(node) { i <- which(e1 == node) # the edges where 'node' is ancestral desc <- e2[i] # the descendants of 'node' ## below, min() seems to work better than median() which ## seems to work better than mean() which seems to work ## better than sum() o <- order(sapply(desc, function(x) min(D[P[[x]]]))) for (k in o) { j <<- j + 1L neworder[j] <<- i[k] if ((dk <- desc[k]) > n) foo(dk) } } neworder <- integer(Nedge(phy)) j <- 0L foo(n + 1L) phy$edge <- phy$edge[neworder, ] if (!is.null(phy$edge.length)) phy$edge.length <- phy$edge.length[neworder] attr(phy, "order") <- "cladewise" phy } ape/R/skyline.R0000644000176200001440000000632614356737401013042 0ustar liggesusers## skyline.R (2002-09-12) ## Methods to construct skyline objects (data underlying skyline plot) ## Copyright 2002 Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. skyline <- function(x, ...) UseMethod("skyline") # input: phylogenetic tree skyline.phylo <- function(x, ...) { if (!inherits(x, "phylo")) stop("object \"x\" is not of class \"phylo\"") skyline(coalescent.intervals(x), ...) } # input: coalescent intervals and epsilon skyline.coalescentIntervals <- function(x, epsilon=0, ...) { if (!inherits(x, "coalescentIntervals")) stop("object \"x\" is not of class \"coalescentIntervals\"") if (epsilon < 0) { eps <- find.skyline.epsilon(x, ...) } else eps <- epsilon skyline(collapsed.intervals(x, epsilon=eps), ...) } # input: collapsed intervals skyline.collapsedIntervals <- function(x, old.style=FALSE, ...) { if (!inherits(x, "collapsedIntervals")) stop("object \"x\" is not of class \"collapsedIntervals\"") link <- x$collapsed.interval params <- x$collapsed.interval.count l <- x$lineages w <- x$interval.length b <- choose(l,2) # binomial coefficients sg <- rep(0,params) # sizes of collapsed intervals cg <- rep(0,params) # coalescent events in interval if(old.style) ng <- rep(0,params) # lineages at beginning of an in interval else { ng <- rep(0,params) # sum of classic skp estimates in an interval m.classic <- w*b } for (i in 1:params) { group <- link==i sgr <- w[group] sg[[i]] <- sum(sgr) cg[[i]] <- length(sgr) if(old.style) ng[[i]] <- l[group][[1]] else ng[[i]] <- sum(m.classic[group]) } # generalized skp estimate t <- cumsum(sg) if (old.style) m <- sg*(ng*(ng-cg)/(2.0*cg) ) else m <- ng/cg # log-likelihood logL <- sum(log(b/m[link]) - b/m[link]*w) # AICc corrected log-likelihood K <- x$collapsed.interval.count S <- x$interval.count if (S-K > 1) logL.AICc <- logL - K- K*(K+1)/(S-K-1) else logL.AICc <- NA obj <- list( time=t, interval.length=sg, population.size=m, parameter.count=length(t), epsilon = x$epsilon, logL = logL, logL.AICc = logL.AICc ) class(obj) <- "skyline" return(obj) } # grid search for finding optimal epsilon parameter find.skyline.epsilon <- function(ci, GRID=1000, MINEPS=1e-6, ...) { # Why MINEPS? # Because most "clock-like" trees are not properly # clock-like for a variety of reasons, i.e. the heights # of the tips are not exactly zero. cat("Searching for the optimal epsilon... ") # a grid search is a naive way but still effective of doing this ... size <- ci$interval.count besteps <- ci$total.depth eps <- besteps cli <- collapsed.intervals(ci,eps) skpk <- skyline(cli, ...) bestaicc <- skpk$ logL.AICc params <- skpk$parameter.count delta <- besteps/GRID eps <- eps-delta while(eps > MINEPS) { cli <- collapsed.intervals(ci,eps) skpk <- skyline(cli, ...) aicc <- skpk$ logL.AICc params <- skpk$parameter.count if (aicc > bestaicc && params < size-1) { besteps <- eps bestaicc <- aicc } eps <- eps-delta } cat("epsilon =", besteps, "\n") besteps } ape/R/birthdeath.R0000644000176200001440000001205114204207506013460 0ustar liggesusers## birthdeath.R (2022-02-19) ## Estimation of Speciation and Extinction Rates ## with Birth-Death Models ## birthdeath: standard model ## bd.ext: extended version ## Copyright 2002-2022 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. birthdeath <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') N <- length(phy$tip.label) x <- c(NA, branching.times(phy)) dev <- function(a, r) { if (r < 0 || a > 1) return(1e100) -2 * (lfactorial(N - 1) + (N - 2) * log(r) + r * sum(x[3:N]) + N * log(1 - a) - 2 * sum(log(exp(r * x[2:N]) - a))) } out <- nlm(function(p) dev(p[1], p[2]), c(0.1, 0.2), hessian = TRUE) getSEs <- function(x) { H <- x$hessian inv.hessian <- try(solve(H), silent = TRUE) if (inherits(inv.hessian, "try-error")) return(rep(NA_real_, nrow(H))) sqrt(diag(inv.hessian)) } if (out$estimate[1] < 0) { out <- nlm(function(p) dev(0, p), 0.2, hessian = TRUE) para <- c(0, out$estimate) se <- c(0, getSEs(out)) } else { para <- out$estimate se <- getSEs(out) } Dev <- out$minimum ## 95% profile likelihood CIs ## which: index of the parameter (1 or 2) ## s: sign of the increment (-1 or +1) foo <- function(which, s) { i <- 0.1 if (which == 1) { p <- para[1] + s * i bar <- function() dev(p, para[2]) } else { # which == 2 p <- para[2] + s * i bar <- function() dev(para[1], p) } while (i > 1e-9) { while (bar() < Dev + 3.84) p <- p + s * i p <- p - s * i i <- i / 10 } p } CI <- mapply(foo, c(1, 2, 1, 2), c(-1, -1, 1, 1)) dim(CI) <- c(2, 2) names(para) <- names(se) <- rownames(CI) <- c("d/b", "b-d") colnames(CI) <- c("lo", "up") obj <- list(tree = deparse(substitute(phy)), N = N, dev = Dev, para = para, se = se, CI = CI) class(obj) <- "birthdeath" obj } print.birthdeath <- function(x, ...) { cat("\nEstimation of Speciation and Extinction Rates\n") cat(" with Birth-Death Models\n\n") cat(" Phylogenetic tree:", x$tree, "\n") cat(" Number of tips:", x$N, "\n") cat(" Deviance:", x$dev, "\n") cat(" Log-likelihood:", -(x$dev)/2, "\n") cat(" Parameter estimates:\n") cat(" d / b =", x$para[1], " StdErr =", x$se[1], "\n") cat(" b - d =", x$para[2], " StdErr =", x$se[2], "\n") cat(" (b: speciation rate, d: extinction rate)\n") cat(" Profile likelihood 95% confidence intervals:\n") cat(" d / b: [", x$CI[1, 1], ", ", x$CI[1, 2], "]", "\n", sep = "") cat(" b - d: [", x$CI[2, 1], ", ", x$CI[2, 2], "]", "\n\n", sep = "") } bd.ext <- function(phy, S, conditional = TRUE) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') if (!is.null(names(S))) { if (all(names(S) %in% phy$tip.label)) S <- S[phy$tip.label] else warning('the names of argument "S" and the tip labels did not match: the former were ignored.') } N <- length(S) x <- branching.times(phy) x <- c(x[1], x) trm.br <- phy$edge.length[phy$edge[, 2] <= N] if (conditional) { dev <- function(a, r) { if (a >= 1 || a < 0 || r <= 0) return(1e50) ert <- exp(r * trm.br) zeta <- (ert - 1)/(ert - a) -2 * (lfactorial(N - 1) + (N - 2) * log(r) + N * log(1 - a) + 2 * r * sum(x[2:N]) - 2 * sum(log(exp(r * x[2:N]) - a)) + sum(log(1 - zeta) + (S - 1)*log(zeta))) } } else { dev <- function(a, r) { if (a >= 1 || a < 0 || r <= 0) return(1e50) -2 * (lfactorial(N - 1) + (N - 2) * log(r) + (3 * N) * log(1 - a) + 2 * r * sum(x[2:N]) - 2 * sum(log(exp(r * x[2:N]) - a)) + r * sum(trm.br) + sum((S - 1) * log(exp(r * trm.br) - 1)) - sum((S + 1) * log(exp(r * trm.br) - a))) } } out <- nlm(function(p) dev(p[1], p[2]), c(0.1, 0.2), hessian = TRUE) para <- out$estimate se <- sqrt(diag(solve(out$hessian))) Dev <- out$minimum cat("\nExtended Version of the Birth-Death Models to\n") cat(" Estimate Speciation and Extinction Rates\n\n") cat(" Data: phylogenetic:", deparse(substitute(phy)), "\n") cat(" taxonomic:", deparse(substitute(S)), "\n") cat(" Number of tips:", N, "\n") cat(" Deviance:", Dev, "\n") cat(" Log-likelihood:", -Dev/2, "\n") cat(" Parameter estimates:\n") cat(" d / b =", para[1], " StdErr =", se[1], "\n") cat(" b - d =", para[2], " StdErr =", se[2], "\n") cat(" (b: speciation rate, d: extinction rate)\n") } ape/R/speciesTree.R0000644000176200001440000000164712465112403013624 0ustar liggesusers## speciesTree.R (2013-08-12) ## Species Trees ## Copyright 2010-2013 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. speciesTree <- function(x, FUN = min) ### FUN = min => MAXTREE (Liu et al. 2010) ### FUN = sum => shallowest divergence (Maddison & Knowles 2006) { test.ultra <- which(!unlist(lapply(x, is.ultrametric))) if (length(test.ultra)) stop(paste("the following trees were not ultrametric:\n", paste(test.ultra, collapse = " "))) Ntree <- length(x) D <- lapply(x, cophenetic.phylo) nms <- rownames(D[[1]]) n <- length(nms) M <- matrix(0, n*(n - 1)/2, Ntree) for (i in 1:Ntree) M[, i] <- as.dist(D[[i]][nms, nms]) Y <- apply(M, 1, FUN) attributes(Y) <- list(Size = n, Labels = nms, Diag = FALSE, Upper = FALSE, class = "dist") as.phylo(hclust(Y, "single")) } ape/R/identify.phylo.R0000644000176200001440000000253512465112403014313 0ustar liggesusers## identify.phylo.R (2011-03-23) ## Graphical Identification of Nodes and Tips ## Copyright 2008-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. identify.phylo <- function(x, nodes = TRUE, tips = FALSE, labels = FALSE, quiet = FALSE, ...) { if (!quiet) cat("Click close to a node of the tree...\n") xy <- locator(1) if (is.null(xy)) return(NULL) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) ## rescale the coordinates (especially if the x- and ## y-scales are very different): pin <- par("pin") rescaleX <- pin[1]/max(lastPP$xx) xx <- rescaleX * lastPP$xx rescaleY <- pin[2]/max(lastPP$yy) yy <- rescaleY * lastPP$yy xy$x <- rescaleX * xy$x xy$y <- rescaleY * xy$y ## end of rescaling d <- (xy$x - xx)^2 + (xy$y - yy)^2 # no need to sqrt() NODE <- which.min(d) res <- list() if (NODE <= lastPP$Ntip) { res$tips <- if (labels) x$tip.label[NODE] else NODE return(res) } if (tips) { TIPS <- prop.part(x)[[NODE - lastPP$Ntip]] res$tips <- if (labels) x$tip.label[TIPS] else TIPS } if (nodes) { if (is.null(x$node.label)) labels <- FALSE res$nodes <- if (labels) x$node.label[NODE - lastPP$Ntip] else NODE } res } ape/R/me.R0000644000176200001440000000507414430325341011751 0ustar liggesusers## me.R (2023-05-15) ## Tree Estimation Based on Minimum Evolution Algorithm ## Copyright 2007 Vincent Lefort with modifications by ## Emmanuel Paradis (2008-2019) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. fastme.bal <- function(X, nni = TRUE, spr = TRUE, tbr = FALSE) { if (tbr) { warning("option 'tbr = TRUE' was ignored: see ?fastme.bal") tbr <- FALSE } if (is.matrix(X)) X <- as.dist(X) N <- as.integer(attr(X, "Size")) if (N < 3) stop("cannot build ME tree with less than 3 observations") nedge <- 2L * N - 3L ans <- .C(me_b, as.double(X), N, 1:N, as.integer(nni), as.integer(spr), as.integer(tbr), integer(nedge), integer(nedge), double(nedge), NAOK = TRUE) labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) labels <- labels[ans[[3]]] obj <- list(edge = cbind(ans[[7]], ans[[8]]), edge.length = ans[[9]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" attr(obj, "order") <- "cladewise" obj } fastme.ols <- function(X, nni = TRUE) { if (is.matrix(X)) X <- as.dist(X) N <- as.integer(attr(X, "Size")) if (N < 3) stop("cannot build ME tree with less than 3 observations") nedge <- 2L * N - 3L ans <- .C(me_o, as.double(X), N, 1:N, as.integer(nni), integer(nedge), integer(nedge), double(nedge), NAOK = TRUE) labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) labels <- labels[ans[[3]]] obj <- list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" attr(obj, "order") <- "cladewise" obj } bionj <- function(X) { if (is.matrix(X)) X <- as.dist(X) if (any(is.na(X))) stop("missing values are not allowed in the distance matrix.\nConsider using bionjs()") if (any(X > 100)) stop("at least one distance was greater than 100") N <- as.integer(attr(X, "Size")) if (N < 3) stop("cannot build a BIONJ tree with less than 3 observations") ans <- .C(C_bionj, as.double(X), N, integer(2 * N - 3), integer(2 * N - 3), double(2*N - 3), NAOK = TRUE) labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } ape/R/CADM.global.R0000644000176200001440000001750514033475446013330 0ustar liggesusers`CADM.global` <- function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE) { ### Function to test the overall significance of the congruence among ### a group of distance matrices using Kendall's coefficient of concordance W. ### ### copyleft - Pierre Legendre, December 2008 ### ### Reference - ### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance ### matrices: single malt Scotch whiskies revisited. Australian and New Zealand ### Journal of Statistics 46: 615-629. ### ### Parameters of the function -- ### ### Dmat = A text file listing the distance matrices one after the other, with ### or without blank lines. ### Each matrix is in the form of a square distance matrix with 0's ### on the diagonal. ### ### nmat = number of distance matrices in file Dmat. ### ### n = number of objects in each distance matrix. All matrices have same n. ### ### nperm = number of permutations for the tests. ### ### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by ### averaging the two triangular portions. ### = FALSE: analyse asymmetric matrices as they are. ### ### weights = a vector of positive weights for the distance matrices. ### Example: weights = c(1,2,3) ### = NULL (default): all matrices have same weight in calculation of W. ### ### silent = TRUE: informative messages will not be printed, except stopping ### messages. Option useful for simulation work. ### = FALSE: informative messages will be printed. ### ################################################################################ if(nmat < 2) stop("Analysis requested for a single D matrix: CADM is useless") a <- system.time({ ## Check the input file if(ncol(Dmat) != n) stop("Error in the value of 'n' or in the D matrices themselves") nmat2 <- nrow(Dmat)/n if(nmat2 < nmat) # OK if 'nmat' < number of matrices in the input file stop("Number of input D matrices = ",nmat2,"; this value is < nmat") nd <- n*(n-1)/2 if(is.null(weights)) { w <- rep(1,nmat) } else { if(length(weights) != nmat) stop("Incorrect number of values in vector 'weights'") if(length(which(weights < 0)) > 0) stop("Negative weights are not permitted") w <- weights*nmat/sum(weights) if(!silent) cat("Normalized weights =",w,'\n') } ## Are asymmetric D matrices present? asy <- rep(FALSE, nmat) asymm <- FALSE end <- 0 for(k in 1:nmat) { begin <- end+1 end <- end+n D.temp <- Dmat[begin:end,] if(sum(abs(diag(as.matrix(D.temp)))) > 0) stop("Diagonal not 0: matrix #",k," is not a distance matrix") vec1 <- as.vector(as.dist(D.temp)) vec2 <- as.vector(as.dist(t(D.temp))) if(sum(abs((vec1-vec2))) > 0) { if(!silent) cat("Matrix #",k," is asymmetric",'\n') asy[k] <- TRUE asymm <- TRUE } } D1 <- as.list(1:nmat) if(asymm) { if(make.sym) { if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n') } else { nd <- nd*2 if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n') D2 <- as.list(1:nmat) } } else { if(!silent) cat("Analysis of symmetric matrices",'\n') } Y <- rep(NA,nd) ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y' ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test end <- 0 for(k in 1:nmat) { begin <- end+1 end <- end+n D.temp <- as.matrix(Dmat[begin:end,]) vec <- as.vector(as.dist(D.temp)) if(asymm) { if(!make.sym) { ## Analysis carried out on asymmetric matrices: ## The ranks are computed on the whole matrix except the diagonal values. ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]] vec <- c(vec, as.vector(as.dist(t(D.temp)))) diag(D.temp) <- NA D.temp2 <- rank(D.temp) dim(D.temp2) <- dim(D.temp) # Correction E. Paradis, 08may17 diag(D.temp2) <- 0 # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n') # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n') # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n') D1[[k]] <- as.matrix(as.dist(D.temp2)) D2[[k]] <- as.matrix(as.dist(t(D.temp2))) } else { ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]] vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2 D.temp2 <- (D.temp + t(D.temp)) / 2 D.temp2 <- as.dist(D.temp2) D.temp2[] <- rank(D.temp2) D.temp2 <- as.matrix(D.temp2) D1[[k]] <- D.temp2 } } else { ## Symmetric matrices are stored in D1[[k]] D.temp2 <- as.dist(D.temp) D.temp2[] <- rank(D.temp2) D1[[k]] <- as.matrix(D.temp2) } Y <- cbind(Y, vec) } Y <- as.matrix(Y[,-1]) colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.") ## Begin calculations for global test ## Compute the reference values of the statistics: W and Chi2 ## Transform the distances to ranks, by column Rmat <- apply(Y,2,rank) ## Correction factors for tied ranks (eq. 3.3) t.ranks <- apply(Rmat, 2, function(x) summary(as.factor(x), maxsum=nd)) TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x)))) # if(!silent) cat("TT = ",TT,'\n') ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a) ## The ranks are weighted during the sum by the vector of matrix weights 'w' ## Eq. 1b cannot be used with weights; see formula for W below sumRanks <- as.vector(Rmat%*%w) S <- (nd-1)*var(sumRanks) ## Compute Kendall's W (eq. 2a) ## Eq. 2b cannot be used with weights ## because the sum of all ranks is not equal to m*n*(n+1)/2 in that case W <- (12*S)/(((nmat^2)*((nd^3)-nd))-(nmat*TT)) ## Calculate Friedman's Chi-square (Kendall W paper, 2005, eq. 3.4) Chi2 <- nmat*(nd-1)*W ## Test the Chi2 statistic by permutation counter <- 1 for(j in 1:nperm) { # Each matrix is permuted independently # There is no need to permute the last matrix Rmat.perm <- rep(NA,nd) ## if(asymm & !make.sym) { ## For asymmetric matrices: permute the values within each triangular ## portion, stored as square matrices in D1[[]] and D2[[]] for(k in 1:(nmat-1)) { order <- sample(n) vec <- as.vector(as.dist(D1[[k]][order,order])) vec <- c(vec, as.vector(as.dist(D2[[k]][order,order]))) Rmat.perm <- cbind(Rmat.perm, vec) } vec <- as.vector(as.dist(D1[[nmat]])) vec <- c(vec, as.vector(as.dist(D2[[nmat]]))) Rmat.perm <- cbind(Rmat.perm, vec) } else { for(k in 1:(nmat-1)) { order <- sample(n) vec <- as.vector(as.dist(D1[[k]][order,order])) Rmat.perm <- cbind(Rmat.perm, vec) } vec <- as.vector(as.dist(D1[[nmat]])) Rmat.perm <- cbind(Rmat.perm, vec) } # Remove the first column of Rmat.perm containing NA # The test is based on the comparison of S and S.perm instead of the comparison of # Chi2 and Chi2.perm: it is faster that way. # S, W, and Chi2 are equivalent statistics for permutation tests. Rmat.perm <- as.matrix(Rmat.perm[,-1]) S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w)) if(S.perm >= S) counter <- counter+1 } prob.perm.gr <- counter/(nperm+1) table <- rbind(W, Chi2, prob.perm.gr) colnames(table) <- "Statistics" rownames(table) <- c("W", "Chi2", "Prob.perm") }) a[3] <- sprintf("%2f",a[3]) if(!silent) cat("\nTime to compute global test =",a[3]," sec",'\n') # # if(asymm & !make.sym) { out <- list(congruence_analysis=table, D1=D1, D2=D2) # } else { out <- list(congruence_analysis=table) # } # out$nperm <- nperm class(out) <- "CADM.global" out } ape/R/which.edge.R0000644000176200001440000000465313165207203013357 0ustar liggesusers## which.edge.R (2017-10-04) ## Identifies Edges of a Tree ## Copyright 2004-2017 Emmanuel Paradis, 2017 Joseph W. Brown, 2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. getMRCA <- function(phy, tip) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') if (length(tip) < 2) return(NULL) Ntip <- length(phy$tip.label) ## do we need to check the value(s) in 'tip'? ##if (any(tip > Ntip + phy$Nnode) || any(tip < 1)) ## stop("value(s) out of range in 'tip'") ## rootnd <- Ntip + 1L pars <- integer(phy$Nnode) # worst case assignment, usually far too long tnd <- if (is.character(tip)) match(tip, phy$tip.label) else tip done_v <- logical(Ntip + phy$Nnode) ## build a lookup table to get parents faster pvec <- integer(Ntip + phy$Nnode) pvec[phy$edge[, 2]] <- phy$edge[, 1] ## get entire lineage for first tip nd <- tnd[1] for (k in 1:phy$Nnode) { nd <- pvec[nd] pars[k] <- nd if (nd == rootnd) break } pars <- pars[1:k] # delete the rest mrcind <- integer(max(pars)) mrcind[pars] <- 1:k mrcand <- pars[1] ## traverse lineages for remaining tips, stop if hit common ancestor for (i in 2:length(tnd)) { cnd <- tnd[i] done <- done_v[cnd] while(!done){ done_v[cnd] <- TRUE cpar <- pvec[cnd] # get immediate parent done <- done_v[cpar] # early exit if TRUE if (cpar %in% pars) { if (cpar == rootnd) return(rootnd) # early exit if(mrcind[cpar] > mrcind[mrcand]) mrcand <- cpar done_v[cpar] <- TRUE done <- TRUE } cnd <- cpar # keep going! } } mrcand } which.edge <- function(phy, group) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') if (is.character(group)) group <- which(phy$tip.label %in% group) if (length(group) == 1) return(match(group, phy$edge[, 2])) n <- length(phy$tip.label) sn <- .Call(seq_root2tip, phy$edge, n, phy$Nnode)[group] i <- 2L repeat { x <- unique(unlist(lapply(sn, "[", i))) if (length(x) != 1) break i <- i + 1L } d <- -(1:(i - 1L)) x <- unique(unlist(lapply(sn, function(x) x[d]))) match(x, phy$edge[, 2L]) } ape/R/ladderize.R0000644000176200001440000000334314371146223013314 0ustar liggesusers## ladderize.R (2023-02-09) ## Ladderize a Tree ## Copyright 2007-2023 Emmanuel Paradis, 2022 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ladderize <- function(phy, right = TRUE) { desc_fun <- function(x) { parent <- x[, 1] children <- x[, 2] res <- vector("list", max(x)) for (i in seq_along(parent)) res[[parent[i]]] <- c(res[[parent[i]]], children[i]) res } if (!is.null(phy$edge.length)) { el <- numeric(max(phy$edge)) el[phy$edge[, 2]] <- phy$edge.length } nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode nb.edge <- dim(phy$edge)[1] phy <- reorder(phy, "postorder") N <- .C(node_depth, as.integer(nb.tip), as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), as.integer(nb.edge), double(nb.tip + nb.node), 1L)[[5]] ii <- order(x <- phy$edge[, 1], y <- N[phy$edge[, 2]], decreasing = right) desc <- desc_fun(phy$edge[ii, ]) tmp <- integer(nb.node) new_anc <- integer(nb.node) new_anc[1] <- tmp[1] <- nb.tip + 1L k <- nb.node pos <- 1L while (pos > 0L && k > 0) { current <- tmp[pos] new_anc[k] <- current k <- k - 1L dc <- desc[[current]] ind <- dc > nb.tip if (any(ind)) { l <- sum(ind) tmp[pos -1L + seq_len(l)] <- dc[ind] pos <- pos + l - 1L } else { pos <- pos - 1L } } edge <- cbind(rep(new_anc, lengths(desc[new_anc])), unlist(desc[new_anc])) phy$edge <- edge if (!is.null(phy$edge.length)) phy$edge.length <- el[edge[, 2L]] attr(phy, "order") <- "postorder" reorder(phy) } ape/R/Cheverud.R0000644000176200001440000000603414533611140013111 0ustar liggesusers## Cheverud.R (2004-10-29) ## Cheverud's 1985 Autoregression Model ## Copyright 2004 Julien Dutheil ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # This function is adapted from a MatLab code from # Rholf, F. J. (2001) Comparative Methods for the Analysis of Continuous Variables: Geometric Interpretations. # Evolution 55(11): 2143-2160 compar.cheverud <- function(y, W, tolerance=1e-6, gold.tol=1e-4) { ## fix by Michael Phelan diag(W) <- 0 # ensure diagonal is zero ## end of fix y <- as.matrix(y) if(dim(y)[2] != 1) stop("Error: y must be a single column vector.") D <- solve(diag(apply(t(W),2,sum))) Wnorm <- D %*% W #Row normalize W matrix n <- dim(y)[1] m <- dim(y)[2] y <- y-matrix(rep(1, n)) %*% apply(y,2,mean) # Deviations from mean Wy <- Wnorm %*% y Wlam <- eigen(Wnorm)$values # eigenvalues of W # Find distinct eigenvalues sorted <- sort(Wlam) # Check real: for (ii in 1:n) { if(abs(Im(sorted[ii])) > 1e-12) { warning(paste("Complex eigenvalue coerced to real:", Im(sorted[ii]))) } sorted[ii] <- Re(sorted[ii]) # Remove imaginary part } sorted <- as.double(sorted) Distinct <- numeric(0) Distinct[1] <- -Inf Distinct[2] <- sorted[1] nDistinct <- 2 for(ii in 2:n) { if(sorted[ii] - Distinct[nDistinct] > tolerance) { nDistinct <- nDistinct + 1 Distinct[nDistinct] <- sorted[ii] } } # Search for minimum of LL likelihood <- function(rhohat) { DetProd <- 1 for(j in 1:n) { prod <- 1 - rhohat * Wlam[j] DetProd <- DetProd * prod } absValDet <- abs(DetProd) #[abs to allow rho > 1] logDet <- log(absValDet) LL <- log(t(y) %*% y - 2 * rhohat * t(y) %*% Wy + rhohat * rhohat * t(Wy) %*% Wy) - logDet*2/n return(LL) } GoldenSearch <- function(ax, cx) { # Golden section search over the interval ax to cx # Return rhohat and likelihood value. r <- 0.61803399 x0 <- ax x3 <- cx bx <- (ax + cx)/2 if(abs(cx - bx) > abs(bx - ax)) { x1 <- bx x2 <- bx + (1-r)*(cx - bx) } else { x2 <- bx x1 <- bx - (1-r)*(bx - ax) } f1 <- likelihood(x1) f2 <- likelihood(x2) while(abs(x3 - x0) > gold.tol*(abs(x1) + abs(x2))) { if(f2 < f1) { x0 <- x1 x1 <- x2 x2 <- r * x1 + (1 - r) * x3 f1 <- f2 f2 <- likelihood(x2) } else { x3 <- x2 x2 <- x1 x1 <- r * x2 + (1 - r) * x0 f2 <- f1 f1 <- likelihood(x1) } } if(f1 < f2) { likelihood <- f1 xmin <- x1 } else { likelihood <- f2 xmin <- x2 } return(list(rho=xmin, LL=likelihood)) } LL <- Inf for(ii in 2:(nDistinct -1)) {# Search between pairs of roots # [ constrain do not use positive roots < 1] ax <- 1/Distinct[ii] cx <- 1/Distinct[ii+1] GS <- GoldenSearch(ax, cx) if(GS$LL < LL) { LL <- GS$LL rho <- GS$rho } } # Compute residuals: res <- y - rho * Wy return(list(rhohat=rho, Wnorm=Wnorm, residuals=res)) } ape/R/dbd.R0000644000176200001440000000732612465140136012106 0ustar liggesusers## dbd.R (2015-02-06) ## Probability Density Under Birth--Death Models ## Copyright 2012-2015 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. dyule <- function(x, lambda = 0.1, t = 1, log = FALSE) { tmp <- exp(-lambda * t) res <- if (log) log(tmp) + (x - 1) * log(1 - tmp) else tmp * (1 - tmp)^(x - 1) out.of.range <- x <= 0 if (any(out.of.range)) res[out.of.range] <- if (log) -Inf else 0 res } dbd <- function(x, lambda, mu, t, conditional = FALSE, log = FALSE) { if (length(lambda) > 1) { lambda <- lambda[1] warning("only the first value of 'lambda' was considered") } if (length(mu) > 1) { mu <- mu[1] warning("only the first value of 'mu' was considered") } if (mu == 0) return(dyule(x, lambda, t, log)) ## for the unconditional case, we have to consider x=0 separately: if (!conditional) { zero <- x == 0 out.of.range <- x < 0 } else { out.of.range <- x <= 0 } res <- numeric(length(x)) ## the situation were speciation and extinction probabilities are equal: if (lambda == mu) { tmp <- lambda * t eta <- tmp/(1 + tmp) if (conditional) { res[] <- if (log) log(1 - eta) + (x - 1) * log(eta) else (1 - eta) * eta^(x - 1) } else { # the unconditional case: if (length(zero)) { res[zero] <- eta res[!zero] <- (1 - eta)^2 * eta^(x[!zero] - 1) } else res[] <- (1 - eta)^2 * eta^(x - 1) } } else { # the general case with lambda != mu ## this expression is common to the conditional and unconditional cases: Ent <- exp((lambda - mu) * t) if (conditional) { if (log) { res[] <- log(lambda - mu) - log(lambda * Ent - mu) + (x - 1) * (log(lambda) + log(Ent - 1) - log(lambda * Ent - mu)) } else { eta <- lambda * (Ent - 1)/(lambda * Ent - mu) res[] <- (1 - eta) * eta^(x - 1) } } else { # finally, the unconditional case: eta <- lambda * (Ent - 1)/(lambda * Ent - mu) if (length(zero)) { res[zero] <- eta * mu / lambda res[!zero] <- (1 - mu * eta / lambda) * (1 - eta) * eta^(x[!zero] - 1) } else res[] <- (1 - mu * eta / lambda) * (1 - eta) * eta^(x - 1) } } if (any(out.of.range)) res[out.of.range] <- if (log) -Inf else 0 res } dbdTime <- function(x, birth, death, t, conditional = FALSE, BIRTH = NULL, DEATH = NULL, fast = FALSE) { if (length(t) > 1) { t <- t[1] warning("only the first value of 't' was considered") } if (conditional) { PrNt <- function(t, T, x) { tmp <- exp(-RHO(t, T)) Wt <- tmp * (1 + INT(t)) out <- (1/Wt)*(1 - 1/Wt)^(x - 1) zero <- x == 0 if (length(zero)) out[zero] <- 0 out } } else { # the unconditional case: PrNt <- function(t, T, x) { tmp <- exp(-RHO(t, T)) Wt <- tmp * (1 + INT(t)) out <- numeric(length(x)) zero <- x == 0 if (length(zero)) { out[zero] <- 1 - tmp/Wt out[!zero] <- (tmp/Wt^2)*(1 - 1/Wt)^(x[!zero] - 1) } else out[] <- (tmp/Wt^2)*(1 - 1/Wt)^(x - 1) out } } case <- .getCase(birth, death, BIRTH, DEATH) ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case = case, fast = fast) RHO <- ff[[1]] INT <- ff[[2]] environment(RHO) <- environment(INT) <- environment() Tmax <- t PrNt(0, t, x) } ape/R/node.dating.R0000644000176200001440000002655514533612127013556 0ustar liggesusers## node.dating.R (2021-03-03) ## This file is part of the R-package `ape'. ## See the file COPYING in the package ape available at cran.r-project.org for licensing issues. # Copyright (c) 2016, Bradley R. Jones, BC Centre for Excellence in HIV/AIDS # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the BC Centre for Excellence in HIV/AIDS nor the # names of its contributors may be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL The BC Centre for Excellence in HIV/AIDS BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # Estimate the mutation rate and node dates based on tip dates. # # Felsenstein, Joseph. "Evolutionary trees from DNA sequences: A maximum # likelihood approach." Journal of Molecular Evolution 17 (1981):368-376. # # Rambaut, Andrew. "Estimating the rate of molecular evolution: incorporating # non-contemporaneous sequences into maximum likelihood phylogenies." # Bioinformatics 16.4 (2000): 395-399. # Estimate the mutation rate of a phylogenetic tree from the tip dates using # linear regression. This model assumes that the tree follows a molecular # clock. # # t: rooted tree with edge lengths equal to genetic distance # # tip.dates: vector of dates for the tips, in the same order as t$tip.label. # Tip dates can be censored with NA values # # p: p-value cutoff for failed regression (default=0.05) # # returns the mutation rate as a double estimate.mu <- function(t, node.dates, p.tol=0.05) { # fit linear model g <- glm(node.depth.edgelength(t)[1:length(node.dates)] ~ node.dates, na.action=na.omit) p <- anova(g, test="Chisq")[2,5] # test fit if (p > p.tol) { warning(paste("Cannot reject null hypothesis (p=", p, ")")) } coef(g)[[2]] } # Estimate the dates of the internal nodes of a phylogenetic tree. # # t: rooted tree with edge lengths equal to genetic distance # # node.dates: either a vector of dates for the tips, in the same order as # t$tip.label; or a vector of dates to initalize each node # # mu: mutation rate, either a vector of size one for a strict molecular clock # or a vector with a local molecular clock along each edge # # min.date: the minimum date that a node can have (needed for optimize()). The # default is -.Machine$double.xmax # # show.steps: set to print the log likelihood every show.steps. Set to 0 to # supress output # # opt.tol: tolerance for optimization precision. By default, the optimize() # function uses a tolerance of .Machine$double.eps^0.25 (see ?optimize) # # lik.tol: tolerance for likelihood comparison. estimate.dates will stop when # the log likelihood between successive trees is less than like.tol. If # 0 will stop after nsteps steps. # # nsteps: the maximum number of steps to run. If 0 will run until the log # likelihood between successive runs is less than lik.tol. The default # is 1000. # # is.binary: if the phylogentic tree is binary, setting is.binary to TRUE, will # run a optimization method # # If lik.tol and nsteps are both 0 then estimate.dates will only run the inital # step. # # returns a vector of the estimated dates of the tips and internal nodes estimate.dates <- function(t, node.dates, mu = estimate.mu(t, node.dates), min.date = -.Machine$double.xmax, show.steps = 0, opt.tol = 1e-8, nsteps = 1000, lik.tol = 0, is.binary = is.binary.phylo(t)) { # check parameters if (any(mu < 0)) stop(paste("mu (", mu, ") less than 0", sep="")) # init vars mu <- if (length(mu) == 1) rep(mu, length(t$edge.length)) else mu n.tips <- length(t$tip.label) dates <- if (length(node.dates) == n.tips) { c(node.dates, rep(NA, t$Nnode)) } else if (length(node.dates) == n.tips + t$Nnode) { node.dates } else { stop(paste0("node.dates must be a vector with length equal to the number of tips or equal to the number of nodes plus the number of tips")) } lik.sens <- if (lik.tol == 0) opt.tol else lik.tol # Don't count initial step if all values are seeded iter.step <- if (any(is.na(dates))) 0 else 1 children <- lapply(1:t$Nnode, function(x) { which(t$edge[,1] == x + n.tips) }) parent <- lapply(1:t$Nnode, function(x) { which(t$edge[,2] == x + n.tips) }) # to process children before parents nodes <- c(1) for (i in 1:t$Nnode) { to.add <- t$edge[children[[nodes[i]]], 2] - n.tips nodes <- c(nodes, to.add[to.add > 0]) i <- i + 1 } nodes <- rev(nodes) # calculate likelihood functions scale.lik <- sum(-lgamma(t$edge.length+1)+(t$edge.length+1)*log(mu)) calc.Like <- function(ch.node, ch.edge, x) { tim <- ch.node - x t$edge.length[ch.edge]*log(tim)-mu[ch.edge]*tim } opt.fun <- function(x, ch, p, ch.edge, p.edge, use.parent=T) { sum(if (!use.parent || length(dates[p]) == 0 || is.na(dates[p])) { calc.Like(dates[ch], ch.edge, x) } else { calc.Like(c(dates[ch], x), c(ch.edge, p.edge), c(rep(x, length(dates[ch])), dates[p])) }) } solve.lin <- function(bounds, ch.times, ch.edge) { y <- (mu[ch.edge] * ch.times - t$edge.length[ch.edge]) / mu[ch.edge] x <- c(bounds[1] + opt.tol, bounds[2] - opt.tol) if (bounds[1] < y && y < bounds[2]) x <- c(x, y) x[which.max(unlist(lapply(x, function(y) sum(calc.Like(ch.times, ch.edge, y)))))] } solve.poly2 <- function(bounds, a, b, c.0) { x <- c(bounds[1] + opt.tol, bounds[2] - opt.tol) if (b ^ 2 - 4 * a * c.0 >= 0) { if (a == 0) { y <- -c.0 / b if (bounds[1] < y && y < bounds[2]) x <- c(x, y) } else { x.1 <- (-b + sqrt(b ^ 2 - 4 * a * c.0)) / (2 * a) x.2 <- (-b - sqrt(b ^ 2 - 4 * a * c.0)) / (2 * a) if (bounds[1] < x.1 && x.1 < bounds[2]) x <- c(x, x.1) if (bounds[1] < x.2 && x.2 < bounds[2]) x <- c(x, x.2) } } x } solve.bin <- function(bounds, ch.times, ch.edge) { ch.edge.length <- t$edge.length[ch.edge] a <- sum(mu[ch.edge]) b <- ch.edge.length[1] + ch.edge.length[2] - a * (ch.times[1] + ch.times[2]) c.0 <- a*ch.times[1] * ch.times[2] - ch.times[1] * ch.edge.length[2] - ch.times[2] * ch.edge.length[1] x <- solve.poly2(bounds, a, b, c.0) x[which.max(unlist(lapply(x, function(y) sum(calc.Like(ch.times, ch.edge, y)))))] } solve.bin2 <- function(bounds, ch.times, ch.edge, par.time, par.edge) { ch.edge.length <- t$edge.length[ch.edge] par.edge.length <- t$edge.length[par.edge] a <- mu[ch.edge] - mu[par.edge] b <- ch.edge.length + par.edge.length - a * (ch.times + par.time) c.0 <- a*ch.times * par.time - ch.times * par.edge.length - par.time * ch.edge.length cat(sprintf("a: %f, b: %f, c: %f\n", a, b, c.0)) x <- solve.poly2(bounds, a, b, c.0) x[which.max(unlist(lapply(x, function(y) sum(calc.Like(c(ch.times, y), c(ch.edge, par.edge), c(y, par.time))))))] } solve.poly3 <- function(bounds, a, b, c.0, d) { x <- c(bounds[1] + opt.tol, bounds[2] - opt.tol) if (a == 0) x <- c(x, solve.poly2(bounds, b, c.0, d)) else { delta.0 <- complex(real=b^2 - 3 * a * c.0) delta.1 <- complex(real=2 * b^3 - 9 * a * b * c.0 + 27 * a^2 * d) C <- ((delta.1 + sqrt(delta.1^2 - 4 * delta.0^3)) / 2)^(1/3) x.1 <- Re(-1 / (3 * a) * (b + complex(real=1) * C + delta.0 / (complex(real=1) * C))) x.2 <- Re(-1 / (3 * a) * (b + complex(real=-1/2, imaginary=sqrt(3)/2) * C + delta.0 / (complex(real=-1/2, imaginary=sqrt(3)/2) * C))) x.3 <- Re(-1 / (3 * a) * (b + complex(real=-1/2, imaginary=-sqrt(3)/2) * C + delta.0 / (complex(real=-1/2, imaginary=-sqrt(3)/2) * C))) if (bounds[1] < x.1 && x.1 < bounds[2]) x <- c(x, x.1) if (bounds[1] < x.2 && x.2 < bounds[2]) x <- c(x, x.2) if (bounds[1] < x.3 && x.3 < bounds[2]) x <- c(x, x.3) } x } solve.cube <- function(bounds, ch.times, ch.edge, par.time, par.edge) { ch.edge.length <- t$edge.length[ch.edge] par.edge.length <- t$edge.length[par.edge] a <- sum(mu[ch.edge]) - mu[par.edge] b <- sum(ch.edge.length) + par.edge.length - a * (sum(ch.times) + par.time) c.0 <- a * (ch.times[1] * ch.times[2] + ch.times[1] * par.time + ch.times[2] * par.time) - (ch.times[1] + ch.times[2]) * par.edge.length - (ch.times[1] + par.time) * ch.edge.length[2] - (ch.times[2] + par.time) * ch.edge.length[1] d <- ch.edge.length[1] * ch.times[2] * par.time + ch.edge.length[2] * ch.times[1] * par.time + par.edge.length * ch.times[1] * ch.times[2] - a * prod(ch.times) * par.time x <- solve.poly3(bounds, a, b, c.0, d) x[which.max(unlist(lapply(x, function(y) sum(calc.Like(c(ch.times, y), c(ch.edge, par.edge), c(y, y, par.time))))))] } estimate <- function(node) { ch.edge <- children[[node]] ch <- t$edge[ch.edge, 2] p.edge <- parent[[node]] p <- t$edge[p.edge, 1] m <- if (length(p) == 0 || is.na(dates[p])) { min.date } else { dates[p] } if (is.binary) { if (m + 2 * opt.tol >= min(dates[ch])) { mean(c(m, min(dates[ch]))) } else if (length(dates[p]) == 0 || is.na(dates[p])) { if (length(ch.edge) == 2) solve.bin(c(m, min(dates[ch])), dates[ch], ch.edge) else solve.lin(c(m, min(dates[ch])), dates[ch], ch.edge) } else { if (length(ch.edge) == 2) solve.cube(c(m, min(dates[ch])), dates[ch], ch.edge, dates[p], p.edge) else solve.bin2(c(m, min(dates[ch])), dates[ch], ch.edge, dates[p], p.edge) } } else { res <- suppressWarnings(optimize(opt.fun, c(m, min(dates[ch])), ch, p, ch.edge, p.edge, maximum=T)) res$maximum } } # iterate to estimate dates lik <- NA repeat { for (n in nodes) { dates[n + n.tips] <- estimate(n) } all.lik <- calc.Like(dates[t$edge[,2]], 1:length(t$edge.length), dates[t$edge[,1]]) + scale.lik new.lik <- sum(all.lik) if (show.steps > 0 && ((iter.step %% show.steps) == 0)) { cat(paste("Step: ", iter.step, ", Likelihood: ", new.lik, "\n", sep="")) } if ((lik.tol > 0 && (!is.na(lik) && (is.infinite(lik) || is.infinite(new.lik) || new.lik - lik < lik.tol))) || (nsteps > 0 && iter.step >= nsteps) || (lik.tol <= 0 && nsteps <= 0)) { if (is.infinite(lik) || is.infinite(new.lik)) { warning("Likelihood infinite") } else if (!is.na(lik) && new.lik + lik.sens < lik) { warning("Likelihood less than previous estimate") } break } else { lik <- new.lik } iter.step <- iter.step + 1 } if (show.steps > 0) { cat(paste("Step: ", iter.step, ", Likelihood: ", new.lik, "\n", sep="")) } dates } ape/R/subtreeplot.R0000644000176200001440000000306513112040066013710 0ustar liggesusers## subtreeplot.R (2017-05-26) ## Zoom on a Portion of a Phylogeny by Successive Clicks ## Copyright 2008 Damien de Vienne ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. subtreeplot<-function(x, wait=FALSE, ...) { sub<-subtrees(x, wait=wait) y<-NULL plot.default(0, type="n",axes=FALSE, ann=FALSE) repeat { split.screen(c(1,2)) screen(2) if (is.null(y)) plot(x,...) else plot(y,sub=paste("Node :", click),...) screen(1) plot(x,sub="Complete tree",main="Type ESC or right click to exit", cex.main=0.9, ...) N.tip<-Ntip(x) N.node<-Nnode(x) # 5/24/17 changed by Klaus # coor<-plotPhyloCoor(x) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) tips<-x$tip.label nodes<-x$node.label if (is.null(x$node.label)) nodes<-(N.tip+1):(N.tip+N.node) labs<-c(rep("",N.tip), nodes) #click<-identify(coor[,1], coor[,2], labels=labs, n=1) click<-identify(lastPP$xx, lastPP$yy, labels=labs, n=1) if (length(click) == 0) {return(y)} if (click > N.tip) { close.screen(c(1,2),all.screens = TRUE) split.screen(c(1,2)) screen(1) #selects the screen to plot in plot(x, sub="Complete tree", ...) # plots x in screen 1 (left) screen(2) for (i in 1:length(sub)) if (sub[[i]]$name==click) break y<-sub[[i]] } else cat("this is a tip, you have to choose a node\n") } on.exit(return(y)) } ape/R/CADM.post.R0000644000176200001440000002401014033475456013043 0ustar liggesusers`CADM.post` <- function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, mult="holm", mantel=FALSE, silent=FALSE) { ### Function to carry out a posteriori tests of the contribution of individual ### matrices to the congruence of a group of distance matrices. ### ### copyleft - Pierre Legendre, December 2008 ### ### Reference - ### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance ### matrices: single malt Scotch whiskies revisited. Australian and New Zealand ### Journal of Statistics 46: 615-629. ### ### Parameters of the function -- ### ### Dmat = A text file listing the distance matrices one after the other, with ### or without blank lines. ### Each matrix is in the form of a square distance matrix with 0's ### on the diagonal. ### ### nmat = number of distance matrices in file Dmat. ### ### n = number of objects in each distance matrix. All matrices have same n. ### ### nperm = number of permutations for the tests. ### ### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by ### averaging the two triangular portions. ### = FALSE: analyse asymmetric matrices as they are. ### ### weights = a vector of positive weights for the distance matrices. ### Example: weights = c(1,2,3) ### = NULL (default): all matrices have same weight in calculation of W. ### ### mult = method for correcting P-values due to multiple testing. The methods ### are "holm" (default), "sidak", and "bonferroni". The Bonferroni ### correction is overly conservative; it is not recommended. It is ### included to allow comparisons with the other methods. ### ### mantel = TRUE: Mantel statistics are computed from ranked distances, ### as well as permutational P-values. ### = FALSE (default): Mantel statistics and tests are not computed. ### ### silent = TRUE: informative messages will not be printed, except stopping ### messages. Option useful for simulation work. ### = FALSE: informative messages will be printed. ### ################################################################################ mult <- match.arg(mult, c("sidak", "holm", "bonferroni")) if(nmat < 2) stop("Analysis requested for a single D matrix: CADM is useless") a <- system.time({ ## Check the input file if(ncol(Dmat) != n) stop("Error in the value of 'n' or in the D matrices themselves") nmat2 <- nrow(Dmat)/n if(nmat2 < nmat) # OK if 'nmat' < number of matrices in the input file stop("Number of input D matrices = ",nmat2,"; this value is < nmat") nd <- n*(n-1)/2 if(is.null(weights)) { w <- rep(1,nmat) } else { if(length(weights) != nmat) stop("Incorrect number of values in vector 'weights'") if(length(which(weights < 0)) > 0) stop("Negative weights are not permitted") w <- weights*nmat/sum(weights) if(!silent) cat("Normalized weights =",w,'\n') } ## Are asymmetric D matrices present? asy <- rep(FALSE, nmat) asymm <- FALSE end <- 0 for(k in 1:nmat) { begin <- end+1 end <- end+n D.temp <- Dmat[begin:end,] if(sum(abs(diag(as.matrix(D.temp)))) > 0) stop("Diagonal not 0: matrix #",k," is not a distance matrix") vec1 <- as.vector(as.dist(D.temp)) vec2 <- as.vector(as.dist(t(D.temp))) if(sum(abs((vec1-vec2))) > 0) { if(!silent) cat("Matrix #",k," is asymmetric",'\n') asy[k] <- TRUE asymm <- TRUE } } D1 <- as.list(1:nmat) if(asymm) { if(make.sym) { if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n') } else { nd <- nd*2 if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n') D2 <- as.list(1:nmat) } } else { if(!silent) cat("Analysis of symmetric matrices",'\n') } Y <- rep(NA,nd) ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y' ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test end <- 0 for(k in 1:nmat) { begin <- end+1 end <- end+n D.temp <- as.matrix(Dmat[begin:end,]) vec <- as.vector(as.dist(D.temp)) if(asymm) { if(!make.sym) { ## Analysis carried out on asymmetric matrices: ## The ranks are computed on the whole matrix except the diagonal values. ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]] vec <- c(vec, as.vector(as.dist(t(D.temp)))) diag(D.temp) <- NA D.temp2 <- rank(D.temp) dim(D.temp2) <- dim(D.temp) # Correction E. Paradis, 08may17 diag(D.temp2) <- 0 # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n') # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n') # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n') D1[[k]] <- as.matrix(as.dist(D.temp2)) D2[[k]] <- as.matrix(as.dist(t(D.temp2))) } else { ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]] vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2 D.temp2 <- (D.temp + t(D.temp)) / 2 D.temp2 <- as.dist(D.temp2) D.temp2[] <- rank(D.temp2) D.temp2 <- as.matrix(D.temp2) D1[[k]] <- D.temp2 } } else { ## Symmetric matrices are stored in D1[[k]] D.temp2 <- as.dist(D.temp) D.temp2[] <- rank(D.temp2) D1[[k]] <- as.matrix(D.temp2) } Y <- cbind(Y, vec) } Y <- as.matrix(Y[,-1]) colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.") ## Begin calculations: compute reference value of S ## Transform the distances to ranks, by column Rmat <- apply(Y,2,rank) ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a) ## The ranks are weighted during the sum by the vector of matrix weights 'w' sumRanks <- as.vector(Rmat%*%w) S <- (nd-1)*var(sumRanks) ## Begin a posteriori tests of individual matrices ## Statistics displayed for each matrix: "Mantel.mean" and "W.per.matrix" ## Calculate the mean of the Mantel correlations on ranks for each matrix Mantel.cor <- cor(Rmat) diag(Mantel.cor) <- 0 spear.mean <- as.vector(Mantel.cor%*%w)/(nmat-1) ## Calculate Kendall's W for each variable ## W.var <- ((nmat-1)*spear.mean+1)/nmat ## P-value for each matrix: test of S, permuting values in matrix[[k]] only ## as in program CADM.f (2004) ## Initialize the counters counter <- rep(1,nmat) ## Test each matrix 'k' in turn for(k in 1:nmat) { ## Create a new Rmat table where the permuted column has been removed Rmat.mod <- Rmat[,-k] ## Permutation loop: string out permuted matrix 'k' only for(j in 1:nperm) { order <- sample(n) if(asymm & !make.sym) { ## For asymmetric matrices: permute the values within each triangular ## portion, stored as square matrices in D1[[]] and D2[[]] vec <- as.vector(as.dist(D1[[k]][order,order])) vec <- c(vec, as.vector(as.dist(D2[[k]][order,order]))) } else { vec <- as.vector(as.dist(D1[[k]][order,order])) } Rmat.perm <- cbind(Rmat.mod, vec) S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w)) if(S.perm >= S) counter[k] <- counter[k]+1 } } ## Calculate P-values counter <- counter/(nperm+1) ## Correction to P-values for multiple testing if(mult == "sidak") { vec.corr = NA for(i in 1:nmat) vec.corr = c(vec.corr, (1-(1-counter[i])^nmat)) vec.corr <- vec.corr[-1] } if(mult == "holm") vec.corr <- p.adjust(counter, method="holm") if(mult == "bonferroni") vec.corr <- p.adjust(counter, method="bonferroni") ## Create a data frame containing the results # table <- rbind(spear.mean, W.var, counter, vec.corr) # rownames(table) <- c("Mantel.mean", "W.per.matrix", "Prob", "Corrected prob") table <- rbind(spear.mean, counter, vec.corr) rownames(table) <- c("Mantel.mean", "Prob", "Corrected.prob") colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Dmat.") ## Mantel tests if(mantel) { diag(Mantel.cor) <- 1 rownames(Mantel.cor) <- colnames(table) colnames(Mantel.cor) <- colnames(table) Mantel.prob <- matrix(1,nmat,nmat) rownames(Mantel.prob) <- colnames(table) colnames(Mantel.prob) <- colnames(table) for(j in 1:nperm) { # Each matrix is permuted independently # There is no need to permute the last matrix Rmat.perm <- rep(NA,nd) ## if(asymm & !make.sym) { ## For asymmetric matrices: permute the values within each triangular ## portion, stored as square matrices in D1[[]] and D2[[]] for(k in 1:(nmat-1)) { order <- sample(n) vec <- as.vector(as.dist(D1[[k]][order,order])) vec <- c(vec, as.vector(as.dist(D2[[k]][order,order]))) Rmat.perm <- cbind(Rmat.perm, vec) } vec <- as.vector(as.dist(D1[[nmat]])) vec <- c(vec, as.vector(as.dist(D2[[nmat]]))) Rmat.perm <- cbind(Rmat.perm, vec) } else { for(k in 1:(nmat-1)) { order <- sample(n) vec <- as.vector(as.dist(D1[[k]][order,order])) Rmat.perm <- cbind(Rmat.perm, vec) } vec <- as.vector(as.dist(D1[[nmat]])) Rmat.perm <- cbind(Rmat.perm, vec) } # Remove the first column of Rmat.perm containing NA Rmat.perm <- as.matrix(Rmat.perm[,-1]) # Compute Mantel correlations on ranks under permutation Mantel.cor.perm <- cor(Rmat.perm) for(j2 in 1:(nmat-1)) { # Compute prob in the upper tail for(j1 in (j2+1):nmat) { if(Mantel.cor.perm[j1,j2] >= Mantel.cor[j1,j2]) Mantel.prob[j1,j2] <- Mantel.prob[j1,j2]+1 } } } Mantel.prob <- as.matrix(as.dist(Mantel.prob/(nperm+1))) diag(Mantel.prob) <- NA # Corrected 08feb13 } }) a[3] <- sprintf("%2f",a[3]) if(!silent) cat("Time to compute a posteriori tests (per matrix) =",a[3]," sec",'\n') out <- list(A_posteriori_tests=table, Correction.type=mult) if(mantel) { out$Mantel.cor <- Mantel.cor out$Mantel.prob <- Mantel.prob } out$nperm <- nperm class(out) <- "CADM.post" out } ape/R/diversi.time.R0000644000176200001440000000643312465112403013751 0ustar liggesusers## diversi.time.R (2007-09-22) ## Analysis of Diversification with Survival Models ## Copyright 2002-2007 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. diversi.time <- function(x, census = NULL, censoring.codes = c(1, 0), Tc = NULL) { n <- length(x) if (is.null(census)) { k <- n census <- rep(censoring.codes[1], n) } else k <- sum(census == censoring.codes[1]) u <- n - k S <- sum(x) delta <- k / S var.delta <- delta^2 / k loglik.A <- k * log(delta) - delta * S tk <- x[census == censoring.codes[1]] tu <- x[census == censoring.codes[2]] fb <- function(b) 1/b - sum(x^b * log(x))/sum(x^b) + sum(log(tk))/k beta <- uniroot(fb, interval = c(1e-7, 10))$root Sp <- sum(x^beta) alpha <- (k / Sp)^(1/beta) var.alpha <- 1/ ((k * beta / alpha^2) + beta * (beta - 1) * alpha^(beta - 2) * Sp) ax <- alpha * x var.beta <- 1 / (k / beta^2 + sum(ax^beta * log(ax))) loglik.B <- k*(log(alpha) + log(beta)) + (beta - 1)*(k*log(alpha) + sum(log(tk)))- Sp * alpha^beta if (is.null(Tc)) Tc <- median(x) tk1 <- tk[tk < Tc] tk2 <- tk[tk >= Tc] tu1 <- tu[tu < Tc] tu2 <- tu[tu >= Tc] k1 <- length(tk1) k2 <- k - k1 u1 <- length(tu1) u2 <- u - u1 tmp <- (k2 + u2) * Tc delta1 <- k1 / (sum(tk1) + sum(tu1) + tmp) delta2 <- k2 / (sum(tk2) + sum(tu2) - tmp) var.delta1 <- delta1^2 / k1 var.delta2 <- delta2^2 / k2 tmp <- Tc * (delta2 - delta1) loglik.C <- k1 * log(delta1) - delta1 * sum(tk1) + k2 * log(delta2) + k2 * tmp - delta2 * sum(tk2) - delta1 * sum(tu1) + u2 * tmp - delta2 * sum(tu2) cat("\nAnalysis of Diversification with Survival Models\n\n") cat("Data:", deparse(substitute(x)), "\n") cat("Number of branching times:", n, "\n") cat(" accurately known:", k, "\n") cat(" censored:", u, "\n\n") cat("Model A: constant diversification\n") cat(" log-likelihood =", round(loglik.A, 3), " AIC =", round(-2 * loglik.A + 2, 3), "\n") cat(" delta =", round(delta, 6), " StdErr =", round(sqrt(var.delta), 6), "\n\n") cat("Model B: diversification follows a Weibull law\n") cat(" log-likelihood =", round(loglik.B, 3), " AIC =", round(-2 * loglik.B + 4, 3), "\n") cat(" alpha =", round(alpha, 6), " StdErr =", round(sqrt(var.alpha), 6), "\n") cat(" beta =", round(beta, 6), " StdErr =", round(sqrt(var.beta), 6), "\n\n") cat("Model C: diversification changes with a breakpoint at time =", Tc, "\n") cat(" log-likelihood =", round(loglik.C, 3), " AIC =", round(-2 * loglik.C + 4, 3), "\n") cat(" delta1 =", round(delta1, 6), " StdErr =", round(sqrt(var.delta1), 6), "\n") cat(" delta2 =", round(delta2, 6), " StdErr =", round(sqrt(var.delta2), 6), "\n\n") cat("Likelihood ratio tests:\n") c1 <- 2 * (loglik.B - loglik.A) p1 <- round(1 - pchisq(c1, 1), 4) c2 <- 2 * (loglik.C - loglik.A) p2 <- round(1 - pchisq(c2, 1), 4) cat(" Model A vs. Model B: chi^2 =", round(c1, 3), " df = 1, P =", p1, "\n") cat(" Model A vs. Model C: chi^2 =", round(c2, 3), " df = 1, P =", p2, "\n") } ape/R/plot.popsize.R0000644000176200001440000000247613424004335014020 0ustar liggesusers## plot.popsize.R (2004-07-4) modified by EP (2019-01-29) ## Plot population size in dependence of time ## Copyright 2004 Rainer Opgen-Rhein and Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. plot.popsize <- function(x, show.median = TRUE, show.years = FALSE, subst.rate, present.year, xlab = NULL, ylab = "Effective population size", log = "y", ...) { ylim <- range(x[, 2:5], na.rm = TRUE) x1 <- x[, 1] if (show.years) { x1 <- -x1/subst.rate + present.year if (is.null(xlab)) xlab <- "Time (years)" } else { if (is.null(xlab)) xlab <- "Time (past to present in units of substitutions)" } xlim <- range(x1, na.rm = TRUE) j <- if (show.median) 3 else 2 plot(x1, x[, j], type = "s", xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, log = log, lwd = 2.5, ...) lines(x1, x[, 4], ...) lines(x1, x[, 5], ...) } lines.popsize <- function(x, show.median = TRUE, show.years = FALSE, subst.rate, present.year, ...) { x1 <- x[, 1] if (show.years) x1 <- -x1/subst.rate + present.year j <- if (show.median) 3 else 2 lines(x1, x[, j], lwd = 2.5, ...) lines(x1, x[, 4], ...) lines(x1, x[, 5], ...) } ape/R/yule.time.R0000644000176200001440000000421712465112403013260 0ustar liggesusers## yule.time.R (2009-02-20) ## Fits the Time-Dependent Yule Model ## Copyright 2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. yule.time <- function(phy, birth, BIRTH = NULL, root.time = 0, opti = "nlm", start = 0.01) { opti <- pmatch(opti, c("nlm", "nlminb", "optim")) if (is.na(opti)) stop("ambiguous argument 'opti'") LAMBDA <- function() x body(LAMBDA) <- body(birth) formals(LAMBDA) <- alist(t=) BT <- branching.times(phy) T <- BT[1] x <- BT[1] - BT + root.time m <- phy$Nnode paranam <- c(names(formals(birth))) np <- length(paranam) start <- rep(start, length.out = np) ## Foo is always vectorized if (is.null(BIRTH)) { Foo <- function(x) { n <- length(x) res <- numeric(n) for (i in 1:n) res[i] <- integrate(LAMBDA, x[i], T)$value res } } else { environment(BIRTH) <- environment() Foo <- function(x) BIRTH(T) - BIRTH(x) } half.dev <- function(p) { for (i in 1:np) assign(paranam[i], p[i], pos = sys.frame(1)) root.term <- if (is.null(BIRTH)) integrate(LAMBDA, x[1], T)$value else BIRTH(T) - BIRTH(x[1]) sum(Foo(x)) + root.term - sum(log(LAMBDA(x[2:m]))) } switch(opti, { out <- nlm(half.dev, start, hessian = TRUE) est <- out$estimate se <- sqrt(diag(solve(out$hessian))) loglik <- lfactorial(m) - out$minimum },{ out <- nlminb(start, half.dev) est <- out$par se <- NULL loglik <- lfactorial(m) - out$objective },{ out <- optim(start, half.dev, hessian = TRUE, control = list(maxit = 1000), method = "BFGS") est <- out$par se <- sqrt(diag(solve(out$hessian))) loglik <- lfactorial(m) - out$value }) names(est) <- paranam if (!is.null(se)) names(se) <- paranam structure(list(estimate = est, se = se, loglik = loglik), class = "yule") } ape/R/cophyloplot.R0000644000176200001440000001650712465112403013726 0ustar liggesusers## cophyloplot.R (2014-04-07) ## Plots two phylogenetic trees face to ## face with the links between the tips ## Copyright 2008-2010 Damien de Vienne ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. cophyloplot <- function(x, y, assoc = NULL, use.edge.length = FALSE, space = 0, length.line = 1, gap = 2, type = "phylogram", rotate = FALSE, col = par("fg"), lwd = par("lwd"), lty = par("lty"), show.tip.label = TRUE, font = 3, ...) { if (is.null(assoc)) { assoc <- matrix(ncol = 2) print("No association matrix specified. Links will be omitted.") } if (rotate == TRUE) { cat("\n Click on a node to rotate (right click to exit)\n\n") repeat { res <- plotCophylo2(x, y, assoc = assoc, use.edge.length = use.edge.length, space = space, length.line = length.line, gap = gap, type = type, return = TRUE, col = col, lwd=lwd, lty=lty, show.tip.label = show.tip.label, font = font) click <- identify(res$c[, 1], res$c[, 2], n = 1) if (click < length(res$a[, 1]) + 1) { if (click > res$N.tip.x) x <- rotate(x, click) } else if (click < length(res$c[, 1]) + 1) { if (click > length(res$a[, 1]) + res$N.tip.y) y <- rotate(y, click - length(res$a[, 1])) } } on.exit(cat("done\n")) } else plotCophylo2(x, y, assoc = assoc, use.edge.length = use.edge.length, space = space, length.line = length.line, gap = gap, type = type, return = FALSE, col = col, lwd=lwd, lty=lty, show.tip.label = show.tip.label, font = font) } plotCophylo2 <- function(x, y, assoc = assoc, use.edge.length = use.edge.length, space = space, length.line = length.line, gap = gap, type = type, return = return, col = col, lwd=lwd, lty=lty, show.tip.label = show.tip.label, font = font, ...) { res <- list() ###choice of the minimum space between the trees left <- max(nchar(x$tip.label, type = "width")) + length.line right <- max(nchar(y$tip.label, type = "width")) + length.line space.min <- left + right + gap * 2 if ((space <= 0) || (space < space.min)) space <- space.min N.tip.x <- Ntip(x) N.tip.y <- Ntip(y) res$N.tip.x <- N.tip.x res$N.tip.y <- N.tip.y a <- plotPhyloCoor(x, use.edge.length = use.edge.length, type = type) res$a <- a b <- plotPhyloCoor(y, use.edge.length = use.edge.length, direction = "leftwards", type = type) ###for the two trees to have the extreme leaves at the same ordinate. a[, 2] <- a[, 2] - min(a[, 2]) b[, 2] <- b[, 2] - min(b[, 2]) res$b <- b b2 <- b b2[, 1] <- b[1:nrow(b), 1] * (max(a[, 1])/max(b[, 1])) + space + max(a[, 1]) b2[, 2] <- b[1:nrow(b), 2] * (max(a[, 2])/max(b[, 2])) res$b2 <- b2 c <- matrix(ncol = 2, nrow = nrow(a) + nrow(b)) c[1:nrow(a), ] <- a[1:nrow(a), ] c[nrow(a) + 1:nrow(b), 1] <- b2[, 1] c[nrow(a) + 1:nrow(b), 2] <- b2[, 2] res$c <- c plot(c, type = "n", xlim = NULL, ylim = NULL, log = "", main = NULL, sub = NULL, xlab = NULL, ylab = NULL, ann = FALSE, axes = FALSE, frame.plot = FALSE) ###segments for cladograms if (type == "cladogram") { for (i in 1:(nrow(a) - 1)) segments(a[x$edge[i, 1], 1], a[x$edge[i, 1], 2], a[x$edge[i, 2], 1], a[x$edge[i, 2], 2], col="red") for (i in 1:(nrow(b) - 1)) segments(b2[y$edge[i, 1], 1], b2[y$edge[i, 1], 2], b2[y$edge[i, 2], 1], b2[y$edge[i, 2], 2]) } ###segments for phylograms if (type == "phylogram") { for (i in (N.tip.x + 1):nrow(a)) { l <- length(x$edge[x$edge[, 1] == i, ][, 1]) for (j in 1:l) { segments(a[x$edge[x$edge[, 1] == i, ][1, 1], 1], a[x$edge[x$edge[, 1] == i, 2], 2][1], a[x$edge[x$edge[, 1] == i, ][1, 1], 1], a[x$edge[x$edge[, 1] == i, 2], 2][j]) segments(a[x$edge[x$edge[, 1] == i, ][1, 1], 1], a[x$edge[x$edge[, 1] == i, 2], 2][j], a[x$edge[x$edge[, 1] == i, 2], 1][j], a[x$edge[x$edge[, 1] == i, 2], 2][j]) } } for (i in (N.tip.y + 1):nrow(b)) { l <- length(y$edge[y$edge[, 1] == i, ][, 1]) for (j in 1:l) { segments(b2[y$edge[y$edge[, 1] == i, ][1, 1], 1], b2[y$edge[y$edge[, 1] == i, 2], 2][1], b2[y$edge[y$edge[, 1] == i, ][1, 1], 1], b2[y$edge[y$edge[, 1] == i, 2], 2][j]) segments(b2[y$edge[y$edge[, 1] == i, ][1, 1], 1], b2[y$edge[y$edge[, 1] == i, 2], 2][j], b2[y$edge[y$edge[, 1] == i, 2], 1][j], b2[y$edge[y$edge[, 1] == i, 2], 2][j]) } } } if (show.tip.label) { text(a[1:N.tip.x, ], cex = 0, font = font, pos = 4, labels = x$tip.label) text(b2[1:N.tip.y, ], cex = 1, font = font, pos = 2, labels = y$tip.label) } ###links between associated taxa. Takes into account the size of the character strings of the taxa names. lsa <- 1:N.tip.x lsb <- 1:N.tip.y decx <- array(nrow(assoc)) decy <- array(nrow(assoc)) #colors if (length(col)==1) colors<-c(rep(col, nrow(assoc))) else if (length(col)>=nrow(assoc)) colors<-col else colors<-c(rep(col, as.integer(nrow(assoc)/length(col))+1)) #lwd if (length(lwd)==1) lwidths<-c(rep(lwd, nrow(assoc))) else if (length(lwd)>=nrow(assoc)) lwidths<-lwd else lwidths<-c(rep(lwd, as.integer(nrow(assoc)/length(lwd))+1)) #lty if (length(lty) == 1) ltype <- c(rep(lty, nrow(assoc))) else if (length(lty) >= nrow(assoc)) ltype <- lty else ltype <- c(rep(lty, as.integer(nrow(assoc)/length(lty))+1)) for (i in 1:nrow(assoc)) { if (show.tip.label) { decx[i] <- strwidth(x$tip.label[lsa[x$tip.label == assoc[i, 1]]]) decy[i] <- strwidth(y$tip.label[lsb[y$tip.label == assoc[i, 2]]]) } else { decx[i] <- decy[i] <- 0 } if (length.line) { # added by EP (2014-04-07) segments(a[lsa[x$tip.label == assoc[i, 1]], 1] + decx[i] + gap, a[lsa[x$tip.label == assoc[i, 1]], 2], a[lsa[x$tip.label == assoc[i, 1]], 1] + gap + left, a[lsa[x$tip.label == assoc[i, 1]], 2], col = colors[i], lwd = lwidths[i], lty = ltype[i]) segments(b2[lsb[y$tip.label == assoc[i, 2]], 1] - (decy[i] + gap), b2[lsb[y$tip.label == assoc[i, 2]], 2], b2[lsb[y$tip.label == assoc[i, 2]], 1] - (gap + right), b2[lsb[y$tip.label == assoc[i, 2]], 2], col = colors[i], lwd = lwidths[i], lty = ltype[i]) } segments(a[lsa[x$tip.label == assoc[i, 1]], 1] + gap + left, a[lsa[x$tip.label == assoc[i, 1]], 2], b2[lsb[y$tip.label == assoc[i, 2]], 1] - (gap + right), b2[lsb[y$tip.label == assoc[i, 2]], 2], col = colors[i], lwd = lwidths[i], lty = ltype[i]) } if (return == TRUE) return(res) } ape/R/biplot.pcoa.R0000644000176200001440000000506314033475416013570 0ustar liggesusers'biplot.pcoa' <- function(x, Y=NULL, plot.axes=c(1,2), dir.axis1=1, dir.axis2=1,rn=NULL,main=NULL, ...) # x = output object from function pcoa.R # Y = optional sites-by-variables data table # plot.axes = the two axes to be plotted # dir.axis.1 = -1 to revert axis 1 for the projection of points and variables # dir.axis.2 = -1 to revert axis 2 for the projection of points and variables # rn = an optional vector, length n, of object name labels # Customize the title of the biplot with argument 'main'. Ex.: main="My own PCoA title". # # Corrected version, March 2017 - This version draws biplots from the principal coordinates (x$vectors.cor) with Lingoes or Cailliez correction, when applicable. # # Author: Pierre Legendre, January 2009, March 2017 { if (!inherits(x, "pcoa")) stop("Object of class 'pcoa' expected") pr.coo <- x$vectors if(x$correction[2] > 1) pr.coo <- x$vectors.cor k <- ncol(pr.coo) if(k < 2) stop("There is a single eigenvalue. No plot can be produced.") if(k < plot.axes[1]) stop("Axis",plot.axes[1],"does not exist.") if(k < plot.axes[2]) stop("Axis",plot.axes[2],"does not exist.") if(!is.null(rn)) rownames(pr.coo) <- rn labels = colnames(pr.coo[,plot.axes]) diag.dir <- diag(c(dir.axis1,dir.axis2)) pr.coo[,plot.axes] <- pr.coo[,plot.axes] %*% diag.dir if(is.null(Y)) { limits <- apply(pr.coo[,plot.axes], 2, range) ran.x <- limits[2,1] - limits[1,1] ran.y <- limits[2,2] - limits[1,2] xlim <- c((limits[1,1]-ran.x/10), (limits[2,1]+ran.x/5)) ylim <- c((limits[1,2]-ran.y/10), (limits[2,2]+ran.y/10)) par(mai = c(1.0, 1.0, 1.0, 0.5)) plot(pr.coo[,plot.axes],xlab=labels[1],ylab=labels[2],xlim=xlim,ylim=ylim,asp=1) text(pr.coo[,plot.axes], labels=rownames(pr.coo), pos=4, cex=1, offset=0.5) if(is.null(main)) { title(main = "PCoA ordination", line=2) } else { title(main = main, family="serif", line=2) } } else { # Find positions of variables in biplot: # construct U from covariance matrix between Y and standardized point vectors # (equivalent to PCA scaling 1, since PCoA preserves distances among objects) n <- nrow(Y) points.stand <- scale(pr.coo[,plot.axes]) S <- cov(Y, points.stand) U <- S %*% diag((x$values$Eigenvalues[plot.axes]/(n-1))^(-0.5)) colnames(U) <- colnames(pr.coo[,plot.axes]) par(mai = c(1, 0.5, 1.4, 0)) biplot(pr.coo[,plot.axes], U, xlab=labels[1], ylab=labels[2]) if(is.null(main)) { title(main = c("PCoA biplot","Response variables projected","as in PCA with scaling 1"), line=4) } else { title(main = main, family="serif") } } invisible() } ape/R/chronoMPL.R0000644000176200001440000000313613112106215013177 0ustar liggesusers## chronoMPL.R (2017-04-25) ## Molecular Dating with Mean Path Lengths ## Copyright 2007-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. chronoMPL <- function(phy, se = TRUE, test = TRUE) { if (!is.binary.phylo(phy)) stop("the tree is not dichotomous.") n <- length(phy$tip.label) m <- phy$Nnode N <- dim(phy$edge)[1] obj <- reorder(phy, "postorder") ndesc <- .C(node_depth, as.integer(n), as.integer(obj$edge[, 1]), as.integer(obj$edge[, 2]), as.integer(N), double(n + m), 1L)[[5]] s <- numeric(n + m) # sum of path lengths if (se) ss <- s if (test) Pval <- numeric(m) for (i in seq(1, N - 1, 2)) { j <- i + 1 a <- obj$edge[i, 2] b <- obj$edge[j, 2] o <- obj$edge[i, 1] A <- s[a] + ndesc[a]*obj$edge.length[i] B <- s[b] + ndesc[b]*obj$edge.length[j] s[o] <- A + B if (se) ss[o] <- ss[a] + ndesc[a]^2 * obj$edge.length[i] + ss[b] + ndesc[b]^2 * obj$edge.length[j] if (test) { z <- abs(A/ndesc[a] - B/ndesc[b]) tmp <- (ss[a] + ndesc[a]^2 * obj$edge.length[i])/ndesc[a]^2 tmp <- tmp + (ss[b] + ndesc[b]^2 * obj$edge.length[j])/ndesc[b]^2 z <- z/sqrt(tmp) Pval[o - n] <- 2*pnorm(z, lower.tail = FALSE) } } node.age <- s/ndesc phy$edge.length <- node.age[phy$edge[, 1]] - node.age[phy$edge[, 2]] if (se) attr(phy, "stderr") <- sqrt(ss[-(1:n)]/ndesc[-(1:n)]^2) if (test) attr(phy, "Pval") <- Pval phy } ape/R/write.nexus.data.R0000644000176200001440000001311713313371402014546 0ustar liggesusers## write.nexus.data.R (2018-06-23) ## Write Character Data in NEXUS Format ## Copyright 2006-2015 Johan Nylander, Emmanuel Paradis, 2018 Thomas Guillerme ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. write.nexus.data <- function(x, file, format = "dna", datablock = TRUE, interleaved = TRUE, charsperline = NULL, gap = NULL, missing = NULL) { ### TODO: Standard data, mixed data, nice indent format <- match.arg(toupper(format), c("DNA", "PROTEIN", "STANDARD", "CONTINUOUS")) if (inherits(x, "DNAbin") && format != "DNA") { format <- "DNA" warning("object 'x' is of class DNAbin: format forced to DNA") } if (inherits(x, "AAbin") && format != "PROTEIN") { format <- "PROTEIN" warning("object 'x' is of class AAbin: format forced to PROTEIN") } indent <- " " # Two blanks maxtax <- 5 # Max nr of taxon names to be printed on a line defcharsperline <- 80 # Default nr of characters per line if interleaved defgap <- "-" # Default gap character defmissing <- "?" # Default missing data character if (is.matrix(x)) { if (inherits(x, "DNAbin")) x <- as.list(x) else { xbak <- x x <- vector("list", nrow(xbak)) for (i in seq_along(x)) x[[i]] <- xbak[i, ] names(x) <- rownames(xbak) rm(xbak) } } ntax <- length(x) nchars <- length(x[[1]]) zz <- file(file, "w") if (is.null(names(x))) names(x) <- as.character(1:ntax) fcat <- function(..., file = zz) cat(..., file = file, sep = "", append = TRUE) find.max.length <- function(x) max(nchar(x)) print.matrix <- function(x, dindent = " ", collapse = "") { Names <- names(x) printlength <- find.max.length(Names) + 2 if (!interleaved) { for (i in seq_along(x)) { sequence <- paste(x[[i]], collapse = collapse) taxon <- Names[i] thestring <- sprintf("%-*s%s%s", printlength, taxon, dindent, sequence) fcat(indent, indent, thestring, "\n") } } else { ntimes <- ceiling(nchars/charsperline) start <- 1 end <- charsperline for (j in seq_len(ntimes)) { for (i in seq_along(x)) { sequence <- paste(x[[i]][start:end], collapse = collapse) taxon <- Names[i] thestring <- sprintf("%-*s%s%s", printlength, taxon, dindent, sequence) fcat(indent, indent, thestring, "\n") } if (j < ntimes) fcat("\n") start <- start + charsperline end <- end + charsperline if (end > nchars) end <- nchars } } } if (inherits(x, "DNAbin") || inherits(x, "AAbin")) x <- as.character(x) fcat("#NEXUS\n[Data written by write.nexus.data.R, ", date(), "]\n") NCHAR <- paste("NCHAR=", nchars, sep = "") NTAX <- paste0("NTAX=", ntax) DATATYPE <- paste0("DATATYPE=", format) # fix by Robin Cristofari (2015-02-04) if (is.null(charsperline)) { if (nchars <= defcharsperline) { charsperline <- nchars interleaved <- FALSE } else charsperline <- defcharsperline } if (is.null(missing)) missing <- defmissing MISSING <- paste0("MISSING=", missing) if (is.null(gap)) gap <- defgap GAP <- paste0("GAP=", gap) INTERLEAVE <- if (interleaved) "INTERLEAVE=YES" else "INTERLEAVE=NO" if (datablock) { fcat("BEGIN DATA;\n") fcat(indent, "DIMENSIONS ", NTAX, " ", NCHAR, ";\n") ## only DNA and PROTEIN is supported for the moment, so the ## following 'if' is not needed ## if (format %in% c("DNA", "PROTEIN")) # from Francois Michonneau (2009-10-02) if(format != "STANDARD") { fcat(indent, "FORMAT", " ", DATATYPE, " ", MISSING, " ", GAP, " ", INTERLEAVE, ";\n") } else { fcat(indent, "FORMAT", " ", DATATYPE, " ", MISSING, " ", GAP, " ", INTERLEAVE, " symbols=\"0123456789\";\n") } ## fcat(indent, "MATRIX\n") if(format != "CONTINUOUS") { print.matrix(x) } else { print.matrix(x, collapse = "\t") } fcat(indent, ";\nEND;\n\n") } else { fcat("BEGIN TAXA;\n") fcat(indent, "DIMENSIONS", " ", NTAX, ";\n") fcat(indent, "TAXLABELS\n") fcat(indent, indent) j <- 0 for (i in seq_len(ntax)) { fcat(names(x[i]), " ") j <- j + 1 if (j == maxtax) { fcat("\n", indent, indent) j <- 0 } } fcat("\n", indent, ";\n") fcat("END;\n\nBEGIN CHARACTERS;\n") fcat(indent, "DIMENSIONS", " ", NCHAR, ";\n") ## only DNA and PROTEIN is supported for the moment, so the ## following 'if' is not needed ## if (format %in% c("DNA", "PROTEIN")) if(format != "STANDARD") { fcat(indent, "FORMAT", " ", MISSING, " ", GAP, " ", DATATYPE, " ", INTERLEAVE, ";\n") } else { fcat(indent, "FORMAT", " ", MISSING, " ", GAP, " ", DATATYPE, " ", INTERLEAVE, " symbols=\"0123456789\";\n") } ## fcat(indent,"MATRIX\n") if(format != "CONTINUOUS") { print.matrix(x) } else { print.matrix(x, collapse = "\t") } fcat(indent, ";\nEND;\n\n") } close(zz) } ape/R/makeNodeLabel.R0000644000176200001440000000604114533567472014047 0ustar liggesusers## makeNodeLabel.R (2023-12-05) ## Makes Node Labels ## Copyright 2009-2023 Emmanuel Paradis, 2023 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # relabel derived from dist.topo.R inside .compressTipLabel, # with an extra argument. # makeNodeLabel is faster: # 1. use relabel to avoid calling sort inside the loop # 2. use digest package to avoid writing files files as with md5sum # the node labels are the same makeNodeLabel <- function(phy, ...) UseMethod("makeNodeLabel") makeNodeLabel.phylo <- function(phy, method = "number", prefix = "Node", nodeList = list(), ...) { method <- sapply(method, match.arg, c("number", "md5sum", "user"), USE.NAMES = FALSE) if ("number" %in% method) phy$node.label <- paste(prefix, 1:phy$Nnode, sep = "") if ("md5sum" %in% method) { phy <- relabel(phy, sort(phy$tip.label)) nl <- character(phy$Nnode) pp <- prop.part(phy, check.labels = FALSE) labs <- attr(pp, "labels") for (i in seq_len(phy$Nnode)) { tmp <- paste0(labs[pp[[i]]], sep = "\n", collapse = "") nl[i] <- digest(tmp, algo = "md5", serialize = FALSE) } phy$node.label <- nl } if ("user" %in% method) { if (is.null(phy$node.label)) phy$node.label <- character(phy$Nnode) nl <- names(nodeList) if (is.null(nl)) stop("argument 'nodeList' has no names") Ntip <- length(phy$tip.label) seq.nod <- .Call(seq_root2tip, phy$edge, Ntip, phy$Nnode) ## a local version to avoid the above call many times: .getMRCA <- function(seq.nod, tip) { sn <- seq.nod[tip] MRCA <- Ntip + 1 i <- 2 repeat { x <- unique(unlist(lapply(sn, "[", i))) if (length(x) != 1) break MRCA <- x i <- i + 1 } MRCA } for (i in seq_along(nodeList)) { tips <- sapply(nodeList[[i]], grep, phy$tip.label, ..., USE.NAMES = FALSE) j <- .getMRCA(seq.nod, unique(unlist(tips))) phy$node.label[j - Ntip] <- nl[i] } } phy } relabel <- function(y, ref, tip.label = TRUE) { label <- y$tip.label if (!identical(label, ref)) { if (length(label) != length(ref)) stop("one tree has a different number of tips") ilab <- match(label, ref) if (any(is.na(ilab))) stop("one tree has different tip labels") ie <- match(seq_len(Ntip(y)), y$edge[, 2]) y$edge[ie, 2] <- ilab } y$tip.label <- if (tip.label) ref else NULL y } makeNodeLabel.multiPhylo <- function(phy, method = "number", prefix = "Node", nodeList = list(), ...) { oc <- oldClass(phy) class(phy) <- NULL phy <- lapply(phy, makeNodeLabel.phylo, method = method, prefix = prefix, nodeList = nodeList, ...) class(phy) <- oc phy } ape/R/extract.popsize.R0000644000176200001440000000456614533611545014527 0ustar liggesusers## extract.popsize.R (2004-07-4) ## Extract table with population size in dependence of time ## from mcmc output generated by mcmc.popsize ## Copyright 2004 Rainer Opgen-Rhein and Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. extract.popsize<-function(mcmc.out, credible.interval=0.95, time.points=200, thinning=1, burn.in=0) { # construct a matrix with the positions of the jumps b<-burn.in+1 i<-1 k<-array(dim=ceiling((length(mcmc.out$pos)-burn.in)/thinning)) while(i<=length(k)) { k[i]<-length(mcmc.out$pos[[b]]); (i<-i+1); b<-b+thinning } o<-max(k) b<-burn.in+1 i<-1 pos.m<-matrix(nrow=length(k), ncol=o) while(i<=length(k)) { pos.m[i,]<-c(mcmc.out$pos[[b]], array(dim=o-length(mcmc.out$pos[[b]]))); i<-i+1; b<-b+thinning } # construct a matrix with the heights of the jumps b<-burn.in+1 i<-1 h.m<-matrix(nrow=length(k), ncol=o) while(i<=length(k)) { h.m[i,]<-c(mcmc.out$h[[b]], array(dim=o-length(mcmc.out$h[[b]]))); i<-i+1; b<-b+thinning } prep<-list("pos"=pos.m, "h"=h.m) step <- (max(prep$pos, na.rm=TRUE)-min(prep$pos, na.rm=TRUE))/(time.points-1) nr <- time.points p<-min(prep$pos, na.rm=TRUE) i<-1 me<-matrix(nrow=nr, ncol=5) prep.l<-prep prep.l$pos<-cbind(prep$pos,prep$pos[,length(prep$pos[1,])]) prep.l$h<-cbind(prep$h,prep$h[,length(prep$h[1,])]) while (p<=max(prep$pos, na.rm=TRUE)) { #Vector with position of heights l.prep<-prep$pos<=p l.prep[is.na(l.prep)]<-FALSE pos.of.h<-l.prep%*% array(data=1, dim=dim(prep$pos)[2]) #Vector with heights z<-array(data=(1:dim(prep$pos)[1]), dim=dim(prep$pos)[1]) index.left<-cbind(z,pos.of.h) index.right<-cbind(z, pos.of.h+1) mixed.heights<-((((p-prep$pos[index.left])/(prep$pos[index.right]-prep$pos[index.left]))* (prep$h[index.right]-prep$h[index.left]))+prep$h[index.left]) me[i,2]<-mean(mixed.heights) me[i,3]<-median(mixed.heights) me[i,4]<-quantile(mixed.heights, probs=(1-credible.interval)/2, na.rm=TRUE) me[i,5]<-quantile(mixed.heights, probs=(1+credible.interval)/2, na.rm=TRUE) me[i,1]<-p p<-p+step i<-i+1 } colnames(me) <- c("time", "mean", "median", "lower CI", "upper CI") class(me) <- "popsize" return(me) } ape/R/rtt.R0000644000176200001440000000521614404767766012205 0ustar liggesusers## rtt.R (2023-03-17) ## Root a tree by root-to-tip regression ## Copyright (c) 2014-2015, Rosemary McCloskey, BC Centre for Excellence in HIV/AIDS ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # helper functions extracted from summary.lm r.squared <- function(x){ r <- x$residuals f <- x$fitted.values mss <- sum((f - mean(f))^2) rss <- sum(r^2) mss/(mss + rss) } sigmasq <- function(x){ r <- x$residuals rss <- sum(r^2) / x$df.residual } rtt <- function (t, tip.dates, ncpu = 1, objective = "correlation", opt.tol = .Machine$double.eps^0.25) { if (objective == "correlation") objective <- function(x, y){ # cor.test(y, x)$estimate cor(y, x, use = "complete.obs") # fix by Brad Jones (2023-03-17) } else if (objective == "rsquared") objective <- function(x, y) { # summary(lm(y ~ x))$r.squared X[,2] <- x r.squared(lm.fit(X, y)) } else if (objective == "rms"){ objective <- function(x, y){ # -summary(lm(y ~ x))$sigma^2 X[,2] <- x - sigmasq( lm.fit(X, y) ) } } else stop("objective must be one of \"correlation\", \"rsquared\", or \"rms\"") ut <- unroot(t) dist <- dist.nodes(ut)[, seq_len(Ntip(ut))] # allow multifurcations X <- matrix(1, Ntip(ut), 2) f <- function (x, parent, child) { edge.dist <- x * dist[parent, ] + (1 - x) * dist[child,] objective(tip.dates, edge.dist) } obj.edge <- if (ncpu > 1) unlist(mclapply(1:nrow(ut$edge), function (e) { opt.fun <- function (x) f(x, ut$edge[e,1], ut$edge[e,2]) optimize(opt.fun, c(0, 1), maximum = TRUE, tol = opt.tol)$objective }, mc.cores=ncpu)) else apply(ut$edge, 1, function (e) { opt.fun <- function (x) f(x, e[1], e[2]) optimize(opt.fun, c(0, 1), maximum = TRUE, tol = opt.tol)$objective }) best.edge <- which.max(obj.edge) best.edge.parent <- ut$edge[best.edge, 1] best.edge.child <- ut$edge[best.edge, 2] best.edge.length <- ut$edge.length[best.edge] opt.fun <- function (x) f(x, best.edge.parent, best.edge.child) best.pos <- optimize(opt.fun, c(0, 1), maximum = TRUE, tol = opt.tol)$maximum new.root <- list(edge = matrix(c(2L, 1L), 1, 2), tip.label = "new.root", edge.length = 1, Nnode = 1L, root.edge = 1) class(new.root) <- "phylo" ut <- bind.tree(ut, new.root, where = best.edge.child, position = best.pos * best.edge.length) ut <- collapse.singles(ut) ut <- root(ut, "new.root") drop.tip(ut, "new.root") } ape/R/plotPhyloCoor.R0000644000176200001440000000605513112106514014160 0ustar liggesusers## plotPhyloCoor.R (2017-05-26) ## Coordinates of a Tree Plot ## Copyright 2008 Damien de Vienne, 2013-2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. plotPhyloCoor <- function (x, type = "phylogram", use.edge.length = TRUE, node.pos = NULL, direction = "rightwards", tip.height = NULL, ...) { Ntip <- length(x$tip.label) if (Ntip == 1) stop("found only one tip in the tree!") Nedge <- dim(x$edge)[1] if (any(tabulate(x$edge[, 1]) == 1)) stop("there are single (non-splitting) nodes in your tree; you may need to use collapse.singles().") Nnode <- x$Nnode if (is.null(x$edge.length)) use.edge.length <- FALSE phyloORclado <- type %in% c("phylogram", "cladogram") horizontal <- direction %in% c("rightwards", "leftwards") if (phyloORclado) { ## changed by KS: yy <- numeric(Ntip + Nnode) x <- reorder(x) TIPS <- x$edge[x$edge[, 2] <= Ntip, 2] if (!is.null(tip.height)) { if(!is.null(names(tip.height))) tip.height = tip.height[x$tip.label] yy[TIPS] <- tip.height } else yy[TIPS] <- 1:Ntip } xe <- x$edge ## first reorder the tree in cladewise order to avoid cophyloplot() hanging: ## x <- reorder(reorder(x), order = "pruningwise") ... maybe not needed anymore (EP) x <- reorder(x, order = "postorder") ereorder <- match(x$edge[, 2], xe[, 2]) if (phyloORclado) { if (is.null(node.pos)) { node.pos <- 1 if (type == "cladogram" && !use.edge.length) node.pos <- 2 } if (node.pos == 1) yy <- .C(node_height, as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), as.integer(Nedge), as.double(yy))[[4]] else { ans <- .C(node_height_clado, as.integer(Ntip), as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), as.double(yy)) xx <- ans[[5]] - 1 yy <- ans[[6]] } if (!use.edge.length) { if (node.pos != 2) xx <- .C(node_depth, as.integer(Ntip), as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), 1L)[[5]] - 1 xx <- max(xx) - xx } else { xx <- .C(node_depth_edgelength, as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), as.integer(Nedge), as.double(x$edge.length), double(Ntip + Nnode))[[5]] } } if (phyloORclado && direction != "rightwards") { if (direction == "leftwards") { xx <- -xx xx <- xx - min(xx) } if (!horizontal) { tmp <- yy yy <- xx xx <- tmp - min(tmp) + 1 if (direction == "downwards") { yy <- -yy yy <- yy - min(yy) } } } cbind(xx, yy) } ape/R/summary.phylo.R0000644000176200001440000002243314647417712014213 0ustar liggesusers## summary.phylo.R (2024-07-22) ## Print Summary of a Phylogeny, "multiPhylo" operators, node degrees ## Copyright 2003-2024 Emmanuel Paradis, 2006 Ben Bolker, and Klaus Schliep 2016-2024 ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. Ntip <- function(phy) UseMethod("Ntip") Ntip.phylo <- function(phy) length(phy$tip.label) Ntip.multiPhylo <- function(phy) { labs <- attr(phy, "TipLabel") if (is.null(labs)) sapply(unclass(phy), Ntip.phylo) else setNames(rep(length(labs), length(phy)), names(phy)) } Nnode <- function(phy, ...) UseMethod("Nnode") Nnode.phylo <- function(phy, internal.only = TRUE, ...) { if (internal.only) return(phy$Nnode) phy$Nnode + length(phy$tip.label) } Nnode.multiPhylo <- function(phy, internal.only = TRUE, ...) { res <- sapply(unclass(phy), "[[", "Nnode") if (internal.only) return(res) res + Ntip.multiPhylo(phy) } Nedge <- function(phy) UseMethod("Nedge") Nedge.phylo <- function(phy) dim(phy$edge)[1] Nedge.multiPhylo <- function(phy) sapply(unclass(phy), Nedge.phylo) summary.phylo <- function(object, ...) { cat("\nPhylogenetic tree:", deparse(substitute(object)), "\n\n") nb.tip <- length(object$tip.label) nb.node <- object$Nnode cat(" Number of tips:", nb.tip, "\n") cat(" Number of nodes:", nb.node, "\n") if (is.null(object$edge.length)) cat(" No branch lengths.\n") else { cat(" Branch lengths:\n") cat(" mean:", mean(object$edge.length), "\n") cat(" variance:", var(object$edge.length), "\n") cat(" distribution summary:\n") print(summary(object$edge.length)[-4]) } if (is.null(object$root.edge)) cat(" No root edge.\n") else cat(" Root edge:", object$root.edge, "\n") if (nb.tip <= 10) { cat(" Tip labels:", object$tip.label[1], "\n") cat(paste(" ", object$tip.label[-1]), sep = "\n") } else { cat(" First ten tip labels:", object$tip.label[1], "\n") cat(paste(" ", object$tip.label[2:10]), sep = "\n") } if (is.null(object$node.label)) cat(" No node labels.\n") else { if (nb.node <= 10) { cat(" Node labels:", object$node.label[1], "\n") cat(paste(" ", object$node.label[-1]), sep = "\n") } else { cat(" First ten node labels:", object$node.label[1], "\n") cat(paste(" ", object$node.label[2:10]), sep = "\n") } } } ### by BB: print.phylo <- function(x, printlen = 6,...) { nb.tip <- length(x$tip.label) nb.node <- x$Nnode NT <- if (nb.tip == 1L) "tip and" else "tips and" NN <- if (nb.node == 1L) "internal node.\n\n" else "internal nodes.\n\n" cat("\nPhylogenetic tree with", nb.tip, NT, nb.node, NN) if (nb.tip == 1L) cat("Tip label:\n") else cat("Tip labels:\n") if (nb.tip > printlen) { cat(" ", paste(x$tip.label[1:printlen], collapse=", "), ", ...\n", sep = "") } else { cat(" ", paste(x$tip.label, collapse=", "), "\n", sep = "") } if (!is.null(x$node.label)) { if (nb.node == 1L) cat("Node label:\n") else cat("Node labels:\n") if (nb.node > printlen) { cat(" ", paste(x$node.label[1:printlen], collapse=", "), ", ...\n", sep = "") } else { cat(" ", paste(x$node.label, collapse=", "), "\n", sep = "") } } rlab <- if (is.rooted(x)) "Rooted" else "Unrooted" cat("\n", rlab, "; ", sep="") blen <- if (is.null(x$edge.length)) "no branch length." else "includes branch length(s)." cat(blen, "\n", sep = "") } print.multiPhylo <- function(x, details = FALSE, ...) { N <- length(x) cat(N, "phylogenetic", ifelse(N > 1, "trees\n", "tree\n")) if (details) for (i in 1:N) cat("tree", i, ":", length(x[[i]]$tip.label), "tips\n") } "[[.multiPhylo" <- function(x, i) { class(x) <- NULL phy <- x[[i]] if (!is.null(attr(x, "TipLabel"))) phy$tip.label <- attr(x, "TipLabel") phy } `$.multiPhylo` <- function(x, name) x[[name]] "[.multiPhylo" <- function(x, i) { oc <- oldClass(x) class(x) <- NULL structure(x[i], TipLabel = attr(x, "TipLabel"), class = oc) } str.multiPhylo <- function(object, ...) { class(object) <- NULL cat('Class "multiPhylo"\n') str(object, ...) } .c_phylo_single <- function(phy) structure(list(phy), class = "multiPhylo") c.phylo <- function(..., recursive = TRUE) { obj <- list(...) classes <- lapply(obj, class) isphylo <- sapply(classes, function(x) "phylo" %in% x) if (all(isphylo)) { class(obj) <- "multiPhylo" return(obj) } if (!recursive) return(obj) ismulti <- sapply(classes, function(x) "multiPhylo" %in% x) if (all(isphylo | ismulti)) { for (i in which(isphylo)) obj[[i]] <- .c_phylo_single(obj[[i]]) ## added by Klaus: for (i in which(ismulti)) obj[[i]] <- .uncompressTipLabel(obj[[i]]) obj <- .makeMultiPhyloFromObj(obj) } else { warning('some objects not of class "phylo" or "multiPhylo": argument recursive=TRUE ignored') } obj } # this is an option to avoid growing the list, better check it also # not really as important as long the list of trees is short (by Klaus) .makeMultiPhyloFromObj <- function(obj) { n <- length(obj) N <- lengths(obj, FALSE) x <- vector("list", sum(N)) a <- b <- 0L for (i in 1:n) { a <- b + 1L b <- b + N[i] z <- obj[[i]] x[a:b] <- z if (inherits(z, "multiPhylo") && !is.null(nms <- names(z))) names(x)[a:b] <- nms # see issue #37 on GH } class(x) <- "multiPhylo" x } c.multiPhylo <- function(..., recursive = TRUE) { obj <- list(...) if (!recursive) return(obj) classes <- lapply(obj, class) isphylo <- sapply(classes, function(x) "phylo" %in% x) ismulti <- sapply(classes, function(x) "multiPhylo" %in% x) if (!all(isphylo | ismulti)) { warning('some objects not of class "phylo" or "multiPhylo": argument recursive=TRUE ignored') return(obj) } for (i in which(isphylo)) obj[[i]] <- .c_phylo_single(obj[[i]]) ## added by Klaus for (i in which(ismulti)) obj[[i]] <- .uncompressTipLabel(obj[[i]]) .makeMultiPhyloFromObj(obj) } .uncompressTipLabel <- function(x) { Lab <- attr(x, "TipLabel") if (is.null(Lab)) return(x) clx <- class(x) # <- new class(x) <- NULL for (i in seq_along(x)) x[[i]]$tip.label <- Lab # <- modified class(x) <- clx # <- modified attr(x, "TipLabel") <- NULL x } `[<-.multiPhylo` <- function(x, i, value) { ## recycling is allowed so no need to check length(value) != length(i) if (missing(i)) i <- seq_along(x) ## check that all elements in 'value' inherit class "phylo" test <- unlist(lapply(value, function(xx) !inherits(xx, "phylo"))) if (any(test)) stop("at least one element in 'value' is not of class \"phylo\".") oc <- oldClass(x) class(x) <- NULL TipLabel.x <- attr(x, "TipLabel") TipLabel.value <- attr(value, "TipLabel") if (is.null(TipLabel.x)) { if (!is.null(TipLabel.value)) # to solve PR #45 value <- .uncompressTipLabel(value) # x[i] <- value class(x) <- oc return(x) } ## to solve PR #45 if (is.null(TipLabel.value)) { x <- .uncompressTipLabel(x) class(x) <- NULL } else { if (!identical(TipLabel.x, TipLabel.value)) { x <- .uncompressTipLabel(x) class(x) <- NULL value <- .uncompressTipLabel(value) } } x[i] <- 0L # in case x needs to be elongated class(x) <- oc j <- 1L for (k in i) { ## x is of class "multiPhylo", so this uses the operator below: x[[k]] <- value[[j]] j <- j + 1L } x } `[[<-.multiPhylo` <- function(x, i, value) { if (!inherits(value, "phylo")) stop('trying to assign an object not of class "phylo" into an object of class "multiPhylo".') oc <- oldClass(x) class(x) <- NULL Lab <- attr(x, "TipLabel") if (!is.null(Lab)) { n <- length(Lab) if (n != length(value$tip.label)) stop("tree with different number of tips than those in the list (which all have the same labels; maybe you want to uncompress them)") o <- match(value$tip.label, Lab) if (any(is.na(o))) stop("tree tip labels do not match with those in the list; maybe you want to uncompress them.") value$tip.label <- NULL ie <- match(o, value$edge[, 2]) value$edge[ie, 2] <- 1:n } x[[i]] <- value class(x) <- oc x } `$<-.multiPhylo` <- function(x, i, value) { x[[i]] <- value x } degree <- function(x, ...) UseMethod("degree") degree.phylo <- function(x, details = FALSE, ...) { N <- max(x$edge) res <- tabulate(x$edge, N) if (details) return(res) tab <- tabulate(res) DF <- data.frame(Degree = seq_along(tab), N = tab) DF[tab > 0, ] } degree.evonet <- function(x, details = FALSE, ...) { N <- length(x$tip.label) + x$Nnode res <- tabulate(x$edge, N) + tabulate(x$reticulation, N) if (details) return(res) tab <- tabulate(res) DF <- data.frame(Degree = seq_along(tab), N = tab) DF[tab > 0, ] } ape/R/chronopl.R0000644000176200001440000002147014533611173013177 0ustar liggesusers## chronopl.R (2012-02-09) ## Molecular Dating With Penalized Likelihood ## Copyright 2005-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. chronopl <- function(phy, lambda, age.min = 1, age.max = NULL, node = "root", S = 1, tol = 1e-8, CV = FALSE, eval.max = 500, iter.max = 500, ...) { n <- length(phy$tip.label) ROOT <- n + 1L if (identical(node, "root")) node <- ROOT if (any(node <= n)) stop("node numbers should be greater than the number of tips") zerobl <- which(phy$edge.length <= 0) if (length(zerobl)) { if (any(phy$edge[zerobl, 2] <= n)) stop("at least one terminal branch is of length zero: you should remove it to have a meaningful estimation.") else { warning("at least one internal branch is of length zero: it was collapsed and some nodes have been deleted.") if (length(node) == 1 && node == ROOT) phy <- di2multi(phy) else { tmp <- FALSE if (is.null(phy$node.label)) { tmp <- !tmp phy$node.label <- paste("node", 1:phy$Nnode) } node.lab <- phy$node.label[node - n] phy <- di2multi(phy) node <- match(node.lab, phy$node.label) + n if (tmp) phy$node.label <- NULL } } } m <- phy$Nnode el <- phy$edge.length e1 <- phy$edge[, 1L] e2 <- phy$edge[, 2L] N <- length(e1) TIPS <- 1:n EDGES <- 1:N ini.rate <- el el <- el/S ## `basal' contains the indices of the basal edges ## (ie, linked to the root): basal <- which(e1 == ROOT) Nbasal <- length(basal) ## `ind' contains in its 1st column the index of all nonbasal ## edges, and in its second column the index of the edges ## where these edges come from (ie, this matrix contains pairs ## of contiguous edges), eg: ## ___b___ ind: ## | | | | ## ___a___| | b | a | ## | | c | a | ## |___c___ | | | ind <- matrix(0L, N - Nbasal, 2) ind[, 1] <- EDGES[-basal] ind[, 2] <- match(e1[EDGES[-basal]], e2) age <- numeric(n + m) ### This bit sets 'ini.time' and should result in no negative branch lengths seq.nod <- .Call("seq_root2tip", phy$edge, n, phy$Nnode, PACKAGE = "ape") ini.time <- age ini.time[ROOT:(n + m)] <- NA ini.time[node] <- if (is.null(age.max)) age.min else (age.min + age.max) / 2 ## if no age given for the root, find one approximately: if (is.na(ini.time[ROOT])) ini.time[ROOT] <- if (is.null(age.max)) 3 * max(age.min) else 3 * max(age.max) ISnotNA.ALL <- unlist(lapply(seq.nod, function(x) sum(!is.na(ini.time[x])))) o <- order(ISnotNA.ALL, decreasing = TRUE) for (y in seq.nod[o]) { ISNA <- is.na(ini.time[y]) if (any(ISNA)) { i <- 2L # we know the 1st value is not NA, so we start at the 2nd one while (i <= length(y)) { if (ISNA[i]) { # we stop at the next NA j <- i + 1L while (ISNA[j]) j <- j + 1L # look for the next non-NA nb.val <- j - i by <- (ini.time[y[i - 1L]] - ini.time[y[j]]) / (nb.val + 1) ini.time[y[i:(j - 1L)]] <- ini.time[y[i - 1L]] - by * seq_len(nb.val) i <- j + 1L } else i <- i + 1L } } } real.edge.length <- ini.time[e1] - ini.time[e2] if (any(real.edge.length <= 0)) stop("some initial branch lengths are zero or negative; maybe you need to adjust the given dates -- see '?chronopl' for details") ## because if (!is.null(age.max)), 'node' is modified, ## so we copy it in case CV = TRUE: node.bak <- node ## `unknown.ages' will contain the index of the nodes of unknown age: unknown.ages <- n + 1:m ## define the bounds for the node ages: lower <- rep(tol, length(unknown.ages)) upper <- rep(1/tol, length(unknown.ages)) if (!is.null(age.max)) { # are some nodes known within some intervals? lower[node - n] <- age.min upper[node - n] <- age.max ## find nodes known within an interval: interv <- which(age.min != age.max) ## drop them from the 'node' since they will be estimated: node <- node[-interv] if (length(node)) age[node] <- age.min[-interv] # update 'age' } else age[node] <- age.min if (length(node)) { unknown.ages <- unknown.ages[n - node] # 'n - node' is simplification for '-(node - n)' lower <- lower[n - node] upper <- upper[n - node] } ## `known.ages' contains the index of all nodes (internal and ## terminal) of known age: known.ages <- c(TIPS, node) ## concatenate the bounds for the rates: lower <- c(rep(tol, N), lower) upper <- c(rep(1 - tol, N), upper) minusploglik.gr <- function(rate, node.time) { grad <- numeric(N + length(unknown.ages)) age[unknown.ages] <- node.time real.edge.length <- age[e1] - age[e2] if (any(real.edge.length < 0)) { grad[] <- 0 return(grad) } ## gradient for the rates: ## the parametric part can be calculated without a loop: grad[EDGES] <- real.edge.length - el/rate if (Nbasal == 2) { # the simpler formulae if there's a basal dichotomy grad[basal[1]] <- grad[basal[1]] + lambda*(rate[basal[1]] - rate[basal[2]]) grad[basal[2]] <- grad[basal[2]] + lambda*(rate[basal[2]] - rate[basal[1]]) } else { # the general case for (i in 1:Nbasal) grad[basal[i]] <- grad[basal[i]] + lambda*(2*rate[basal[i]]*(1 - 1/Nbasal) - 2*sum(rate[basal[-i]])/Nbasal)/(Nbasal - 1) } for (i in EDGES) { ii <- c(which(e2 == e1[i]), which(e1 == e2[i])) if (!length(ii)) next grad[i] <- grad[i] + lambda*(2*length(ii)*rate[i] - 2*sum(rate[ii])) } ## gradient for the 'node times' for (i in 1:length(unknown.ages)) { nd <- unknown.ages[i] ii <- which(e1 == nd) grad[i + N] <- sum(rate[ii] - el[ii]/real.edge.length[ii])#, na.rm = TRUE) if (nd != ROOT) { ii <- which(e2 == nd) grad[i + N] <- grad[i + N] - rate[ii] + el[ii]/real.edge.length[ii] } } grad } minusploglik <- function(rate, node.time) { age[unknown.ages] <- node.time real.edge.length <- age[e1] - age[e2] if (any(real.edge.length < 0)) return(1e50) B <- rate*real.edge.length loglik <- sum(-B + el*log(B) - lfactorial(el)) -(loglik - lambda*(sum((rate[ind[, 1]] - rate[ind[, 2]])^2) + var(rate[basal]))) } out <- nlminb(c(ini.rate, ini.time[unknown.ages]), function(p) minusploglik(p[EDGES], p[-EDGES]), function(p) minusploglik.gr(p[EDGES], p[-EDGES]), control = list(eval.max = eval.max, iter.max = iter.max, ...), lower = lower, upper = upper) attr(phy, "ploglik") <- -out$objective attr(phy, "rates") <- out$par[EDGES] attr(phy, "message") <- out$message age[unknown.ages] <- out$par[-EDGES] if (CV) ophy <- phy phy$edge.length <- age[e1] - age[e2] if (CV) attr(phy, "D2") <- chronopl.cv(ophy, lambda, age.min, age.max, node.bak, n, S, tol, eval.max, iter.max, ...) phy } chronopl.cv <- function(ophy, lambda, age.min, age.max, nodes, n, S, tol, eval.max, iter.max, ...) ### ophy: the original phylogeny ### n: number of tips ### Note that we assume here that the order of the nodes ### in node.label are not modified by the drop.tip operation { cat("Doing cross-validation\n") BT <- branching.times(ophy) D2 <- numeric(n) for (i in 1:n) { cat("\r dropping tip ", i, " / ", n, sep = "") tr <- drop.tip(ophy, i) j <- which(ophy$edge[, 2] == i) if (ophy$edge[j, 1] %in% nodes) { k <- which(nodes == ophy$edge[j, 1]) node <- nodes[-k] agemin <- age.min[-k] agemax <- age.max[-k] } else node <- nodes if (length(node)) { chr <- chronopl(tr, lambda, age.min, age.max, node, S, tol, FALSE, eval.max, iter.max, ...) tmp <- if (Nnode(chr) == Nnode(ophy)) BT else BT[-(ophy$edge[j, 1] - n)] D2[i] <- sum((tmp - branching.times(chr))^2 / tmp) } else D2[i] <- 0 } cat("\n") D2 } ape/R/read.dna.R0000644000176200001440000001563314151160501013021 0ustar liggesusers## read.dna.R (2021-11-29) ## Read DNA Sequences in a File ## Copyright 2003-2021 Emmanuel Paradis, 2017 RJ Ewing ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. read.FASTA <- function(file, type = "DNA") { TYPES <- c("DNA", "AA") itype <- pmatch(toupper(type), TYPES) if (is.na(itype)) stop(paste("'type' should be", paste(dQuote(TYPES), collapse = " or "))) GZ <- grepl("\\.gz$", file, ignore.case = TRUE) if (length(grep("^(ht|f)tp(s|):", file))) { url <- file file <- tempfile() download.file(url, file) } if (inherits(file, "connection")) { if (!isOpen(file, "rt")) { open(file, "rt") on.exit(close(file)) } x <- scan(file, what = character(), sep = "\n", quiet = TRUE) x <- charToRaw(paste(x, collapse = "\n")) } else { if (GZ) { file <- gzcon(gzfile(file)) open(file) x <- raw() repeat { y <- readBin(file, "raw", 1e9) if (!length(y)) break x <- c(x, y) } close(file) } else { x <- readBin(file, "raw", file.size(file)) } } sz <- length(x) ## if the file is larger than 1 Gb we assume that it is ## UNIX-encoded and skip the search-replace of carriage returns if (sz < 1e9) { icr <- which(x == as.raw(0x0d)) # CR if (length(icr)) x <- x[-icr] } res <- .Call(rawStreamToDNAorAAbin, x, itype - 2L) if (identical(res, 0L)) { warning("failed to read sequences, returns NULL") return(NULL) } names(res) <- sub("^ +", "", names(res)) # to permit phylosim class(res) <- c("DNAbin", "AAbin")[itype] res } read.dna <- function(file, format = "interleaved", skip = 0, nlines = 0, comment.char = "#", as.character = FALSE, as.matrix = NULL) { findFirstNucleotide <- function(x) { ## actually find the 1st non-blank character ## just in case: pat.base <- "[-AaCcGgTtUuMmRrWwSsYyKkVvHhDdBbNn?]{10}" tmp <- regexpr("[[:blank:]]+", x[1]) # consider only a single string tmp[1] + attr(tmp, "match.length") } getTaxaNames <- function(x) { x <- sub("^['\" ]+", "", x) # remove the leading quotes and spaces x <- sub("['\" ]+$", "", x) # " " trailing " " " x } getNucleotide <- function(x) { x <- gsub(" ", "", x) x <- strsplit(x, NULL) tolower(unlist(x)) } formats <- c("interleaved", "sequential", "fasta", "clustal") format <- match.arg(format, formats) if (format == "fasta") { obj <- read.FASTA(file) } else { X <- scan(file = file, what = "", sep = "\n", quiet = TRUE, skip = skip, nlines = nlines, comment.char = comment.char) if (format %in% formats[1:2]) { ## need to remove the possible leading spaces and/or tabs in the first line fl <- gsub("^[[:blank:]]+", "", X[1]) fl <- as.numeric(unlist(strsplit(fl, "[[:blank:]]+"))) if (length(fl) != 2 || any(is.na(fl))) stop("the first line of the file must contain the dimensions of the data") n <- fl[1] s <- fl[2] obj <- matrix("", n, s) X <- X[-1] } switch(format, "interleaved" = { start.seq <- findFirstNucleotide(X[1]) one2n <- 1:n taxa <- getTaxaNames(substr(X[one2n], 1, start.seq - 1)) X[one2n] <- substr(X[one2n], start.seq, nchar(X[one2n])) nl <- length(X) for (i in one2n) obj[i, ] <- getNucleotide(X[seq(i, nl, n)]) }, "sequential" = { taxa <- character(n) j <- 1L # line number for (i in 1:n) { start.seq <- findFirstNucleotide(X[j]) taxa[i] <- getTaxaNames(substr(X[j], 1, start.seq - 1)) sequ <- getNucleotide(substr(X[j], start.seq, nchar(X[j]))) j <- j + 1L while (length(sequ) < s) { sequ <- c(sequ, getNucleotide(X[j])) j <- j + 1L } obj[i, ] <- sequ } taxa <- getTaxaNames(taxa) }, "clustal" = { X <- X[-1] # drop the line with "Clustal bla bla..." ## find where the 1st sequence starts start.seq <- findFirstNucleotide(X[1]) ## find the lines with *********.... nspaces <- paste("^ {", start.seq - 1, "}", sep = "", collapse = "") stars <- grep(nspaces, X) ## we now know how many sequences in the file: n <- stars[1] - 1 taxa <- getTaxaNames(substr(X[1:n], 1, start.seq - 1)) ## need to remove the sequence names before getting the sequences: X <- substr(X, start.seq, nchar(X)) nl <- length(X) ## find the length of the 1st sequence: tmp <- getNucleotide(X[seq(1, nl, n + 1)]) s <- length(tmp) obj <- matrix("", n, s) obj[1, ] <- tmp for (i in 2:n) obj[i, ] <- getNucleotide(X[seq(i, nl, n + 1)]) }) } if (format != "fasta") { rownames(obj) <- taxa if (!as.character) obj <- as.DNAbin(obj) } else { LENGTHS <- unique(lengths(obj, use.names = FALSE)) allSameLength <- length(LENGTHS) == 1 if (is.logical(as.matrix)) { if (as.matrix && !allSameLength) stop("sequences in FASTA file not of the same length") } else { as.matrix <- allSameLength } if (as.matrix) { taxa <- names(obj) n <- length(obj) y <- matrix(as.raw(0), n, LENGTHS) for (i in seq_len(n)) y[i, ] <- obj[[i]] obj <- y rownames(obj) <- taxa class(obj) <- "DNAbin" } if (as.character) obj <- as.character(obj) } obj } read.fastq <- function(file, offset = -33) { Z <- scan(file, "", sep="\n", quiet = TRUE) tmp <- Z[c(TRUE, TRUE, FALSE, FALSE)] sel <- c(TRUE, FALSE) tmp[sel] <- gsub("^@", ">", tmp[sel]) fl <- tempfile() cat(tmp, file = fl, sep = "\n") DNA <- read.FASTA(fl) ## get the qualities: tmp <- Z[c(FALSE, FALSE, FALSE, TRUE)] QUAL <- lapply(tmp, function(x) as.integer(charToRaw(x))) if (offset) QUAL <- lapply(QUAL, "+", offset) names(QUAL) <- names(DNA) attr(DNA, "QUAL") <- QUAL DNA } ape/R/zzz.R0000644000176200001440000000032012465112403012171 0ustar liggesusers## zzz.R (2009-01-12) ## Library Loading ## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .PlotPhyloEnv <- new.env() ape/R/read.nexus.R0000644000176200001440000002253014647417061013433 0ustar liggesusers## read.nexus.R (2024-07-22) ## Read Tree File in Nexus Format ## Copyright 2003-2024 Emmanuel Paradis and 2010-2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .treeBuildWithTokens <- function(x) { phy <- .Call(treeBuildWithTokens, x) dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) nms <- c("edge", "edge.length", "Nnode", "node.label", "root.edge") if (length(phy) == 4) nms <- nms[-5] names(phy) <- nms if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } ## for read.nexus clado with TRANSLATION .cladoBuildWithTokens <- function(x) { phy <- .Call(cladoBuildWithTokens, x) dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) nms <- c("edge", "Nnode", "node.label", "root.edge") if (length(phy) == 3) nms <- nms[-4] names(phy) <- nms if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } .treeBuild <- function(x) { if (!length(grep(",", x))) { ## only one tip but can be several nodes (GH's issues #104 and #124) Nnode <- length(gregexpr("\\)", x)[[1]]) edge <- if (Nnode == 1L) 2:1 else c(2L, rep(3:(Nnode + 1L), each = 2), 1L) edge <- matrix(edge, Nnode, 2L, TRUE) phy <- list(edge = edge, Nnode = Nnode) labs <- unlist(strsplit(x, "[\\(\\):;]")) nt <- Nnode + 1L phy$tip.label <- labs[nt] labs <- labs[-(1:nt)] s <- c(TRUE, FALSE) tmp <- as.numeric(labs[s]) if (length(tmp) == Nnode) { phy$edge.length <- tmp } else { # length(tmp) == Nnode + 1L (not checked) phy$edge.length <- tmp[-length(tmp)] phy$root.edge <- tmp[length(tmp)] } phy$node.label <- labs[!s] } else { phy <- .Call(treeBuild, x) dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) nms <- c("edge", "edge.length", "Nnode", "node.label", "tip.label", "root.edge") if (length(phy) == 5) nms <- nms[-6] names(phy) <- nms } if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } .cladoBuild <- function(x) { if (!length(grep(",", x))) { ## only one tip but can be several nodes (GH's issue #104) Nnode <- length(gregexpr("\\)", x)[[1]]) edge <- if (Nnode == 1L) 2:1 else c(2L, rep(3:(Nnode + 1L), each = 2), 1L) dim(edge) <- c(Nnode, 2L) phy <- list(edge = edge, Nnode = Nnode) labs <- unlist(strsplit(x, "[\\(\\);]")) phy$tip.label <- labs[Nnode + 1L] phy$node.label <- labs[(Nnode + 2L):length(labs)] } else { phy <- .Call(cladoBuild, x) dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) nms <- c("edge", "Nnode", "node.label", "tip.label", "root.edge") if (length(phy) == 4) nms <- nms[-5] names(phy) <- nms } if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } read.nexus <- function(file, tree.names = NULL, force.multi = FALSE) { X <- scan(file = file, what = "", sep = "\n", quiet = TRUE) ## remove all comments ## (this might not work if there are square brackets within the comments) LEFT <- grep("\\[", X) RIGHT <- grep("\\]", X) if (length(LEFT)) { # in case there are no comments at all w <- LEFT == RIGHT if (any(w)) { # in case all comments use at least 2 lines s <- LEFT[w] X[s] <- gsub("\\[[^]]*\\]", "", X[s]) ## The above regexp was quite tough to find: it makes ## possible to delete series of comments on the same line: ## ...[...]xxx[...]... ## without deleting the "xxx". This regexp is in three parts: ## \\[ [^]]* \\] ## where [^]]* means "any character, except "]", repeated zero ## or more times" (note that the ']' is not escaped here). ## The previous version was: ## X[s] <- gsub("\\[.*\\]", "", X[s]) ## which deleted the "xxx". (EP 2008-06-24) } w <- !w if (any(w)) { s <- LEFT[w] X[s] <- gsub("\\[.*", "", X[s]) sb <- RIGHT[w] X[sb] <- gsub(".*\\]", "", X[sb]) if (any(s < sb - 1)) X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))] } } endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE) semico <- grep(";", X) i1 <- grep("BEGIN TREES;", X, ignore.case = TRUE) i2 <- grep("TRANSLATE", X, ignore.case = TRUE) translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE if (translation) { end <- semico[semico > i2][1] x <- X[(i2 + 1):end] # assumes there's a 'new line' after "TRANSLATE" #x <- unlist(strsplit(x, "[,; \t]")) ################################################ # when the label of translation contains space # # 1 "tip 1 a", # # 2 "tip 2" # ################################################ # remove the space and tab before the string x <- gsub("^\\s+", "", x) # remove the , ; symbol x <- gsub("[,;]", "", x) # split with the first space x <- unlist(regmatches(x, regexpr("\\s+", x), invert=TRUE)) ############################################### x <- x[nzchar(x)] TRANS <- matrix(x, ncol = 2, byrow = TRUE) TRANS[, 2] <- gsub("['\"]", "", TRANS[, 2]) n <- dim(TRANS)[1] } start <- if (translation) semico[semico > i2][1] + 1 else i1 + 1 # semico[semico > i1][1] ## fix done on 2014-08-25 end <- endblock[endblock > i1][1] - 1 tree <- X[start:end] rm(X) ## check whether there are empty lines from the above manips: tree <- tree[tree != ""] semico <- grep(";", tree) Ntree <- length(semico) # provisional -- some ";" may actually mark end of commands ## are some trees on several lines? ## -- this actually 'packs' all characters ending with a ";" in a single string -- if (Ntree == 1 && length(tree) > 1) STRING <- paste(tree, collapse = "") else { if (any(diff(semico) != 1)) { STRING <- character(Ntree) s <- c(1, semico[-Ntree] + 1) j <- mapply(":", s, semico) if (is.list(j)) { for (i in 1:Ntree) STRING[i] <- paste(tree[j[[i]]], collapse = "") } else { for (i in 1:Ntree) STRING[i] <- paste(tree[j[, i]], collapse = "") } } else STRING <- tree } rm(tree) ## exclude the possible command lines ending with ";": STRING <- STRING[grep("^[[:blank:]]*tree.*= *", STRING, ignore.case = TRUE)] Ntree <- length(STRING) # update Ntree ## get the tree names: nms.trees <- sub(" *= *.*", "", STRING) # only the first occurence of "=" nms.trees <- sub("^[[:blank:]]*tree[[:blank:]\\*]*", "", nms.trees, ignore.case = TRUE) # fix by Graham Gower (2014-10-20) STRING <- sub("^.*= *", "", STRING) # delete title and 'TREE' command with 'sub' STRING <- gsub(" ", "", STRING) # delete all white spaces colon <- grep(":", STRING) if (!length(colon)) { trees <- lapply(STRING, .cladoBuild) } else if (length(colon) == Ntree) { trees <- if (translation) lapply(STRING, .treeBuildWithTokens) else lapply(STRING, .treeBuild) } else { trees <- vector("list", Ntree) trees[colon] <- lapply(STRING[colon], .treeBuild) nocolon <- (1:Ntree)[!1:Ntree %in% colon] trees[nocolon] <- lapply(STRING[nocolon], .cladoBuild) if (translation) { for (i in 1:Ntree) { tr <- trees[[i]] for (j in 1:n) { ind <- which(tr$tip.label[j] == TRANS[, 1]) tr$tip.label[j] <- TRANS[ind, 2] } if (!is.null(tr$node.label)) { for (j in 1:length(tr$node.label)) { ind <- which(tr$node.label[j] == TRANS[, 1]) tr$node.label[j] <- TRANS[ind, 2] } } trees[[i]] <- tr } translation <- FALSE } } for (i in 1:Ntree) { tr <- trees[[i]] if (!translation) n <- length(tr$tip.label) } if (Ntree == 1 && !force.multi) { trees <- trees[[1]] if (translation) { trees$tip.label <- if (length(colon)) TRANS[, 2] else TRANS[, 2][as.numeric(trees$tip.label)] } } else { if (!is.null(tree.names)) names(trees) <- tree.names if (translation) { if (length(colon) == Ntree) # .treeBuildWithTokens() was used attr(trees, "TipLabel") <- TRANS[, 2] else { # reassign the tip labels then compress for (i in 1:Ntree) trees[[i]]$tip.label <- TRANS[, 2][as.numeric(trees[[i]]$tip.label)] trees <- .compressTipLabel(trees) } } class(trees) <- "multiPhylo" if (!all(nms.trees == "")) names(trees) <- nms.trees } trees } ape/R/pic.R0000644000176200001440000001323013147042221012111 0ustar liggesusers## pic.R (2017-08-22) ## Phylogenetically Independent Contrasts ## Copyright 2002-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE, rescaled.tree = FALSE) { if (!inherits(phy, "phylo")) stop("object 'phy' is not of class \"phylo\"") if (is.null(phy$edge.length)) stop("your tree has no branch lengths") nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode if (nb.node != nb.tip - 1) stop("'phy' is not rooted and fully dichotomous") if (length(x) != nb.tip) stop("length of phenotypic and of phylogenetic data do not match") if (any(is.na(x))) stop("missing data in 'x': you may consider removing the species with missing data from your tree with the function 'drop.tip'.") phy <- reorder(phy, "postorder") phenotype <- numeric(nb.tip + nb.node) if (is.null(names(x))) { phenotype[1:nb.tip] <- x } else { if (all(names(x) %in% phy$tip.label)) phenotype[1:nb.tip] <- x[phy$tip.label] else { phenotype[1:nb.tip] <- x warning("the names of argument 'x' and the tip labels of the tree did not match: the former were ignored in the analysis.") } } ## No need to copy the branch lengths: they are rescaled ## in the C code, so it's important to leave the default ## `DUP = TRUE' of .C. ans <- .C(C_pic, as.integer(nb.tip), as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), as.double(phy$edge.length), as.double(phenotype), double(nb.node), double(nb.node), as.integer(var.contrasts), as.integer(scaled)) contr <- ans[[6]] lbls <- if (is.null(phy$node.label)) as.character(1:nb.node + nb.tip) else phy$node.label if (var.contrasts) { contr <- cbind(contr, ans[[7]]) dimnames(contr) <- list(lbls, c("contrasts", "variance")) } else names(contr) <- lbls if (rescaled.tree) { phy$edge.length <- ans[[4]] contr <- list(contr = contr, rescaled.tree = phy) } contr } pic.ortho <- function(x, phy, var.contrasts = FALSE, intra = FALSE) { n <- length(x) m <- n - 1L # number of nodes phy <- reorder(phy, "postorder") xx <- unlist(lapply(x, mean)) # 'x' in Felsenstein's paper xx <- c(xx, numeric(m)) delta.v <- numeric(n + m) s <- 1/lengths(x) s <- c(s, numeric(m)) contrast <- var.cont <- numeric(m) i <- 1L while (i < m + n) { d1 <- phy$edge[i, 2] d2 <- phy$edge[i + 1L, 2] a <- phy$edge[i, 1] tmp1 <- 1/(phy$edge.length[i] + delta.v[d1]) tmp2 <- 1/(phy$edge.length[i + 1L] + delta.v[d2]) xx[a] <- (tmp1 * xx[d1] + tmp2 * xx[d2])/(tmp1 + tmp2) delta.v[a] <- 1/(tmp1 + tmp2) f1 <- tmp1/(tmp1 + tmp2) f2 <- tmp2/(tmp1 + tmp2) s[a] <- f1*f1 * s[d1] + f2*f2 * s[d2] tmp <- 1/(s[d1] + s[d2]) contrast[a - n] <- (xx[d1] - xx[d2]) * sqrt(tmp) var.cont[a - n] <- (1/tmp1 + 1/tmp2) * tmp i <- i + 2L } lbls <- if (is.null(phy$node.label)) as.character(1:m + n) else phy$node.label if (var.contrasts) { contrast <- cbind(contrast, var.cont) dimnames(contrast) <- list(lbls, c("contrasts", "variance")) } else names(contrast) <- lbls if (intra) { intraspe.ctr <- function(x) { k <- length(x) - 1L if (!k) return(NULL) ctr <- numeric(k) ctr[1L] <- x[1L] - x[2L] if (k > 1) for (i in 2:k) ctr[i] <- x[i + 1L] - mean(x[1:i]) sqrt((1:k)/(1:k + 1)) * ctr } tmp <- lapply(x, intraspe.ctr) names(tmp) <- phy$tip.label attr(contrast, "intra") <- tmp } contrast } varCompPhylip <- function(x, phy, exec = NULL) { n <- Ntip(phy) if (is.vector(x)) x <- as.list(x) if (is.matrix(x) || is.data.frame(x)) { tmpx <- vector("list", n) for (i in 1:n) tmpx[[i]] <- x[i, , drop = FALSE] names(tmpx) <- rownames(x) x <- tmpx } p <- if (is.vector(x[[1]])) 1L else ncol(x[[1]]) if (!is.null(names(x))) x <- x[phy$tip.label] phy <- makeLabel(phy, len = 10) lbs <- phy$tip.label ni <- sapply(x, function(xx) if (is.vector(xx)) 1L else nrow(xx)) pfx <- tempdir() write.tree(phy, file = paste(pfx, "intree", sep = "/")) infile <- paste(pfx, "infile", sep = "/") file.create(infile) cat(n, " ", p, "\n", sep = "", file = infile, append = TRUE) for (i in 1:n) { cat(lbs[i], file = infile, append = TRUE) ## can surely be better but OK for the moment: cat(paste(rep(" ", 11 - nchar(lbs[i])), collapse = ""), file = infile, append = TRUE) cat(ni[i], "\n", sep = "", file = infile, append = TRUE) if (ni[i] == 1) { cat(x[[i]], sep = " ", file = infile, append = TRUE) cat("\n", file = infile, append = TRUE) } else write(t(x[[i]]), file = infile, ncolumns = p, append = TRUE) } if (is.null(exec)) exec <- if (.Platform$OS.type == "unix") "phylip contrast" else "contrast" odir <- setwd(pfx) on.exit(setwd(odir)) if (file.exists("outfile")) unlink("outfile") system(exec, intern = TRUE, input = c("W", "A", "Y")) varA <- scan("outfile", skip = 7, nlines = p, quiet = TRUE) varE <- scan("outfile", skip = 11 + p, nlines = p, quiet = TRUE) if (p > 1) { varA <- matrix(varA, p, p, byrow = TRUE) varE <- matrix(varE, p, p, byrow = TRUE) } list(varA = varA, varE = varE) } ape/R/SlowinskiGuyer.R0000644000176200001440000000657213003217100014336 0ustar liggesusers## SlowinskiGuyer.R (2016-10-23) ## Tests of Diversification Shifts with Sister-Clades ## Copyright 2011-2016 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. slowinskiguyer.test <- function(x, detail = FALSE) { r <- x[, 1] n <- x[, 1] + x[, 2] pp <- (n - r)/(n - 1) chi <- -2 * sum(log(pp)) df <- as.integer(2 * length(pp)) pval <- pchisq(chi, df, lower.tail = FALSE) res <- data.frame("chisq" = chi, "df" = df, "P.val" = pval, row.names = "") if (detail) res <- list(res, individual_Pvalues = pp) res } mcconwaysims.test <- function(x) { LRTp <- function(x) { f <- function(x) ifelse(x == 0, 0, x * log(x)) n1 <- x[1] n2 <- x[2] 1.629*(f(n1 - 1) - f(n1) + f(n2 - 1) - f(n2) - f(2) - f(n1 + n2 - 2) + f(n1 + n2)) } chi <- sum(apply(x, 1, LRTp)) pval <- pchisq(chi, df <- nrow(x), lower.tail = FALSE) data.frame("chisq" = chi, "df" = df, "P.val" = pval, row.names = "") } richness.yule.test <- function(x, t) { n1 <- x[, 1] n2 <- x[, 2] n <- c(n1, n2) tb <- c(t, t) .PrNt.Yule <- function(N, age, birth) { tmp <- -birth * age tmp + (N - 1) * log(1 - exp(tmp)) # on a log-scale } ## the functions to minimize: minusloglik0 <- function(l) -sum(.PrNt.Yule(n, tb, l)) minusloglika <- function(l) -sum(.PrNt.Yule(n1, t, l[1])) - sum(.PrNt.Yule(n2, t, l[2])) ## initial values (moment estimators): ipa <- c(mean(log(n1)/t), mean(log(n2)/t)) ip0 <- mean(ipa) out0 <- nlminb(ip0, minusloglik0, lower = 0, upper = 1) outa <- nlminb(ipa, minusloglika, lower = c(0, 0), upper = c(1, 1)) chi <- 2 * (out0$objective - outa$objective) pval <- pchisq(chi, 1, lower.tail = FALSE) data.frame(chisq = chi, df = 1, P.val = pval, row.names = "") } diversity.contrast.test <- function(x, method = "ratiolog", alternative = "two.sided", nrep = 0, ...) { method <- match.arg(method, c("ratiolog", "proportion", "difference", "logratio")) alternative <- match.arg(alternative, c("two.sided", "less", "greater")) minmax <- t(apply(x, 1, sort)) # sort all rows DIFF <- x[, 1] - x[, 2] SIGN <- sign(DIFF) CONTRAST <- switch(method, "ratiolog" = { if (any(minmax == 1)) minmax <- minmax + 1 # prevent division by 0 ## Note: if min = max, no need to set the contrast ## to zero since this is done with sign() log(minmax[, 2]) / log(minmax[, 1]) }, "proportion" = minmax[, 2] / (minmax[, 2] + minmax[, 1]), "difference" = abs(DIFF), "logratio" = log(minmax[, 1] / minmax[, 2])) y <- SIGN * CONTRAST # the signed contrasts if (nrep) { n <- length(SIGN) RND <- replicate(nrep, sum(sample(c(-1, 1), size = n, replace = TRUE) * CONTRAST)) cases <- switch(alternative, "two.sided" = sum(abs(RND) > sum(y)), "less" = sum(RND < sum(y)), "greater" = sum(RND > sum(y))) cases/nrep } else wilcox.test(x = y, alternative = alternative, ...)$p.value } ape/R/DNA.R0000644000176200001440000013331614533611426011761 0ustar liggesusers## DNA.R (2023-10-05) ## Manipulations and Comparisons of DNA and AA Sequences ## Copyright 2002-2023 Emmanuel Paradis, 2015 Klaus Schliep, 2017 Franz Krah ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. DNAbin2indel <- function(x) { if (is.list(x)) x <- as.matrix(x) d <- dim(x) s <- as.integer(d[2]) n <- as.integer(d[1]) if (s * n > 2^31 - 1) stop("DNAbin2indel() cannot handle more than 2^31 - 1 bases") res <- .C(DNAbin2indelblock, x, n, s, integer(n*s), NAOK = TRUE)[[4]] dim(res) <- d rownames(res) <- rownames(x) res } labels.DNAbin <- function(object, ...) { if (is.list(object)) return(names(object)) if (is.matrix(object)) return(rownames(object)) NULL } del.gaps <- function(x) { deleteGaps <- function(x, gapcode) { i <- which(x == gapcode) if (length(i)) x[-i] else x } if (inherits(x, "DNAbin")) { gapcode <- 4 } else { if (inherits(x, "AAbin")) { gapcode <- 45 } else { x <- as.DNAbin(x) gapcode <- 4 } } cl <- class(x) if (is.matrix(x)) { n <- dim(x)[1] y <- vector("list", n) for (i in 1:n) y[[i]] <- x[i, ] names(y) <- rownames(x) x <- y rm(y) } if (!is.list(x)) return(deleteGaps(x, gapcode = gapcode)) x <- lapply(x, deleteGaps, gapcode = gapcode) class(x) <- cl x } del.rowgapsonly <- function(x, threshold = 1, freq.only = FALSE) { if (inherits(x, "DNAbin")) { gapcode <- 4 } else { if (inherits(x, "AAbin")) { gapcode <- 45 } else { x <- as.DNAbin(x) gapcode <- 4 } } if (!is.matrix(x)) stop("sequences not in a matrix") foo <- function(x) sum(x == gapcode) g <- apply(x, 1, foo) if (freq.only) return(g) i <- which(g / ncol(x) >= threshold) if (length(i)) x <- x[-i, ] x } del.colgapsonly <- function(x, threshold = 1, freq.only = FALSE) { if (inherits(x, "DNAbin")) { gapcode <- 4 } else { if (inherits(x, "AAbin")) { gapcode <- 45 } else { x <- as.DNAbin(x) gapcode <- 4 } } if (!is.matrix(x)) stop("sequences not in a matrix") foo <- function(x) sum(x == gapcode) g <- apply(x, 2, foo) if (freq.only) return(g) i <- which(g / nrow(x) >= threshold) if (length(i)) x <- x[, -i] x } as.alignment <- function(x) { if (is.list(x)) n <- length(x) if (is.matrix(x)) n <- dim(x)[1] seq <- character(n) if (is.list(x)) { nam <- names(x) for (i in 1:n) seq[i] <- paste(x[[i]], collapse = "") } if (is.matrix(x)) { nam <- dimnames(x)[[1]] for (i in 1:n) seq[i] <- paste(x[i, ], collapse = "") } obj <- list(nb = n, seq = seq, nam = nam, com = NA) class(obj) <- "alignment" obj } "[.DNAbin" <- function(x, i, j, drop = FALSE) { ans <- NextMethod("[", drop = drop) class(ans) <- "DNAbin" ans } as.matrix.DNAbin <- function(x, ...) { if (is.matrix(x)) return(x) if (!is.list(x)) { # vector dim(x) <- c(1, length(x)) return(x) } s <- unique(lengths(x, use.names = FALSE)) if (length(s) != 1) stop("DNA sequences in list not of the same length.") n <- length(x) y <- matrix(raw(), n, s) for (i in seq_len(n)) y[i, ] <- x[[i]] rownames(y) <- names(x) class(y) <- "DNAbin" y } as.list.DNAbin <- function(x, ...) { if (is.list(x)) return(x) if (is.null(dim(x))) obj <- list(x) # cause is.vector() doesn't work else { # matrix class(x) <- NULL n <- nrow(x) obj <- vector("list", n) for (i in seq_len(n)) obj[[i]] <- x[i, , drop = TRUE] names(obj) <- rownames(x) } class(obj) <- "DNAbin" obj } rbind.DNAbin <- function(...) { obj <- list(...) n <- length(obj) if (n == 1) return(obj[[1]]) for (i in 1:n) if (!is.matrix(obj[[1]])) stop("the 'rbind' method for \"DNAbin\" accepts only matrices") NC <- unlist(lapply(obj, ncol)) if (length(unique(NC)) > 1) stop("matrices do not have the same number of columns.") for (i in 1:n) class(obj[[i]]) <- NULL # safe but maybe not really needed structure(do.call(rbind, obj), class = "DNAbin") } cbind.DNAbin <- function(..., check.names = TRUE, fill.with.gaps = FALSE, quiet = FALSE) { obj <- list(...) n <- length(obj) if (n == 1) return(obj[[1]]) for (i in 1:n) if (!is.matrix(obj[[1]])) stop("the 'cbind' method for \"DNAbin\" accepts only matrices") NR <- unlist(lapply(obj, nrow)) for (i in 1:n) class(obj[[i]]) <- NULL if (check.names) { NMS <- lapply(obj, rownames) for (i in 1:n) if (anyDuplicated(NMS[[i]])) stop("Duplicated rownames in matrix ", i, ": see ?cbind.DNAbin") nms <- unlist(NMS) if (fill.with.gaps) { NC <- unlist(lapply(obj, ncol)) nms <- unique(nms) ans <- matrix(as.raw(4), length(nms), sum(NC)) rownames(ans) <- nms from <- 1 for (i in 1:n) { to <- from + NC[i] - 1 k <- match(NMS[[i]], nms) ans[k, from:to] <- obj[[i]] from <- to + 1 } } else { tab <- table(nms) ubi <- tab == n nms <- names(tab)[which(ubi)] ans <- obj[[1]][nms, , drop = FALSE] for (i in 2:n) ans <- cbind(ans, obj[[i]][nms, , drop = FALSE]) if (!quiet && !all(ubi)) warning("some rows were dropped.") } } else { if (length(unique(NR)) > 1) stop("matrices do not have the same number of rows.") ans <- matrix(unlist(obj), NR) rownames(ans) <- rownames(obj[[1]]) } class(ans) <- "DNAbin" ans } c.DNAbin <- function(..., recursive = FALSE) { if (!all(unlist(lapply(list(...), is.list)))) stop("the 'c' method for \"DNAbin\" accepts only lists") structure(NextMethod("c"), class = "DNAbin") } print.DNAbin <- function(x, printlen = 6, digits = 3, ...) { if (is.list(x)) { n <- length(x) nms <- names(x) if (n == 1) { cat("1 DNA sequence in binary format stored in a list.\n\n") nTot <- length(x[[1]]) cat("Sequence length:", nTot, "\n") } else { cat(n, "DNA sequences in binary format stored in a list.\n\n") tmp <- lengths(x, use.names = FALSE) nTot <- sum(as.numeric(tmp)) mini <- min(tmp) maxi <- max(tmp) if (mini == maxi) cat("All sequences of same length:", maxi, "\n") else { cat("Mean sequence length:", round(mean(tmp), 3), "\n") cat(" Shortest sequence:", mini, "\n") cat(" Longest sequence:", maxi, "\n") } } } else { nTot <- length(x) if (is.matrix(x)) { nd <- dim(x) n <- nd[1] nms <- rownames(x) if (n == 1) { cat("1 DNA sequence in binary format stored in a matrix.\n\n") cat("Sequence length:", nd[2], "\n") } else { cat(n, "DNA sequences in binary format stored in a matrix.\n\n") cat("All sequences of same length:", nd[2], "\n") } } else { cat("1 DNA sequence in binary format stored in a vector.\n\n") cat("Sequence length:", nTot, "\n\n") } } if (exists("nms")) { HEAD <- if (n == 1) "\nLabel:" else "\nLabels:" TAIL <- "" if (printlen < n) { nms <- nms[1:printlen] TAIL <- "...\n" } if (any(longs <- nchar(nms) > 60)) nms[longs] <- paste0(substr(nms[longs], 1, 60), "...") cat(HEAD, nms, TAIL, sep = "\n") } if (nTot <= 1e7) { cat("Base composition:\n") print(round(base.freq(x), digits)) } else { cat("More than 10 million bases: not printing base composition.\n") } if (nTot > 1) { k <- floor(log(nTot, 1000)) units <- c("bases", "kb", "Mb", "Gb", "Tb", "Pb", "Eb") cat("(Total: ", round(nTot/1000^k, 2), " ", units[k + 1], ")\n", sep = "") } } as.DNAbin <- function(x, ...) UseMethod("as.DNAbin") ._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k", "y", "v", "h", "d", "b", "n", "-", "?") ._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80, 48, 224, 176, 208, 112, 240, 4, 2) ## by Klaus: as.DNAbin.character <- function(x, ...) { ans <- as.raw(._bs_)[match(tolower(x), ._cs_)] if (is.matrix(x)) { dim(ans) <- dim(x) dimnames(ans) <- dimnames(x) } class(ans) <- "DNAbin" ans } as.DNAbin.alignment <- function(x, ...) { n <- x$nb x$seq <- tolower(x$seq) ans <- matrix("", n, nchar(x$seq[1])) for (i in 1:n) ans[i, ] <- strsplit(x$seq[i], "")[[1]] rownames(ans) <- gsub(" +$", "", gsub("^ +", "", x$nam)) as.DNAbin.character(ans) } as.DNAbin.list <- function(x, ...) { obj <- lapply(x, as.DNAbin) class(obj) <- "DNAbin" obj } as.character.DNAbin <- function(x, ...) { f <- function(xx) { ans <- ._cs_[match(as.numeric(xx), ._bs_)] if (is.matrix(xx)) { dim(ans) <- dim(xx) dimnames(ans) <- dimnames(xx) } ans } if (is.list(x)) lapply(x, f) else f(x) } base.freq <- function(x, freq = FALSE, all = FALSE) { if (!inherits(x, "DNAbin")) stop('base.freq requires an object of class "DNAbin"') f <- function(x) .Call(BaseProportion, x) if (is.list(x)) { BF <- rowSums(sapply(x, f)) n <- sum(as.double(lengths(x, use.names = FALSE))) } else { n <- length(x) BF <- f(x) } names(BF) <- c("a", "c", "g", "t", "r", "m", "w", "s", "k", "y", "v", "h", "d", "b", "n", "-", "?") if (all) { if (!freq) BF <- BF / n } else { BF <- BF[1:4] if (!freq) BF <- BF / sum(BF) } BF } Ftab <- function(x, y = NULL) { if (is.null(y)) { if (is.list(x)) { y <- x[[2]] x <- x[[1]] if (length(x) != length(y)) stop("'x' and 'y' not of the same length") } else { # 'x' is a matrix y <- x[2, , drop = TRUE] x <- x[1, , drop = TRUE] } } else { x <- as.vector(x) y <- as.vector(y) if (length(x) != length(y)) stop("'x' and 'y' not of the same length") } out <- matrix(0, 4, 4) k <- c(136, 40, 72, 24) for (i in 1:4) { a <- x == k[i] for (j in 1:4) { b <- y == k[j] out[i, j] <- sum(a & b) } } dimnames(out)[1:2] <- list(c("a", "c", "g", "t")) out } GC.content <- function(x) sum(base.freq(x)[2:3]) seg.sites <- function(x, strict = FALSE, trailingGapsAsN = TRUE) { if (is.list(x)) x <- as.matrix(x) ## is.vector() returns FALSE because of the class, ## so we use a different test dx <- dim(x) if (is.null(dx)) return(integer()) if (dx[1] == 1) return(integer()) if (trailingGapsAsN) x <- latag2n(x) ans <- .Call(SegSites, x, strict) which(as.logical(ans)) } dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE, pairwise.deletion = FALSE, base.freq = NULL, as.matrix = FALSE) { MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93", "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV", "INDEL", "INDELBLOCK") imod <- pmatch(toupper(model), MODELS) if (is.na(imod)) stop(paste("'model' must be one of:", paste("\"", MODELS, "\"", sep = "", collapse = " "))) if (imod == 11 && variance) { warning("computing variance not available for model BH87") variance <- FALSE } if (gamma && imod %in% c(1, 5:7, 9:17)) { warning(paste("gamma-correction not available for model", model)) gamma <- FALSE } if (is.list(x)) x <- as.matrix(x) nms <- dimnames(x)[[1]] n <- dim(x)[1] # in case nms is NULL if (imod %in% c(4, 6:8)) { BF <- if (is.null(base.freq)) base.freq(x) else base.freq } else BF <- 0 if (imod %in% 16:17) pairwise.deletion <- TRUE if (!pairwise.deletion) { keep <- .Call(GlobalDeletionDNA, x) x <- x[, as.logical(keep)] } if (!gamma) { alpha <- 0 } else { alpha <- gamma gamma <- 1L } d <- .Call(dist_dna, x, imod, BF, as.integer(pairwise.deletion), as.integer(variance), as.integer(gamma), alpha) if (variance) { var <- d[[2]] d <- d[[1]] } if (imod == 11) { dim(d) <- c(n, n) dimnames(d) <- list(nms, nms) } else { attr(d, "Size") <- n attr(d, "Labels") <- nms attr(d, "Diag") <- attr(d, "Upper") <- FALSE attr(d, "call") <- match.call() attr(d, "method") <- model class(d) <- "dist" if (as.matrix) d <- as.matrix(d) } if (variance) attr(d, "variance") <- var d } alview <- function(x, file = "", uppercase = TRUE, showpos = TRUE) { if (is.list(x)) x <- as.matrix(x) taxa <- formatC(labels(x), width = -1) x <- as.character(x) s <- ncol(x) if (nrow(x) > 1) { for (j in seq_len(s)) { q <- which(x[-1L, j] == x[1L, j]) + 1L x[q, j] <- "." } } x <- apply(x, 1L, paste, collapse = "") if (uppercase) x <- toupper(x) res <- paste(taxa, x) if ((is.logical(showpos) && showpos) || is.numeric(showpos)) { if (is.logical(showpos)) { pos <- 1:s digits <- floor(log10(s)) + 1 } else { pos <- showpos digits <- floor(log10(max(pos))) + 1 } hdr <- sprintf(paste0("%0", digits, "d"), pos) hdr <- unlist(strsplit(hdr, "")) dim(hdr) <- c(digits, length(pos)) hdr <- apply(hdr, 1, paste, collapse = "") hdr <- formatC(hdr, width = nchar(res[1])) cat(hdr, file = file, sep = "\n") } cat(res, file = file, sep = "\n", append = TRUE) } where <- function(x, pattern) { pat <- strsplit(pattern, NULL)[[1]] if (inherits(x, "DNAbin")) { pat <- as.DNAbin(pat) } else { if (inherits(x, "AAbin")) { pat <- as.AAbin(toupper(pat)) } else { stop("'x' should inherit class \"DNAbin\" or \"AAbin\"") } } p <- length(pat) f <- function(x, pat, p) { if (length(x) < p) { warning("sequence shorter than the pattern: returning NULL") return(NULL) } .Call(C_where, x, pat) } if (is.list(x)) return(lapply(x, f, pat = pat, p = p)) if (is.matrix(x)) { n <- nrow(x) res <- vector("list", n) for (i in seq_len(n)) res[[i]] <- f(x[i, , drop = TRUE], pat, p) names(res) <- rownames(x) return(res) } f(x, pat, p) # if x is a vector } ## conversions from BioConductor: ## DNA: .DNAString2DNAbin <- function(from) .Call("charVectorToDNAbinVector", as.character(from)) as.DNAbin.DNAString <- function(x, ...) { res <- list(.DNAString2DNAbin(x)) class(res) <- "DNAbin" res } as.DNAbin.DNAStringSet <- function(x, ...) { res <- lapply(x, .DNAString2DNAbin) class(res) <- "DNAbin" res } as.DNAbin.DNAMultipleAlignment <- function(x, ...) as.matrix(as.DNAbin.DNAStringSet(as(x, "DNAStringSet"))) as.DNAbin.PairwiseAlignmentsSingleSubject <- function(x, ...) as.DNAbin.DNAMultipleAlignment(x) ## AA: .AAString2AAbin <- function(from) charToRaw(as.character(from)) as.AAbin.AAString <- function(x, ...) { res <- list(.AAString2AAbin(x)) class(res) <- "AAbin" res } as.AAbin.AAStringSet <- function(x, ...) { res <- lapply(x, .AAString2AAbin) class(res) <- "AAbin" res } as.AAbin.AAMultipleAlignment <- function(x, ...) as.matrix(as.AAbin.AAStringSet(as(x, "AAStringSet"))) complement <- function(x) { f <- function(x) { ## reorder the vector of raws to match the complement: comp <- as.raw(._bs_[c(4:1, 10:9, 7:8, 6:5, 14:11, 15:17)]) ans <- comp[match(as.integer(x), ._bs_)] rev(ans) # reverse before returning } if (is.matrix(x)) { for (i in 1:nrow(x)) x[i, ] <- f(x[i, ]) return(x) } else if (is.list(x)) { x <- lapply(x, f) } else x <- f(x) class(x) <- "DNAbin" x } trans <- function(x, code = 1, codonstart = 1) { f <- function(x, s, code) .C(trans_DNA2AA, x, as.integer(s), raw(s/3), as.integer(code), NAOK = TRUE)[[3]] if (code > 6) stop("only the genetic codes 1--6 are available for now") if (codonstart > 1) { del <- -(1:(codonstart - 1)) if (is.list(x)) { for (i in seq_along(x)) x[[i]] <- x[[i]][del] } else { x <- if (is.matrix(x)) x[, del] else x[del] } } if (is.list(x)) { res <- lapply(x, trans, code = code) } else { s <- if (is.matrix(x)) ncol(x) else length(x) rest <- s %% 3 if (rest != 0) { s <- s - rest x <- if (is.matrix(x)) x[, 1:s] else x[1:s] msg <- paste("sequence length not a multiple of 3:", rest, "nucleotide") if (rest == 2) msg <- paste0(msg, "s") warning(paste(msg, "dropped")) } if (is.matrix(x)) { res <- t(apply(x, 1, f, s = s, code = code)) if (s == 3) { res <- t(res) rownames(res) <- rownames(x) } } else { res <- f(x, s, code) } } class(res) <- "AAbin" res } print.AAbin <- function(x, ...) { if (is.list(x)) { n <- length(x) cat(n, "amino acid sequence") if (n > 1) cat("s") cat(" in a list\n\n") tmp <- lengths(x, use.names = FALSE) maxi <- max(tmp) mini <- min(tmp) if (mini == maxi) cat("All sequences of the same length:", maxi, "\n") else { cat("Mean sequence length:", round(mean(tmp), 3), "\n Shortest sequence:", mini, "\n Longest sequence:", maxi, "\n") } } else if (is.matrix(x)) { n <- nrow(x) cat(n, "amino acid sequence") if (n > 1) cat("s") cat(" in a matrix\n") if (n == 1) cat("Sequence length: ") else cat("All sequences of the same length: ") cat(ncol(x), "\n") } else { cat("1 amino acid sequence in a vector:\n\n", rawToChar(x)) } cat("\n") } "[.AAbin" <- function (x, i, j, drop = FALSE) { ans <- NextMethod("[", drop = drop) class(ans) <- "AAbin" ans } as.character.AAbin <- function(x, ...) { f <- function(xx) { ans <- strsplit(rawToChar(xx), "")[[1]] if (is.matrix(xx)) { dim(ans) <- dim(xx) dimnames(ans) <- dimnames(xx) } ans } if (is.list(x)) lapply(x, f) else f(x) } as.AAbin <- function(x, ...) UseMethod("as.AAbin") as.AAbin.character <- function(x, ...) { f <- function(x) charToRaw(paste(x, collapse = "")) res <- if (is.vector(x)) f(x) else t(apply(x, 1, f)) class(res) <- "AAbin" res } labels.AAbin <- function(object, ...) labels.DNAbin(object, ...) ## TO BE MOVED TO phangorn LATER if (getRversion() >= "2.15.1") utils::globalVariables("phyDat") as.phyDat.AAbin <- function(x, ...) phyDat(as.character(x), type = "AA") ## \alias{as.phyDat.AAbin} ## \method{as.phyDat}{AAbin}(x, \dots) dist.aa <- function(x, pairwise.deletion = FALSE, scaled = FALSE) { n <- nrow(x) d <- numeric(n*(n - 1)/2) X <- charToRaw("X") k <- 0L if (!pairwise.deletion) { del <- apply(x, 2, function(y) any(y == X)) if (any(del)) x <- x[, !del] for (i in 1:(n - 1)) { for (j in (i + 1):n) { k <- k + 1L d[k] <- sum(x[i, ] != x[j, ]) } } if (scaled) d <- d/ncol(x) } else { for (i in 1:(n - 1)) { a <- x[i, ] for (j in (i + 1):n) { b <- x[j, ] del <- a == X | b == X p <- length(b <- b[!del]) tmp <- sum(a[!del] != b) k <- k + 1L d[k] <- if (scaled) tmp/p else tmp } } } attr(d, "Size") <- n attr(d, "Labels") <- rownames(x) attr(d, "Diag") <- attr(d, "Upper") <- FALSE attr(d, "call") <- match.call() class(d) <- "dist" d } AAsubst <- function(x) { X <- charToRaw("X") f <- function(y) length(unique.default(y[y != X])) which(apply(x, 2, f) > 1) } .AA_3letter <- c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr", "Xaa", "Stp") .AA_1letter <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y", "X", "*") .AA_raw <- sapply(.AA_1letter, charToRaw) .AA_3cat <- list(Hydrophobic = .AA_raw[c("V", "I", "L", "F", "W", "Y", "M")], Small = .AA_raw[c("P", "G", "A", "C")], Hydrophilic = .AA_raw[c("S", "T", "H", "N", "Q", "D", "E", "K", "R")]) #.AA_3cat <- list(Hydrophobic = .AA_raw[c("V", "I", "L", "F", "W", "Y", "M")], # Small = .AA_raw[c("P", "G", "A", "C")], # Hydrophilic = .AA_raw[c("S", "T", "H", "N", "Q", "D", "E", "K", "R")]) Ape_NT <- list(properties = list( a="a", g="g", c="c", t="t", n="n", "-"="-"), color=c("red", "yellow", "green", "blue", "grey", "black")) RY_NT <- list(properties = list( Purine = c("a", "g", "r"), Pyrimidine = c("c", "t", "y"), "n" = "n", "-" = "-"), color=c("#FF00FF", "#00FFFF", "grey", "black")) Ape_AA <- list(properties = list( Hydrophobic = c("V", "I", "L", "F", "W", "Y", "M"), Small = c("P", "G", "A", "C"), Hydrophilic = c("S", "T", "H", "N", "Q", "D", "E", "K", "R")), color=c("red", "yellow", "blue")) # Properties + Conservation (Clustal X) Clustal <- list(properties = list( Hydrophobic = c("A", "I", "L", "M", "F", "W", "V"), Positive = c("K", "R"), Negative = c("E", "D"), Polar = c("N", "Q", "S", "T"), Glycines = "G", Prolines = "P", Aromatic = c("H", "Y"), Cysteine = "C"), color= c("#80a0f0", "#f01505", "#c048c0", "#15c015", "#f09048", "#c0c000", "#15a4a4", "#f08080") ) # Polarity geneious Polarity <- list(properties = list( "Non polar" = c("G", "A", "V", "L", "I", "F", "W", "M", "P"), "Polar, uncharged" = c("S", "T", "C", "Y", "N", "Q"), "Polar, acidic" = c("D", "E"), "Polar, basic" = c("K", "R", "H")), color = c("yellow", "green", "red", "blue")) # Physicochemical Properties Zappo_AA <- list(properties = list( Hydrophobic = c("I", "L", "V", "A", "M"), # `Aliphatic/Hydrophobic` Aromatic = c("F", "W", "Y"), Positive = c("K", "R", "H"), Negative = c("E", "D"), Hydrophilic = c("S", "T", "N", "Q"), Conformational = c("P", "G"), # `Conformationally special` Cysteine = "C"), color= c("#ff7979", "#f89f56", "#0070c0", "#c00000", "#08c81a", "#cc00cc", "#ffff00") ) Transmembrane_tendency <- list(properties = list( Lys = "K", Asp = "D", Glu = "E", Arg = "R", Gln = "Q", Asn = "N", Pro = "P", His = "H", Ser = "S", Thr = "T", Cys = "C", Gly = "G", Ala = "A", Tyr = "Y", Met = "M", Val = "V", Trp = "W", Leu = "L", Ile = "I", Phe = "F" ), color=c("#0000FF", "#0D00F1", "#1A00E4", "#2800D6", "#3500C9", "#4300BB", "#5000AE", "#5D00A1", "#6B0093", "#780086", "#860078", "#93006B", "#A1005D", "#AE0050", "#BB0043", "#C90035", "#D60028", "#E4001A", "#F1000D", "#FF0000")) image.worker <- function(x, what, col, bg = "white", xlab = "", ylab = "", show.labels = TRUE, cex.lab = 1, legend = TRUE, grid = FALSE, show.bases = FALSE, base.cex = 1, base.font = 1, base.col = "black", scheme=NULL, bs=NULL, cs=NULL,...) { # what <- # if (missing(what)) c("a", "g", "c", "t", "n", "-") else tolower(what) if (missing(col)) col <- c("red", "yellow", "green", "blue", "grey", "black") x <- as.matrix(x) # tests if all sequences have the same length n <- (dx <- dim(x))[1] # number of sequences s <- dx[2] # number of sites y <- integer(N <- length(x)) ncl <- length(what) col <- rep(col, length.out = ncl) brks <- 0.5:(ncl + 0.5) sm <- 0L for (i in ncl:1) { k <- bs[match(what[[i]], cs)] sel <- which(unclass(x) %in% k) if (L <- length(sel)) { y[sel] <- i sm <- sm + L } #else { # what <- what[-i] # col <- col[-i] # brks <- brks[-i] # } } dim(y) <- dx ## if there's no 0 in y, must drop 'bg' from the cols passed to image: if (sm == N) { leg.co <- co <- col leg.txt <- toupper(names(what)) } else { co <- c(bg, col) leg.txt <- c(toupper(names(what)), "others") leg.co <- c(col, bg) brks <- c(-0.5, brks) } yaxt <- if (show.labels) "n" else "s" image.default(1:s, 1:n, t(y[n:1, , drop = FALSE]), col = co, xlab = xlab, ylab = ylab, yaxt = yaxt, breaks = brks, ...) if (show.labels) mtext(rownames(x), side = 2, line = 0.1, at = n:1, cex = cex.lab, adj = 1, las = 1) if (legend) { psr <- par("usr") xx <- psr[2]/2 yy <- psr[4] * (0.5 + 0.5/par("plt")[4]) n_col <- length(leg.txt) if(n_col > 6) n_col <- ceiling(n_col/2) legend(xx, yy, legend = leg.txt, pch = 22, pt.bg = leg.co, pt.cex = 2, bty = "n", xjust = 0.5, yjust = 0.5, horiz = FALSE, ncol=n_col, xpd = TRUE) } if (grid) { if (is.logical(grid)) grid <- 3L if (grid %in% 2:3) abline(v = seq(1.5, s - 0.5, 1), lwd = 0.33, xpd = FALSE) if (grid %in% c(1, 3)) abline(h = seq(1.5, n - 0.5, 1), lwd = 0.33, xpd = FALSE) } if (show.bases) { x <- toupper(as.character(x)) xx <- rep(1:s, each = n) yy <- rep(n:1, s) text(xx, yy, x, cex = base.cex, font = base.font, col = base.col) } } image.AAbin <- function(x, what, col, bg = "white", xlab = "", ylab = "", show.labels = TRUE, cex.lab = 1, legend = TRUE, grid = FALSE, show.aa = FALSE, aa.cex = 1, aa.font = 1, aa.col = "black", scheme="Ape_AA", ...) { scheme <- match.arg(scheme, c("Ape_AA", "Clustal", "Zappo_AA", "Polarity", "Transmembrane_tendency")) scheme <- get(scheme, environment(image.AAbin)) if (missing(what)){ if(!is.null(scheme)) what <- scheme$properties else what <- Ape_AA$properties } if (missing(col)){ if(!is.null(scheme)) col <- scheme$color else col <- c("red", "yellow", "blue") } image.worker(x, what, col, bg = bg, xlab = xlab, ylab = ylab, show.labels = show.labels, cex.lab = cex.lab, legend = legend, grid = grid, show.bases = show.aa, base.cex = aa.cex, base.font = aa.font, base.col = aa.col, scheme=scheme, bs=.AA_raw, cs=.AA_1letter,...) } image.DNAbin <- function(x, what, col, bg = "white", xlab = "", ylab = "", show.labels = TRUE, cex.lab = 1, legend = TRUE, grid = FALSE, show.bases = FALSE, base.cex = 1, base.font = 1, base.col = "black", scheme="Ape_NT", ...) { scheme <- match.arg(scheme, c("Ape_NT", "RY_NT")) scheme <- get(scheme, environment(image.AAbin)) if (missing(what)){ if(!is.null(scheme)) what <- scheme$properties else what <- Ape_AA$properties } if (missing(col)){ if(!is.null(scheme)) col <- scheme$color else col <- c("red", "yellow", "blue") } image.worker(x, what, col, bg = bg, xlab = xlab, ylab = ylab, show.labels = show.labels, cex.lab = cex.lab, legend = legend, grid = grid, show.bases = show.bases, base.cex = base.cex, base.font = base.font, base.col = base.col, scheme=scheme, bs=as.raw(._bs_), cs=._cs_,...) } checkAlignment <- function(x, check.gaps = TRUE, plot = TRUE, what = 1:4) { cat("\nNumber of sequences:", n <- nrow(x), "\nNumber of sites:", s <- ncol(x), "\n") if (check.gaps) { cat("\n") y <- DNAbin2indel(x) gap.length <- sort(unique.default(y))[-1] if (!length(gap.length)) cat("No gap in alignment.\n") else { rest <- gap.length %% 3 if (any(cond <- rest > 0)) { cat("Some gap lengths are not multiple of 3:", gap.length[cond]) } else cat("All gap lengths are multiple of 3.") tab <- tabulate(y, gap.length[length(gap.length)]) tab <- tab[gap.length] cat("\n\nFrequencies of gap lengths:\n") names(tab) <- gap.length print(tab) ## find gaps on the borders: col1 <- unique(y[, 1]) if (!col1[1]) col1 <- col1[-1] if (length(col1)) cat(" => length of gaps on the left border of the alignment:", unique(col1), "\n") else cat(" => no gap on the left border of the alignment\n") i <- which(y != 0, useNames = FALSE) jcol <- i %/% nrow(y) + 1 yi <- y[i] j <- yi == s - jcol + 1 if (any(j)) cat(" => length of gaps on the right border of the alignment:", yi[j], "\n") else cat(" => no gap on the right border of the alignment\n") ## find base segments: A <- B <- numeric() for (i in seq_len(n)) { j <- which(y[i, ] != 0) # j: start of each gap in the i-th sequence if (!length(j)) next k <- j + y[i, j] # k: start of each base segment in the i-th sequence if (j[1] != 1) k <- c(1, k) else j <- j[-1] if (k[length(k)] > s) k <- k[-length(k)] else j <- c(j, s + 1) A <- c(A, j) B <- c(B, k) } AB <- unique(cbind(A, B)) not.multiple.of.3 <- (AB[, 1] - AB[, 2]) %% 3 != 0 left.border <- AB[, 2] == 1 right.border <- AB[, 1] == s + 1 Nnot.mult3 <- sum(not.multiple.of.3) cat("\nNumber of unique contiguous base segments defined by gaps:", nrow(AB), "\n") if (!Nnot.mult3) cat("All segment lengths multiple of 3.\n") else { Nleft <- sum(not.multiple.of.3 & left.border) Nright <- sum(not.multiple.of.3 & right.border) cat("Number of segment lengths not multiple of 3:", Nnot.mult3, "\n", " => on the left border of the alignement:", Nleft, "\n", " => on the right border :", Nright, "\n") if (Nright + Nleft < Nnot.mult3) { cat(" => positions of these segments inside the alignment: ") sel <- not.multiple.of.3 & !left.border & !right.border cat(paste(AB[sel, 2], AB[sel, 1] - 1, sep = ".."), "\n") } } } } else gap.length <- numeric() ss <- seg.sites(x) cat("\nNumber of segregating sites (including gaps):", length(ss)) BF.col <- matrix(NA_real_, length(ss), 4) for (i in seq_along(ss)) BF.col[i, ] <- base.freq(x[, ss[i]])#, freq = TRUE) tmp <- apply(BF.col, 1, function(x) sum(x > 0)) cat("\nNumber of sites with at least one substitution:", sum(tmp > 1)) cat("\nNumber of sites with 1, 2, 3 or 4 observed bases:\n") tab2 <- tabulate(tmp, 4L) tab2[1] <- s - sum(tab2) names(tab2) <- 1:4 print(tab2) cat("\n") H <- numeric(s) H[ss] <- apply(BF.col, 1, function(x) {x <- x[x > 0]; -sum(x * log(x))}) G <- rep(1, s) G[ss] <- tmp if (plot) { if (length(what) == 4) { mat <- if (length(gap.length)) 1:4 else c(1, 0, 2, 3) layout(matrix(mat, 2, 2)) } else { if (length(what) != 1) { what <- what[1] warning("argument 'what' has length > 1: the first value is taken") } } if (1 %in% what) image(x) if (2 %in% what && length(gap.length)) barplot(tab, xlab = "Gap length") if (3 %in% what) plot(1:s, H, "h", xlab = "Sequence position", ylab = "Shannon index (H)") if (4 %in% what) plot(1:s, G, "h", xlab = "Sequence position", ylab = "Number of observed bases") } } all.equal.DNAbin <- function(target, current, plot = FALSE, ...) { if (identical(target, current)) return(TRUE) name.target <- deparse(substitute(target)) name.current <- deparse(substitute(current)) st1 <- "convert list as matrix for further comparison." # st2 <- "" st3 <- "Subset your data for further comparison." isali1 <- is.matrix(target) isali2 <- is.matrix(current) if (isali1 && !isali2) return(c("1st object is a matrix, 2nd object is a list:", st1)) if (!isali1 && isali2) return(c("1st object is a list, 2nd object is a matrix:", st1)) if (!isali1 && !isali2) return(c("Both objects are lists:", "convert them as matrices for further comparison.")) # n1 <- if (isali1) nrow(target) else length(target) # n2 <- if (isali2) nrow(current) else length(current) if (ncol(target) != ncol(current)) return("Numbers of columns different: comparison stopped here.") foo <- function(n) ifelse(n == 1, "sequence", "sequences") doComparison <- function(target, current) which(target != current, arr.ind = TRUE, useNames = FALSE) n1 <- nrow(target) n2 <- nrow(current) labs1 <- labels(target) labs2 <- labels(current) if (identical(labs1, labs2)) { res <- "Labels in both objects identical." res <- list(messages = res, different.sites = doComparison(target, current)) } else { in12 <- labs1 %in% labs2 in21 <- labs2 %in% labs1 if (n1 != n2) { res <- c("Number of sequences different:", paste(n1, foo(n1), "in 1st object;", n2, foo(n2), "in 2nd object."), st3) plot <- FALSE } else { # n1 == n2 if (any(!in12)) { res <- c("X: 1st object (target), Y: 2nd object (current).", paste("labels in X not in Y:", paste(labs1[!in12], collapse = ", ")), paste("labels in X not in Y:", paste(labs2[!in21], collapse = ", ")), st3) plot <- FALSE } else { res <- c("Labels in both objects identical but not in the same order.", "Comparing sequences after reordering rows of the second matrix.") current <- current[labs1, ] if (identical(target, current)) { res <- c(res, "Sequences are identical.") plot <- FALSE } else { res <- list(messages = res, different.sites = doComparison(target, current)) } } } } if (plot) { cols <- unique(res$different.sites[, 2]) diff.cols <- diff(cols) j <- which(diff.cols != 1) end <- c(cols[j], cols[length(cols)]) start <- c(cols[1], cols[j + 1]) v <- cumsum(end - start + 1) + 0.5 f <- function(lab) { axis(2, at = seq_len(n1), labels = FALSE) axis(1, at = seq_along(cols), labels = cols) mtext(lab, line = 1, adj = 0, font = 2) } layout(matrix(1:2, 2)) par(xpd = TRUE) image(target[, cols], show.labels = FALSE, axes = FALSE, ...) f(name.target) xx <- c(0.5, v) segments(xx, 0.5, xx, n1, lty = 2, col = "white", lwd = 2) segments(xx, 0.5, xx, -1e5, lty = 2, lwd = 2) image(current[, cols], show.labels = FALSE, axes = FALSE, ...) f(name.current) segments(xx, 0.5, xx, n2, lty = 2, col = "white", lwd = 2) segments(xx, 1e5, xx, n2, lty = 2, lwd = 2) #segments(0.5, -5, length(cols) + 0.5, -5, lwd = 5, col = "grey") #rect(0.5, -4, length(cols) + 0.5, -3, col = "grey") #segments(0.5, 0.5, 10, -3) } res } ## From Franz Krah : ## estensions of the AAbin class to complement the DNAbin class funcitons c.AAbin <- function(..., recursive = FALSE) { if (!all(unlist(lapply(list(...), is.list)))) stop("the 'c' method for \"AAbin\" accepts only lists") structure(NextMethod("c"), class = "AAbin") } rbind.AAbin <- function(...) { obj <- list(...) n <- length(obj) if (n == 1) return(obj[[1]]) for (i in 1:n) if (!is.matrix(obj[[1]])) stop("the 'rbind' method for \"AAbin\" accepts only matrices") NC <- unlist(lapply(obj, ncol)) if (length(unique(NC)) > 1) stop("matrices do not have the same number of columns.") for (i in 1:n) class(obj[[i]]) <- NULL # safe but maybe not really needed structure(do.call(rbind, obj), class = "AAbin") } cbind.AAbin <- function(..., check.names = TRUE, fill.with.Xs = FALSE, quiet = FALSE) { obj <- list(...) n <- length(obj) if (n == 1) return(obj[[1]]) for (i in 1:n) if (!is.matrix(obj[[1]])) stop("the 'cbind' method for \"AAbin\" accepts only matrices") NR <- unlist(lapply(obj, nrow)) for (i in 1:n) class(obj[[i]]) <- NULL if (check.names) { NMS <- lapply(obj, rownames) for (i in 1:n) if (anyDuplicated(NMS[[i]])) stop("Duplicated rownames in matrix ", i, ": see ?cbind.AAbin") nms <- unlist(NMS) if (fill.with.Xs) { NC <- unlist(lapply(obj, ncol)) nms <- unique(nms) ans <- matrix(charToRaw("X"), length(nms), sum(NC)) rownames(ans) <- nms from <- 1 for (i in 1:n) { to <- from + NC[i] - 1 k <- match(NMS[[i]], nms) ans[k, from:to] <- obj[[i]] from <- to + 1 } } else { tab <- table(nms) ubi <- tab == n nms <- names(tab)[which(ubi)] ans <- obj[[1]][nms, , drop = FALSE] for (i in 2:n) ans <- cbind(ans, obj[[i]][nms, , drop = FALSE]) if (!quiet && !all(ubi)) warning("some rows were dropped.") } } else { if (length(unique(NR)) > 1) stop("matrices do not have the same number of rows.") ans <- matrix(unlist(obj), NR) rownames(ans) <- rownames(obj[[1]]) } class(ans) <- "AAbin" ans } as.AAbin.list <- function(x, ...) { obj <- lapply(x, as.AAbin) class(obj) <- "AAbin" obj } as.list.AAbin <- function(x, ...) { if (is.list(x)) return(x) if (is.null(dim(x))) obj <- list(x) # cause is.vector() doesn't work else { # matrix n <- nrow(x) obj <- vector("list", n) for (i in seq_len(n)) obj[[i]] <- x[i, , drop = TRUE] names(obj) <- rownames(x) } class(obj) <- "AAbin" obj } as.matrix.AAbin <- function(x, ...) { if (is.matrix(x)) return(x) if (!is.list(x)) { # vector dim(x) <- c(1, length(x)) return(x) } s <- unique(lengths(x, use.names = FALSE)) if (length(s) != 1) stop("AA sequences in list not of the same length.") n <- length(x) y <- matrix(raw(), n, s) for (i in seq_len(n)) y[i, ] <- x[[i]] rownames(y) <- names(x) class(y) <- "AAbin" y } rDNAbin <- function(n, nrow, ncol, base.freq = rep(0.25, 4), prefix = "Ind_") { foo <- function(n, prob) { vec <- as.raw(._bs_[1:4]) vec[sample.int(4L, n, TRUE, prob, FALSE)] } base.freq <- if (all(base.freq == 0.25)) NULL else base.freq[c(1, 3, 2, 4)] if (missing(n)) { if (missing(nrow) && missing(ncol)) stop("nrow and ncol should be given if n is missing") res <- foo(nrow * ncol, base.freq) dim(res) <- c(nrow, ncol) rownames(res) <- paste0(prefix, 1:nrow) } else { res <- lapply(n, foo, prob = base.freq) names(res) <- paste0(prefix, seq_along(n)) } class(res) <- "DNAbin" res } dnds <- function(x, code = 1, codonstart = 1, quiet = FALSE, details = FALSE, return.categories = FALSE) { if (code > 6) stop("only the genetic codes 1--6 are available for now") if (is.list(x)) x <- as.matrix(x) n <- nrow(x) if (nrow(unique.matrix(x)) != n) stop("sequences are not unique") ### if (any(base.freq(x, TRUE, TRUE)[-(1:4)] > 0)) stop("ambiguous bases are not permitted") if (codonstart > 1) { del <- -(1:(codonstart - 1)) x <- x[, del] } p <- ncol(x) rest <- p %% 3 if (rest) { p <- p - rest x <- x[, 1:p] msg <- sprintf("sequence length not a multiple of 3: %d %s dropped", rest, ngettext(rest, "base", "bases")) warning(msg) } degMat <- .buildDegeneracyMatrix(code) Lcat <- matrix(0L, n, p) V1 <- V2 <- V3 <- integer(136) i <- c(136L, 72L, 40L, 24L) V1[i] <- c(1L, 17L, 33L, 49L) V2[i] <- c(0L, 4L, 8L, 12L) V3[i] <- 0:3 class(x) <- NULL z <- as.integer(x) N <- length(x) SHIFT <- c(0L, n, 2L * n) p <- 1L + SHIFT while (p[3] <= N) { for (i in 1:n) { codon <- z[p] ii <- V1[codon[1]] + V2[codon[2]] + V3[codon[3]] if (!is.na(ii)) Lcat[p] <- degMat[ii, ] p <- p + 1L } p <- p[3] + SHIFT } if (return.categories) return(Lcat) if (details) quiet <- TRUE deg <- c(0, 2, 4) # the 3 levels of degeneracy nout <- n*(n - 1)/2 res <- numeric(nout) k <- 1L for (i in 1:(n - 1)) { for (j in (i + 1):n) { if (!quiet) cat("\r", round(100*k/nout), "%") z <- x[c(i, j), ] Lavg <- (Lcat[i, ] + Lcat[j, ])/2 Lavg[Lavg == 1] <- 2 Lavg[Lavg == 3] <- 4 ii <- lapply(deg, function(x) which(x == Lavg)) L <- lengths(ii) S <- lapply(ii, function(id) dist.dna(z[, id, drop = FALSE], "TS")) V <- lapply(ii, function(id) dist.dna(z[, id, drop = FALSE], "TV")) S <- unlist(S, use.names = FALSE) V <- unlist(V, use.names = FALSE) if (details) { cat(sprintf("\nComparing sequences %d and %d:\n", i, j)) tmp <- rbind(S, V) dimnames(tmp) <- list(c("Transitions", "Transversions"), c("Nondegenerate", "Twofold", "Fourfold")) print(tmp) } P <- S/L Q <- V/L a <- 1/(1 - 2*P - Q) b <- 1/(1 - 2*Q) c <- (a - b)/2 A <- log(a)/2 - log(b)/4 B <- log(b)/2 dS <- (L[2]*A[2] + L[3]*A[3])/sum(L[2:3]) + B[3] dN <- A[1] + (L[1]*B[1] + L[2]*B[2])/sum(L[1:2]) res[k] <- dN/dS k <- k + 1L } } if (!quiet) cat("... done\n") attr(res, "Size") <- n attr(res, "Labels") <- rownames(x) attr(res, "Diag") <- attr(res, "Upper") <- FALSE attr(res, "call") <- match.call() attr(res, "method") <- "dNdS (Li 1993)" class(res) <- "dist" res } .buildDegeneracyMatrix <- function(code) { b <- as.raw(._bs_[1:4]) CODONS <- cbind(rep(b, each = 16), rep(rep(b, each = 4), 4), rep(b, 16)) AA <- trans(CODONS, code = code) degeneracyMatrix <- matrix(0L, 64L, 3L) deg <- c(4L, 2L, 2L, 0L) ## 1/ find the bases at 3rd positions that are twofold/fourfold degenerate s <- 1:4 while (s[4L] <= 64) { degeneracyMatrix[s, 3L] <- deg[length(unique(AA[s]))] s <- s + 4L } ## 2/ all bases at 2nd positions are nondegenerate: no need to do anything ## 3/ are some bases at 1st positions twofold degenerate? s <- c(1L, 17L, 33L, 49L) while (s[1L] < 17) { degeneracyMatrix[s, 1L] <- deg[length(unique(AA[s]))] s <- s + 1L } degeneracyMatrix } latag2n <- function(x) { if (is.list(x)) x <- as.matrix(x) dx <- dim(x) clx <- class(x) res <- .Call(leading_trailing_gaps_to_N, x) ## the order of the next two commands is crucial if 'clx' is ## c("matrix", "array") because the dimension must be set before ## setting the class (this is the case for pegas::mjn() where the ## class "DNAbin" is dropped before calling latag2n()) dim(res) <- dx class(res) <- clx res } solveAmbiguousBases <- function(x, method = "columnwise", random = TRUE) { if (method == "columnwise") { if (is.list(x)) x <- as.matrix(x) p <- ncol(x) for (j in 1:p) { BF <- base.freq(x[, j], TRUE, TRUE) ambi <- BF[5:15] K <- which(ambi > 0) if (length(K)) { agct <- BF[c(1, 3, 2, 4)] for (b in K) { base <- as.DNAbin(names(ambi[b])) sel <- agct[rev(rawToBits(base))[1:4] == 1] if (!sum(sel)) sel[] <- 1L i <- which(x[, j] == base) tmp <- if (random) sample(names(sel), length(i), TRUE, sel) else names(sel)[which.max(sel)] x[i, j] <- as.DNAbin(tmp) } } } } x } ape/R/triangMtd.R0000644000176200001440000000244012465112403013272 0ustar liggesusers## treePop.R (2011-10-11) ## Tree Reconstruction With the Triangles Method ## Copyright 2011 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. triangMtd <- function(X) { if (is.matrix(X)) X <- as.dist(X) if (any(is.na(X))) stop("missing values are not allowed in the distance matrix") N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_triangMtd, as.double(X), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), NAOK = TRUE) obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } triangMtds <- function(X) { if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_triangMtds, as.double(X), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), NAOK = TRUE) obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } ape/R/bind.tree.R0000644000176200001440000001721313745254566013242 0ustar liggesusers## bind.tree.R (2020-10-25) ## Bind Trees ## Copyright 2003-2020 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. `+.phylo` <- function(x, y) { p <- if (is.null(x$root.edge)) 0 else x$root.edge bind.tree(x, y, position = p) } bind.tree <- function(x, y, where = "root", position = 0, interactive = FALSE) { nx <- length(x$tip.label) mx <- x$Nnode ROOTx <- nx + 1L ny <- length(y$tip.label) my <- y$Nnode if (interactive) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) if (lastPP$type != "phylogram" || lastPP$direction != "rightwards") stop("you must plot tree 'x' as a 'rightward phylogram'") cat("Click where you want to graft tree 'y'...\n") xy <- locator(1) d <- abs(xy$y - lastPP$yy) d[lastPP$xx - xy$x < 0] <- Inf where <- which.min(d) position <- lastPP$xx[where] - xy$x if (position < 0) position <- 0 cat("The following parameters are used:\n") cat(" where =", where, " position =", position, "\n") } else { if (where == 0 || where == "root") where <- ROOTx if (position < 0) position <- 0 if (where > nx + mx) stop("argument 'where' out of range for tree 'x'") } ## check whether both trees have branch lengths: switch(is.null(x$edge.length) + is.null(y$edge.length) + 1L, wbl <- TRUE, { x$edge.length <- y$edge.length <- NULL wbl <- FALSE warning("one tree has no branch lengths, they have been ignored") }, wbl <- FALSE) yHasNoRootEdge <- is.null(y$root.edge) xHasNoRootEdge <- is.null(x$root.edge) x <- reorder(x) # fix by Veronika Boskova y <- reorder(y) ## find the row of 'where' before renumbering if (where == ROOTx) case <- 1 else { i <- which(x$edge[, 2] == where) case <- if (where <= nx) 2 else 3 } ## case = 1 -> y is bound on the root of x ## case = 2 -> y is bound on a tip of x ## case = 3 -> y is bound on a node of x ## check that 'position' is correct if (position && wbl) { ### New in ape 3.0-1: this makes possible binding 'y' below ### a node of 'x' thus creating a new node in 'x' ### if (!wbl) ### stop("'position' is non-null but trees have no branch lengths") if (case == 1) { if (xHasNoRootEdge) stop("tree 'x' has no root edge") if (position > x$root.edge) stop("'position' is larger than x's root edge") } else { if (x$edge.length[i] < position) stop("'position' is larger than the branch length") } } ## the special case of substituting two tips: if (case == 2 && ny == 1 && !position) { x$tip.label[x$edge[i, 2]] <- y$tip.label if (wbl) x$edge.length[i] <- x$edge.length[i] + y$edge.length return(x) } ### because in all situations internal nodes need to be ### renumbered, they are changed to negatives first, and ### nodes eventually added will be numbered sequentially nodes <- x$edge > nx x$edge[nodes] <- -(x$edge[nodes] - nx) # -1, ..., -mx nodes <- y$edge > ny y$edge[nodes] <- -(y$edge[nodes] - ny + mx) # -(mx+1), ..., -(mx+my) ROOT <- -1L # may change later next.node <- -(mx + my) - 1L ## renumber now the tips in y: new.nx <- if (where <= nx && !position) nx - 1L else nx y$edge[!nodes] <- y$edge[!nodes] + new.nx ## if 'y' as a root edge, use it: if (!yHasNoRootEdge) { y$edge <- rbind(c(0, y$edge[1]), y$edge) ## ^ will be filled later next.node <- next.node - 1L if (wbl) y$edge.length <- c(y$root.edge, y$edge.length) } switch(case, { # case = 1 if (position) { x$root.edge <- x$root.edge - position x$edge <- rbind(c(next.node, x$edge[1]), x$edge) ROOT <- next.node if (wbl) x$edge.length <- c(position, x$edge.length) } if (yHasNoRootEdge) { j <- which(y$edge[, 1] == y$edge[1]) y$edge[j, 1] <- ROOT } else y$edge[1] <- ROOT x$edge <- rbind(x$edge, y$edge) if (wbl) x$edge.length <- c(x$edge.length, y$edge.length) }, { # case = 2 if (position) { x$edge[i, 2] <- next.node x$edge <- rbind(x$edge[1:i, ], c(next.node, where), x$edge[-(1:i), ]) if (wbl) { x$edge.length[i] <- x$edge.length[i] - position x$edge.length <- c(x$edge.length[1:i], position, x$edge.length[-(1:i)]) } i <- i + 1L if (yHasNoRootEdge) { j <- which(y$edge[, 1] == y$edge[1]) y$edge[j, 1] <- x$edge[i, 1] } else y$edge[1] <- x$edge[i, 1] } else { if (yHasNoRootEdge) x$edge[i, 2] <- y$edge[1] else { ## the root edge of y is fused with the terminal edge of x if (wbl) y$edge.length[1] <- y$edge.length[1] + x$edge.length[i] y$edge[1] <- x$edge[i, 1] ## delete i-th edge in x: x$edge <- x$edge[-i, ] if (wbl) x$edge.length <- x$edge.length[-i] i <- i - 1L } x$tip.label <- x$tip.label[-where] ## renumber the tips that need to: ii <- which(x$edge[, 2] > where & x$edge[, 2] <= nx) x$edge[ii, 2] <- x$edge[ii, 2] - 1L } x$edge <- rbind(x$edge[1:i, ], y$edge, x$edge[-(1:i), ]) if (wbl) x$edge.length <- c(x$edge.length[1:i], y$edge.length, x$edge.length[-(1:i)]) }, { # case = 3 if (position) { if (yHasNoRootEdge) { j <- which(y$edge[, 1] == y$edge[1]) y$edge[j, 1] <- next.node } else y$edge[1] <- next.node x$edge <- rbind(x$edge[1:i, ], c(next.node, x$edge[i, 2]), x$edge[-(1:i), ]) x$edge[i, 2] <- next.node if (wbl) { x$edge.length[i] <- x$edge.length[i] - position x$edge.length <- c(x$edge.length[1:i], position, x$edge.length[-(1:i)]) } i <- i + 1L } else { if (yHasNoRootEdge) { j <- which(y$edge[, 1] == y$edge[1]) y$edge[j, 1] <- x$edge[i, 2] } else y$edge[1] <- x$edge[i, 2] } x$edge <- rbind(x$edge[1:i, ], y$edge, x$edge[-(1:i), ]) if (wbl) x$edge.length <- c(x$edge.length[1:i], y$edge.length, x$edge.length[-(1:i)]) }) x$tip.label <- c(x$tip.label, y$tip.label) if (is.null(x$node.label)) { if (!is.null(y$node.label)) x$node.label <- c(rep(NA, mx), y$node.label) } else { x$node.label <- if (is.null(y$node.label)) c(x$node.label, rep(NA, my)) else c(x$node.label, y$node.label) } n <- length(x$tip.label) x$Nnode <- dim(x$edge)[1] + 1L - n ## update the node labels before renumbering (this adds NA for ## the added nodes, and drops the label for those deleted) if (!is.null(x$node.label)) x$node.label <- x$node.label[sort(-unique(x$edge[, 1]))] ## renumber nodes: newNb <- integer(x$Nnode) newNb[-ROOT] <- n + 1L sndcol <- x$edge[, 2] < 0 ## executed from right to left, so newNb is modified before x$edge: x$edge[sndcol, 2] <- newNb[-x$edge[sndcol, 2]] <- n + 2:x$Nnode x$edge[, 1] <- newNb[-x$edge[, 1]] if (!is.null(x$node.label)) x$node.label <- x$node.label[order(newNb[newNb > 0])] attr(x, "order") <- NULL reorder(x) } ape/R/as.matching.R0000644000176200001440000000377112465112403013545 0ustar liggesusers## as.matching.R (2011-02-26) ## Conversion Between Phylo and Matching Objects ## Copyright 2005-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. as.matching <- function(x, ...) UseMethod("as.matching") as.matching.phylo <- function(x, labels = TRUE, ...) { nb.tip <- length(x$tip.label) nb.node <- x$Nnode if (nb.tip != nb.node + 1) stop("the tree must be dichotomous AND rooted.") x <- reorder(x, "pruningwise") # cannot use "postorder" here! mat <- matrix(x$edge[, 2], ncol = 2, byrow = TRUE) nodes <- x$edge[seq(by = 2, length.out = nb.node), 1] ## we can use match() becoz each node appears once in `mat' O <- match(mat, nodes) new.nodes <- 1:nb.node + nb.tip sel <- !is.na(O) mat[sel] <- new.nodes[O[sel]] mat <- t(apply(mat, 1, sort)) obj <- list(matching = mat) if (!is.null(x$edge.length)) warning("branch lengths have been ignored") if (labels) { obj$tip.label <- x$tip.label if (!is.null(x$node.label)) obj$node.label <- x$node.label[match(new.nodes, nodes)] } class(obj) <- "matching" obj } as.phylo.matching <- function(x, ...) { nb.node <- dim(x$matching)[1] nb.tip <- nb.node + 1 N <- 2 * nb.node edge <- matrix(NA, N, 2) new.nodes <- numeric(N + 1) new.nodes[N + 1] <- nb.tip + 1 nextnode <- nb.tip + 2 j <- 1 for (i in nb.node:1) { edge[j:(j + 1), 1] <- new.nodes[i + nb.tip] for (k in 1:2) { if (x$matching[i, k] > nb.tip) { edge[j + k - 1, 2] <- new.nodes[x$matching[i, k]] <- nextnode nextnode <- nextnode + 1 } else edge[j + k - 1, 2] <- x$matching[i, k] } j <- j + 2 } obj <- list(edge = edge) if (!is.null(x$tip.label)) obj$tip.label <- x$tip.label else obj$tip.label <- as.character(1:nb.tip) obj$Nnode <- nb.node class(obj) <- "phylo" read.tree(text = write.tree(obj)) } ape/R/apetools.R0000644000176200001440000001421713310223646013176 0ustar liggesusers## apetools.R (2018-06-13) ## APE Tools ## Copyright 2017-2018 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .file.extensions <- list(clustal = "aln", fasta = c("fas", "fasta", "fa"), fastq = c("fq", "fastq"), newick = c("nwk", "newick", "tre", "tree"), nexus = c("nex", "nexus"), phylip = "phy") Xplorefiles <- function(from = "HOME", recursive = TRUE, ignore.case = TRUE) { if (from == "HOME") from <- Sys.getenv("HOME") FILES <- list.files(path = from, recursive = recursive, full.names = TRUE) ext <- if (exists(".file.extensions", envir = .PlotPhyloEnv)) get(".file.extensions", envir = .PlotPhyloEnv) else .file.extensions res <- vector("list", length(ext)) names(res) <- names(ext) for (i in seq_along(res)) { e <- paste0("\\.", ext[[i]], "$") if (length(e) > 1) e <- paste(e, collapse = "|") x <- grep(e, FILES, ignore.case = ignore.case, value = TRUE) res[[i]] <- data.frame(File = x, Size = file.size(x), stringsAsFactors = FALSE) } res } editFileExtensions <- function() { foo <- function(x) { n <- length(x) if (n < m) x[(n + 1):m] <- NA x } res <- if (exists(".file.extensions", envir = .PlotPhyloEnv)) get(".file.extensions", envir = .PlotPhyloEnv) else .file.extensions m <- max(lengths(res, FALSE)) res <- lapply(res, foo) res <- as.data.frame(res, stringsAsFactors = FALSE) res <- edit(res) res <- lapply(res, function(x) x[!is.na(x)]) assign(".file.extensions", res, envir = .PlotPhyloEnv) } bydir <- function(x) { nofile <- which(sapply(x, nrow) == 0) if (length(nofile)) x <- x[-nofile] if (!length(x)) { cat("No file\n") return(invisible(NULL)) } for (i in seq_along(x)) x[[i]]$Type <- names(x)[i] x <- do.call(rbind, x) x <- x[order(x$File), ] SPLIT <- strsplit(x$File, "/") LL <- lengths(SPLIT) foo <- function(i, PATH) { K <- grep(paste0("^", PATH, "/"), x$File) sel <- intersect(K, which(LL == i + 1L)) if (length(sel)) { y <- x[sel, ] y$File <- gsub(".*/", "", y$File) cat("\n", PATH, "/\n", sep = "") print(y, row.names = FALSE) } if (length(sel) < length(K)) { d <- setdiff(K, sel) subdir <- unlist(lapply(SPLIT[d], "[", i + 1L)) for (z in unique(subdir)) foo(i + 1L, paste(PATH, z, sep = "/")) } } top <- unlist(lapply(SPLIT, "[", 1L)) for (z in unique(top)) foo(1L, z) } Xplor <- function(from = "HOME") { ext <- if (exists(".file.extensions", envir = .PlotPhyloEnv)) get(".file.extensions", envir = .PlotPhyloEnv) else .file.extensions OUT <- paste0(tempfile(), ".html") mycat <- function(...) cat(..., sep = "", file = OUT, append = TRUE) FILES <- Xplorefiles(from = from) filetypes <- names(FILES) ## nb of files of each type: NR <- sapply(FILES, nrow) ## HTML header mycat('Files Sorted by Type') ## build the TOC mycat('

File types searched:

') mycat('') mycat('') for (type in filetypes) { mycat('') } mycat('
Type Number of files Extensions*
', type, '', NR[type], '', paste(paste0(".", ext[[type]]), collapse = " "), '
') mycat('
*Case-independent
To change the files extensions, type in R: editFileExtensions()
') if (all(NR == 0)) { browseURL(OUT) return(invisible(NULL)) } OUTBYDIR <- paste0(tempfile(), ".html") sink(OUTBYDIR) cat('Files Sorted by Directory') .bydir.html(FILES) cat('') sink(NULL) mycat('

Files sorted by directory (in new tab)


') for (type in filetypes) { nr <- NR[type] mycat('

', toupper(type), '

') if (nr == 0) { mycat('no file of this type') next } DF <- FILES[[type]] mycat('') mycat('') for (i in 1:nr) mycat('') mycat('
File name Size (KB)
', DF[i, 1], '', round(DF[i, 2]/1000, 1), '
') } mycat('') browseURL(OUT) } .bydir.html <- function(x) { nofile <- which(sapply(x, nrow) == 0) if (length(nofile)) x <- x[-nofile] if (!length(x)) return(NULL) for (i in seq_along(x)) x[[i]]$Type <- names(x)[i] x <- do.call(rbind, x) x <- x[order(x$File), ] SPLIT <- strsplit(x$File, "/") LL <- lengths(SPLIT) foo <- function(i, PATH) { K <- grep(paste0("^", PATH, "/"), x$File) sel <- intersect(K, which(LL == i + 1L)) if (length(sel)) { y <- x[sel, ] y$File <- gsub(".*/", "", y$File) cat('

', PATH, '/

', sep = "") cat('') cat('') for (i in 1:nrow(y)) cat('', sep = "") cat('
File Size (KB) Type
', y[i, 1], '', round(y[i, 2]/1000, 1), '', y[i, 3], '

') } if (length(sel) < length(K)) { d <- setdiff(K, sel) subdir <- unlist(lapply(SPLIT[d], "[", i + 1L)) for (z in unique(subdir)) foo(i + 1L, paste(PATH, z, sep = "/")) } } top <- unlist(lapply(SPLIT, "[", 1L)) for (z in unique(top)) foo(1L, z) } ape/R/alex.R0000644000176200001440000000234313075424101012273 0ustar liggesusers## alex.R (2017-04-18) ## Alignment Explorer With Multiple Devices ## Copyright 2012-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. alex <- function(x, ...) { n <- nrow(x) s <- ncol(x) devmain <- dev.cur() on.exit(dev.set(devmain)) NEW <- TRUE cat("Click on two opposite corners of the zone you want to zoom-in. Right-click to exit.\n") repeat { xy <- locator(2) if (is.null(xy)) break xy$y <- n - xy$y + 1 xy <- lapply(xy, function(x) sort(round(x))) i1 <- xy$y[1L]; i2 <- xy$y[2L] j1 <- xy$x[1L]; j2 <- xy$x[2L] if (i1 > n || j1 > s) cat("Try again!\n") else { if (i1 <= 0) i1 <- 1L if (j1 <= 0) j1 <- 1L if (i2 > n) i2 <- n if (j2 > s) j2 <- s if (NEW) { dev.new() devsub <- dev.cur() NEW <- FALSE } else dev.set(devsub) image(x[i1:i2, j1:j2], xaxt = "n", ...) atx <- axTicks(1) axis(1, atx, labels = (j1:j2)[atx]) title(sub = paste("From", sQuote(deparse(substitute(x))))) dev.set(devmain) } } } ape/R/rotate.R0000644000176200001440000001117212465112403012641 0ustar liggesusers## rotate.R (2014-06-05) ## Ancestral Character Estimation ## Copyright 2007 Christoph Heibl ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. rotate <- function(phy, node, polytom = c(1,2)){ # load DESCENDANTS function DESCENDANTS <- function(tree, node){ tips <- length(tree$tip.label) x <- tree$edge[,2][tree$edge[,1] == node] while(max(x) > tips){ x <- x[x > tips] for(h in 1:length(x)) tree$edge <- tree$edge[!tree$edge[,2] == x[h],] for(i in 1:length(x)) tree$edge[,1][tree$edge[,1] == x[i]] <- node x <- tree$edge[,2][tree$edge[,1] == node] } x } if (!inherits(phy, "phylo")) # is phy of class phylo? stop("object \"phy\" is not of class \"phylo\"") phy <- reorder(phy) # added by EP 2014-06-05 nb.tips <- length(phy$tip.label) # number of tiplabels max.int.node <- phy$Nnode+nb.tips # number of last internal node nb.edges <- dim(phy$edge)[1] # number of branches if (length(node) == 2){ # get MRCA if tips are given for node if (mode(node) == "character"){ if (any(!node %in% phy$tip.label)) # do tiplabels correspond stop("object \"node\" contains tiplabels not present in object \"phy\"") tips <- cbind(phy$tip.label, 1:nb.tips) node[1] <- tips[,2][tips[,1] == node[1]] node[2] <- tips[,2][tips[,1] == node[2]] node <- as.numeric(node) } if (any(!node %in% 1:nb.tips)) stop("object \"node\" does not contain terminal nodes") node <- getMRCA(phy, node) } if (node <= nb.tips || node > max.int.node) # is node really internal? stop("object \"node\" is not an internal node of object \"phy\"") with.br.length <- !is.null(phy$edge.length) # does phy contain brlength? G <- cbind(phy$edge, 1:(length(phy$edge)/2)) N <- phy$edge[phy$edge[,1] == node] N <- N[N != node] if (length(N) > 2) N <- N[polytom] CLADE1 <- N[1] CLADE2 <- N[2] # do clades comprise interior nodes? if (CLADE1 > nb.tips) CLADE11 <- DESCENDANTS(phy, CLADE1) if (CLADE2 > nb.tips) CLADE22 <- DESCENDANTS(phy, CLADE2) # calculate inidices of clades in phy.edge if (CLADE1 > nb.tips){ c1 <- G[,3][G[,2] == CLADE1] c2 <- G[,3][G[,2] == max(CLADE11)] } else { c1 <- G[,3][G[,2] == CLADE1] c2 <- G[,3][G[,2] == CLADE1] } if (CLADE2 > nb.tips){ c3 <- G[,3][G[,2] == CLADE2] c4 <- G[,3][G[,2] == max(CLADE22)] } else { c3 <- G[,3][G[,2] == CLADE2] c4 <- G[,3][G[,2] == CLADE2] } # create new phy$edge and phy$edge.length if (c2+1 == c3){ if (c1 == 1 && c4 != nb.edges){ phy$edge <- rbind(phy$edge[c3:c4,], phy$edge[c1:c2,], phy$edge[(c4+1):nb.edges,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[c3:c4], phy$edge.length[c1:c2], phy$edge.length[(c4+1):nb.edges]) } if (c1 !=1 && c4 == nb.edges){ phy$edge <- rbind(phy$edge[1:(c1-1),], phy$edge[c3:c4,], phy$edge[c1:c2,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[1:(c1-1)], phy$edge.length[c3:c4], phy$edge.length[c1:c2]) } if (c1 !=1 && c4 != nb.edges){ phy$edge <- rbind(phy$edge[1:(c1-1),], phy$edge[c3:c4,], phy$edge[c1:c2,], phy$edge[(c4+1):nb.edges,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[1:(c1-1)], phy$edge.length[c3:c4], phy$edge.length[c1:c2], phy$edge.length[(c4+1):nb.edges]) } if (c1 ==1 && c4 == nb.edges){ phy$edge <- rbind(phy$edge[c3:c4,], phy$edge[c1:c2,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[c3:c4], phy$edge.length[c1:c2]) } } else { if (c1 == 1 && c4 != nb.edges){ phy$edge <- rbind(phy$edge[c3:c4,], phy$edge[(c2+1):(c3-1),], phy$edge[c1:c2,], phy$edge[(c4+1):nb.edges,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[c3:c4], phy$edge.length[(c2+1):(c3-1)], phy$edge.length[c1:c2], phy$edge.length[(c4+1):nb.edges]) } if (c1 !=1 && c4 == nb.edges){ phy$edge <- rbind(phy$edge[1:(c1-1),], phy$edge[c3:c4,], phy$edge[(c2+1):(c3-1),], phy$edge[c1:c2,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[1:(c1-1)], phy$edge.length[c3:c4], phy$edge.length[(c2+1):(c3-1)], phy$edge.length[c1:c2]) } if (c1 !=1 && c4 != nb.edges){ phy$edge <- rbind(phy$edge[1:(c1-1),], phy$edge[c3:c4,], phy$edge[(c2+1):(c3-1),], phy$edge[c1:c2,], phy$edge[(c4+1):nb.edges,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[1:(c1-1)], phy$edge.length[c3:c4], phy$edge.length[(c2+1):(c3-1)], phy$edge.length[c1:c2], phy$edge.length[(c4+1):nb.edges]) } if (c1 ==1 && c4 == nb.edges){ phy$edge <- rbind(phy$edge[c3:c4,], phy$edge[(c2+1):(c3-1),], phy$edge[c1:c2,]) if (with.br.length) phy$edge.length <- c(phy$edge.length[c3:c4], phy$edge.length[(c2+1):(c3-1)], phy$edge.length[c1:c2]) } } phy } ape/R/vcv.phylo.R0000644000176200001440000000407214157270341013302 0ustar liggesusers## vcv.phylo.R (2012-02-21) ## Phylogenetic Variance-Covariance or Correlation Matrix ## Copyright 2002-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. vcv <- function(phy, ...) UseMethod("vcv") vcv.phylo <- function(phy, model = "Brownian", corr = FALSE, ...) { if (is.null(phy$edge.length)) stop("the tree has no branch lengths") pp <- prop.part(phy) phy <- reorder(phy, "postorder") n <- length(phy$tip.label) e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] EL <- phy$edge.length ## xx: vecteur donnant la distance d'un noeud ## ou d'un tip a partir de la racine ## (same than in is.ultrametric) xx <- numeric(n + phy$Nnode) vcv <- matrix(0, n, n) ## the loop below starts from the bottom of the edge matrix, so ## from the root for (i in length(e1):1) { var.cur.node <- xx[e1[i]] xx[e2[i]] <- var.cur.node + EL[i] # sets the variance j <- i - 1L while (e1[j] == e1[i] && j > 0) { left <- if (e2[j] > n) pp[[e2[j] - n]] else e2[j] right <- if (e2[i] > n) pp[[e2[i] - n]] else e2[i] vcv[left, right] <- vcv[right, left] <- var.cur.node # sets the covariance j <- j - 1L } } diag.elts <- 1 + 0:(n - 1)*(n + 1) vcv[diag.elts] <- xx[1:n] if (corr) { ## This is inspired from the code of cov2cor (2005-09-08): Is <- sqrt(1 / vcv[diag.elts]) ## below 'vcv[] <- ...' has been changed to 'vcv <- ...' ## which seems to be twice faster for n = 1000 and ## respects the additional attributes (2012-02-21): vcv <- Is * vcv * rep(Is, each = n) vcv[diag.elts] <- 1 } dimnames(vcv)[1:2] <- list(phy$tip.label) vcv } vcv.corPhyl <- function(phy, corr = FALSE, ...) { labels <- attr(phy, "tree")$tip.label dummy.df <- data.frame(seq_along(labels), row.names = labels) res <- corMatrix(Initialize.corPhyl(phy, dummy.df), corr = corr) dimnames(res) <- list(labels, labels) res } ape/R/corphylo.R0000644000176200001440000002440514533611317013213 0ustar liggesusers## corphylo.R (2021-04-24) ## Ancestral Character Estimation ## Copyright 2015 Anthony R. Ives ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. corphylo <- function(X, U = list(), SeM = NULL, phy = NULL, REML = TRUE, method = c("Nelder-Mead", "SANN"), constrain.d = FALSE, reltol = 10^-6, maxit.NM = 1000, maxit.SA = 1000, temp.SA = 1, tmax.SA = 1, verbose = FALSE) { # Begin corphylo.LL corphylo.LL <- function(par, XX, UU, MM, tau, Vphy, REML, constrain.d, verbose) { n <- nrow(X) p <- ncol(X) L.elements <- par[1:(p + p * (p - 1)/2)] L <- matrix(0, nrow = p, ncol = p) L[lower.tri(L, diag = T)] <- L.elements R <- t(L) %*% L if (constrain.d == TRUE) { logit.d <- par[(p + p * (p - 1)/2 + 1):length(par)] if (max(abs(logit.d)) > 10) return(10^10) d <- 1/(1 + exp(-logit.d)) } else { d <- par[(p + p * (p - 1)/2 + 1):length(par)] if (max(d) > 10) return(10^10) } # OU transform C <- matrix(0, nrow = p * n, ncol = p * n) for (i in 1:p) for (j in 1:p) { Cd <- (d[i]^tau * (d[j]^t(tau)) * (1 - (d[i] * d[j])^Vphy))/(1 - d[i] * d[j]) C[(n * (i - 1) + 1):(i * n), (n * (j - 1) + 1):(j * n)] <- R[i, j] * Cd } V <- C + diag(as.numeric(MM)) if (anyNA(V)) return(10^10) if (is.nan(rcond(V)) || rcond(V) < 10^-10) return(10^10) iV <- solve(V) denom <- t(UU) %*% iV %*% UU if (anyNA(denom)) return(10^10) if (is.nan(rcond(denom)) || rcond(denom) < 10^-10) return(10^10) num <- t(UU) %*% iV %*% XX B <- solve(denom, num) B <- as.matrix(B) H <- XX - UU %*% B logdetV <- -determinant(iV)$modulus[1] if (is.infinite(logdetV)) return(10^10) if (REML == TRUE) { # REML likelihood function LL <- 0.5 * (logdetV + determinant(t(UU) %*% iV %*% UU)$modulus[1] + t(H) %*% iV %*% H) } else { # ML likelihood function LL <- 0.5 * (logdetV + t(H) %*% iV %*% H) } if (verbose == T) show(c(as.numeric(LL), par)) return(as.numeric(LL)) } # End corphylo.LL # Main program if (!inherits(phy, "phylo")) stop("Object \"phy\" is not of class \"phylo\".") if (is.null(phy$edge.length)) stop("The tree has no branch lengths.") if (is.null(phy$tip.label)) stop("The tree has no tip labels.") phy <- reorder(phy, "postorder") n <- length(phy$tip.label) # Input X if (dim(X)[1] != n) stop("Number of rows of the data matrix does not match the length of the tree.") if (is.null(rownames(X))) { warning("No tip labels on X; order assumed to be the same as in the tree.\n") data.names = phy$tip.label } else data.names = rownames(X) order <- match(data.names, phy$tip.label) if (sum(is.na(order)) > 0) { warning("Data names do not match with the tip labels.\n") rownames(X) <- data.names } else { temp <- X rownames(X) <- phy$tip.label X[order, ] <- temp[1:nrow(temp), ] } p <- dim(X)[2] # Input SeM if (!is.null(SeM)) { if (dim(SeM)[1] != n) stop("Number of rows of the SeM matrix does not match the length of the tree.") if (is.null(rownames(SeM))) { warning("No tip labels on SeM; order assumed to be the same as in the tree.\n") data.names = phy$tip.label } else data.names = rownames(SeM) order <- match(data.names, phy$tip.label) if (sum(is.na(order)) > 0) { warning("SeM names do not match with the tip labels.\n") rownames(SeM) <- data.names } else { temp <- SeM rownames(SeM) <- phy$tip.label SeM[order, ] <- temp[1:nrow(temp), ] } } else { SeM <- matrix(0, nrow = n, ncol = p) } # Input U if (length(U) > 0) { if (length(U) != p) stop("Number of elements of list U does not match the number of columns in X.") for (i in 1:p) { if (!is.null(U[[i]])){ if (dim(U[[i]])[1] != n) stop("Number of rows of an element of U does not match the tree.") if (is.null(rownames(U[[i]]))) { warning("No tip labels on U; order assumed to be the same as in the tree.\n") data.names = phy$tip.label } else data.names = rownames(U[[i]]) order <- match(data.names, phy$tip.label) if (sum(is.na(order)) > 0) { warning("U names do not match with the tip labels.\n") rownames(U[[i]]) <- data.names } else { temp <- U[[i]] rownames(U[[i]]) <- phy$tip.label U[[i]][order, ] <- temp[1:nrow(temp), ] } } else { U[[i]] <- matrix(0, nrow=n, ncol=1) rownames(U[[i]]) <- phy$tip.label } } } # Standardize all variables Xs <- X for (i in 1:p) Xs[, i] <- (X[, i] - mean(X[, i]))/sd(X[, i]) if (!is.null(SeM)) { SeMs <- SeM for (i in 1:p) SeMs[, i] <- SeM[, i]/sd(X[, i]) } if (length(U) > 0) { Us <- U for (i in 1:p) for (j in 1:ncol(U[[i]])) { if (sd(U[[i]][, j]) > 0) { Us[[i]][, j] <- (U[[i]][, j] - mean(U[[i]][, j]))/sd(U[[i]][, j]) } else { Us[[i]][, j] <- U[[i]][, j] - mean(U[[i]][, j]) } } } # Set up matrices Vphy <- vcv(phy) Vphy <- Vphy/max(Vphy) Vphy <- Vphy/exp(determinant(Vphy)$modulus[1]/n) XX <- matrix(as.matrix(Xs), ncol = 1) MM <- matrix(as.matrix(SeMs^2), ncol = 1) UU <- kronecker(diag(p), matrix(1, nrow = n, ncol = 1)) if (length(U) > 0) { zeros <- 0 * (1:p) for (i in 1:p) { dd <- zeros dd[i] <- 1 u <- kronecker(dd, as.matrix(Us[[i]])) for (j in 1:dim(u)[2]) if (sd(u[, j]) > 0) UU <- cbind(UU, u[, j]) } } # Compute initial estimates assuming no phylogeny if not provided if (length(U) > 0) { eps <- matrix(nrow = n, ncol = p) for (i in 1:p) { if (ncol(U[[i]]) > 0) { u <- as.matrix(Us[[i]]) z <- lm(Xs[, i] ~ u) eps[, i] <- resid(z) } else { eps[, i] <- Xs[, i] - mean(Xs[, i]) } } L <- t(chol(cov(eps))) } else { L <- t(chol(cov(Xs))) } L.elements <- L[lower.tri(L, diag = T)] par <- c(L.elements, array(0.5, dim = c(1, p))) tau <- matrix(1, nrow = n, ncol = 1) %*% diag(Vphy) - Vphy if (method == "Nelder-Mead") opt <- optim(fn = corphylo.LL, par = par, XX = XX, UU = UU, MM = MM, tau = tau, Vphy = Vphy, REML = REML, verbose = verbose, constrain.d = constrain.d, method = "Nelder-Mead", control = list(maxit = maxit.NM, reltol = reltol)) if (method == "SANN") { opt <- optim(fn = corphylo.LL, par = par, XX = XX, UU = UU, MM = MM, tau = tau, Vphy = Vphy, REML = REML, verbose = verbose, constrain.d = constrain.d, method = "SANN", control = list(maxit = maxit.SA, temp = temp.SA, tmax = tmax.SA, reltol = reltol)) par <- opt$par opt <- optim(fn = corphylo.LL, par = par, XX = XX, UU = UU, MM = MM, tau = tau, Vphy = Vphy, REML = REML, verbose = verbose, constrain.d = constrain.d, method = "Nelder-Mead", control = list(maxit = maxit.NM, reltol = reltol)) } # Extract parameters par <- Re(opt$par) LL <- opt$value L.elements <- par[1:(p + p * (p - 1)/2)] L <- matrix(0, nrow = p, ncol = p) L[lower.tri(L, diag = T)] <- L.elements R <- t(L) %*% L Rd <- diag(diag(R)^-0.5) cor.matrix <- Rd %*% R %*% Rd if (constrain.d == TRUE) { logit.d <- par[(p + p * (p - 1)/2 + 1):length(par)] d <- 1/(1 + exp(-logit.d)) } else { d <- par[(p + p * (p - 1)/2 + 1):length(par)] } # OU transform C <- matrix(0, nrow = p * n, ncol = p * n) for (i in 1:p) for (j in 1:p) { Cd <- (d[i]^tau * (d[j]^t(tau)) * (1 - (d[i] * d[j])^Vphy))/(1 - d[i] * d[j]) C[(n * (i - 1) + 1):(i * n), (n * (j - 1) + 1):(j * n)] <- R[i, j] * Cd } V <- C + diag(MM) iV <- solve(V) denom <- t(UU) %*% iV %*% UU num <- t(UU) %*% iV %*% XX B <- solve(denom, num) B <- as.matrix(B) B.cov <- solve(t(UU) %*% iV %*% UU) H <- XX - UU %*% B # Back-transform B counter <- 0 sd.list <- matrix(0, nrow = dim(UU)[2], ncol = 1) for (i in 1:p) { counter <- counter + 1 B[counter] <- B[counter] + mean(X[, i]) sd.list[counter] <- sd(X[, i]) if (length(U) > 0) { for (j in 1:ncol(U[[i]])) { if (sd(U[[i]][, j]) > 0) { counter <- counter + 1 B[counter] <- B[counter] * sd(X[, i])/sd(U[[i]][, j]) sd.list[counter] <- sd(X[, i])/sd(U[[i]][, j]) } } } } B.cov <- diag(as.numeric(sd.list)) %*% B.cov %*% diag(as.numeric(sd.list)) B.se <- as.matrix(diag(B.cov))^0.5 B.zscore <- B/B.se B.pvalue <- 2 * pnorm(abs(B/B.se), lower.tail = FALSE) # RowNames for B if (length(U) > 0) { B.rownames <- NULL for (i in 1:p) { B.rownames <- c(B.rownames, paste("B", i, ".0", sep = "")) if (ncol(U[[i]]) > 0) for (j in 1:ncol(U[[i]])) if (sd(U[[i]][, j]) > 0) { if (is.null(colnames(U[[i]])[j])) B.rownames <- c(B.rownames, paste("B", i, ".", j, sep = "")) if (!is.null(colnames(U[[i]])[j])) B.rownames <- c(B.rownames, paste("B", i, ".", colnames(U[[i]])[j], sep = "")) } } } else { B.rownames <- NULL for (i in 1:p) { B.rownames <- c(B.rownames, paste("B", i, ".0", sep = "")) } } rownames(B) <- B.rownames rownames(B.cov) <- B.rownames colnames(B.cov) <- B.rownames rownames(B.se) <- B.rownames rownames(B.zscore) <- B.rownames rownames(B.pvalue) <- B.rownames if (REML == TRUE) { logLik <- -0.5 * ((n * p) - ncol(UU)) * log(2 * pi) + 0.5 * determinant(t(XX) %*% XX)$modulus[1] - LL } else { logLik <- -0.5 * (n * p) * log(2 * pi) - LL } k <- length(par) + ncol(UU) AIC <- -2 * logLik + 2 * k BIC <- -2 * logLik + k * (log(n) - log(pi)) results <- list(cor.matrix = cor.matrix, d = d, B = B, B.se = B.se, B.cov = B.cov, B.zscore = B.zscore, B.pvalue = B.pvalue, logLik = logLik, AIC = AIC, BIC = BIC, REML = REML, constrain.d = constrain.d, XX = XX, UU = UU, MM = MM, Vphy = Vphy, R = R, V = V, C = C, convcode = opt$convergence, niter = opt$counts) class(results) <- "corphylo" return(results) } # Printing corphylo objects print.corphylo <- function(x, digits = max(3, getOption("digits") - 3), ...) { cat("Call to corphylo\n\n") logLik = x$logLik AIC = x$AIC BIC = x$BIC names(logLik) = "logLik" names(AIC) = "AIC" names(BIC) = "BIC" print(c(logLik, AIC, BIC), digits = digits) cat("\ncorrelation matrix:\n") rownames(x$cor.matrix) <- 1:dim(x$cor.matrix)[1] colnames(x$cor.matrix) <- 1:dim(x$cor.matrix)[1] print(x$cor.matrix, digits = digits) cat("\nfrom OU process:\n") d <- data.frame(d = x$d) print(d, digits = digits) if (x$constrain.d == TRUE) cat("\nvalues of d constrained to be in [0, 1]\n") cat("\ncoefficients:\n") coef <- data.frame(Value = x$B, Std.Error = x$B.se, Zscore = x$B.zscore, Pvalue = x$B.pvalue) rownames(coef) <- rownames(x$B) printCoefmat(coef, P.values = TRUE, has.Pvalue = TRUE) cat("\n") if (x$convcode != 0) cat("\nWarning: convergence in optim() not reached\n") } ape/R/print.lmorigin.R0000644000176200001440000000440514033475355014332 0ustar liggesusers'print.lmorigin' <- function(x, ...) { if(x$origin) { cat("\nRegression through the origin",'\n') } else { cat("\nMultiple regression with estimation of intercept",'\n') } cat("\nCall:\n") cat(deparse(x$call),'\n') if(x$origin) { names <- x$var.names[-1] } else { names <- c("(Intercept)",x$var.names[-1]) } cat("\nCoefficients and parametric test results \n",'\n') res <- as.data.frame(cbind(summary(x$reg)$coefficients[,1], summary(x$reg)$coefficients[,2], summary(x$reg)$coefficients[,3], summary(x$reg)$coefficients[,4])) rownames(res) <- names colnames(res) <- c("Coefficient","Std_error","t-value","Pr(>|t|)") printCoefmat(res, P.values=TRUE, signif.stars=TRUE) if(x$nperm > 0) { cat("\nTwo-tailed tests of regression coefficients\n",'\n') res2 <- as.data.frame(cbind(summary(x$reg)$coefficients[,1], x$p.param.t.2tail, x$p.perm.t.2tail)) rownames(res2) <- names colnames(res2) <- c("Coefficient","p-param","p-perm") nc <- 3 printCoefmat(res2, P.values=TRUE, signif.stars=TRUE, has.Pvalue = 3 && substr(colnames(res2)[3],1,6) == "p-perm") cat("\nOne-tailed tests of regression coefficients:",'\n') cat("test in the direction of the sign of the coefficient\n",'\n') res1 <- as.data.frame(cbind(summary(x$reg)$coefficients[,1], x$p.param.t.1tail, x$p.perm.t.1tail)) rownames(res1) <- names colnames(res1) <- c("Coefficient","p-param","p-perm") nc <- 3 printCoefmat(res1, P.values=TRUE, signif.stars=TRUE, has.Pvalue = 3 && substr(colnames(res1)[3],1,6) == "p-perm") } cat("\nResidual standard error:", summary(x$reg)$sigma, "on", summary(x$reg)$df[2],"degrees of freedom",'\n') cat("Multiple R-square:", summary(x$reg)$r.squared," Adjusted R-square:", summary(x$reg)$adj.r.squared,'\n') F <- summary(x$reg)$fstatistic[[1]] df1 <- summary(x$reg)$fstatistic[[2]] df2 <- summary(x$reg)$fstatistic[[3]] p.param.F <- pf(F, df1, df2, lower.tail=FALSE) cat("\nF-statistic:", F, "on", df1, "and", df2, "DF:\n") cat(" parametric p-value :", p.param.F,'\n') if(x$nperm > 0) { cat(" permutational p-value:", x$p.perm.F,'\n') if(x$method == "raw") { cat("after",x$nperm,"permutations of",x$method,"data",'\n','\n') } else { cat("after",x$nperm,"permutations of",x$method,"of full model",'\n','\n') } } invisible(x) } ape/R/delta.plot.R0000644000176200001440000000220212465112403013403 0ustar liggesusers## delta.plot.R (2010-01-12) ## Delta Plots ## Copyright 2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. delta.plot <- function(X, k = 20, plot = TRUE, which = 1:2) { if (is.matrix(X)) X <- as.dist(X) n <- attr(X, "Size") if (n < 4) stop("need at least 4 observations") ## add a category for the cases delta = 1 ans <- .C(delta_plot, as.double(X), as.integer(n), as.integer(k), integer(k + 1), double(n), NAOK = TRUE) counts <- ans[[4]] ## add the counts of delta=1 to the last category: counts[k] <- counts[k] + counts[k + 1] counts <- counts[-(k + 1)] delta.bar <- ans[[5]]/choose(n - 1, 3) if (plot) { if (length(which) == 2) layout(matrix(1:2, 1, 2)) if (1 %in% which) { barplot(counts, space = 0, xlab = expression(delta[q])) a <- axTicks(1) axis(1, at = a, labels = a/k) } if (2 %in% which) plot(delta.bar, type = "h", ylab = expression(bar(delta))) } invisible(list(counts = counts, delta.bar = delta.bar)) } ape/R/phymltest.R0000644000176200001440000001144212465112403013374 0ustar liggesusers## phymltest.R (2014-11-07) ## Fits a Bunch of Models with PhyML ## Copyright 2004-2014 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .phymltest.model <- c("JC69", "JC69+I", "JC69+G", "JC69+I+G", "K80", "K80+I", "K80+G", "K80+I+G", "F81", "F81+I", "F81+G", "F81+I+G", "F84", "F84+I", "F84+G", "F84+I+G", "HKY85", "HKY85+I", "HKY85+G", "HKY85+I+G", "TN93", "TN93+I", "TN93+G", "TN93+I+G", "GTR", "GTR+I", "GTR+G", "GTR+I+G") .phymltest.nfp <- c(1, 2, 2, 3, 2, 3, 3, 4, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, 9, 10, 10, 11) phymltest <- function(seqfile, format = "interleaved", itree = NULL, exclude = NULL, execname = NULL, append = TRUE) { os <- Sys.info()[1] ## default names of PhyML: if (is.null(execname)) { execname <- switch(os, "Linux" = { ## PhyML location for Debian and Fedora packages and maybe for other distributions (fix by Dylan A\"issi) if (file.exists("/usr/bin/phyml")) "/usr/bin/phyml" else "phyml_3.0.1_linux32" }, "Darwin" = "phyml_3.0.1_macintel", "Windows" = "phyml_3.0.1_win32") } if (is.null(execname)) stop("you must give an executable file name for PHYML") N <- length(.phymltest.model) format <- match.arg(format, c("interleaved", "sequential")) fmt <- rep("", N) if (format != "interleaved") fmt[] <- "-q" boot <- rep("-b 0", N) # to avoid any testing mdl <- paste("-m", rep(c("JC69", "K80", "F81", "F84", "HKY85", "TN93", "GTR"), each = 4)) # fix by Luiz Max Fagundes de Carvalho tstv <- rep("-t e", N) # ignored by PhyML with JC69 or F81 inv <- rep(c("", "-v e"), length.out = N) ## no need to use the -c option of PhyML (4 categories by default if '-a e' is set): alpha <- rep(rep(c("-c 1", "-a e"), each = 2), length.out = N) tree <- rep("", N) if (!is.null(itree)) tree[] <- paste("-u ", itree) cmd <- paste(execname, "-i", seqfile, fmt, boot, mdl, tstv, inv, alpha, tree, "--append ") outfile <- paste(seqfile, "_phyml_stats.txt", sep = "") if (!append) { unlink(outfile) unlink(paste(seqfile, "_phyml_tree.txt", sep = "")) } imod <- 1:N if (!is.null(exclude)) imod <- imod[!.phymltest.model %in% exclude] for (i in imod) system(cmd[i]) l <- readLines(outfile) l <- grep("Log-likelihood:", l, value = TRUE) ## in case there were already some results in the output file: if (dd <- length(l) - length(imod)) l <- l[-(1:dd)] loglik <- as.numeric(sub(". Log-likelihood:", "", l)) names(loglik) <- .phymltest.model[imod] class(loglik) <- "phymltest" loglik } print.phymltest <- function(x, ...) { nfp <- .phymltest.nfp[.phymltest.model %in% names(x)] X <- cbind(nfp, x, 2 * (nfp - x)) rownames(X) <- names(x) colnames(X) <- c("nb.free.para", "loglik", "AIC") print(X) } summary.phymltest <- function(object, ...) { nfp <- .phymltest.nfp[.phymltest.model %in% names(object)] N <- length(object) model1 <- model2 <- character(0) chi2 <- df <- P.val <- numeric(0) for (i in 1:(N - 1)) { for (j in (i + 1):N) { if (nfp[i] >= nfp[j]) next m1 <- unlist(strsplit(names(object)[i], "\\+")) m2 <- unlist(strsplit(names(object)[j], "\\+")) if (m1[1] == "K80" && m2[1] == "F81") next ## a verifier que ds les 2 lignes suivantes les conversions ## se font bien correctement!!!! if (length(grep("\\+I", names(object)[i])) > 0 && length(grep("\\+I", names(object)[j])) == 0) next if (length(grep("\\+G", names(object)[i])) > 0 && length(grep("\\+G", names(object)[j])) == 0) next ## Now we should be sure that m1 is nested in m2. chi2 <- c(chi2, 2 * (object[j] - object[i])) df <- c(df, nfp[j] - nfp[i]) P.val <- c(P.val, 1 - pchisq(2 * (object[j] - object[i]), nfp[j] - nfp[i])) model1 <- c(model1, names(object)[i]) model2 <- c(model2, names(object)[j]) } } data.frame(model1, model2, chi2, df, P.val = round(P.val, 4)) } plot.phymltest <- function(x, main = NULL, col = "blue", ...) { nfp <- .phymltest.nfp[.phymltest.model %in% names(x)] N <- length(x) aic <- 2 * (nfp - x) if (is.null(main)) main <- paste("Akaike information criterion for", deparse(substitute(x))) plot(rep(1, N), aic, bty = "n", xaxt = "n", yaxt = "n", type = "n", xlab = "", ylab = "", main = main, ...) axis(side = 2, pos = 0.85, las = 2) abline(v = 0.85) y.lab <- seq(min(aic), max(aic), length = N) segments(0.85, sort(aic), 1.1, y.lab, col = col) text(1.1, y.lab, parse(text = sub("\\+G", "\\+Gamma", names(sort(aic)))), adj = 0) } ape/R/as.phylo.R0000644000176200001440000001105714125232410013076 0ustar liggesusers## as.phylo.R (2021-05-05) ## Conversion Among Tree Objects ## Copyright 2005-2021 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. old2new.phylo <- function(phy) { mode(phy$edge) <- "numeric" phy$Nnode <- -min(phy$edge) n <- length(phy$tip.label) NODES <- phy$edge < 0 phy$edge[NODES] <- n - phy$edge[NODES] phy } new2old.phylo <- function(phy) { NTIP <- length(phy$tip.label) NODES <- phy$edge > NTIP phy$edge[NODES] <- NTIP - phy$edge[NODES] mode(phy$edge) <- "character" phy$Nnode <- NULL phy } as.phylo <- function (x, ...) { if (identical(class(x), "phylo")) return(x) UseMethod("as.phylo") } as.phylo.default <- function(x, ...) { if (inherits(x, "phylo")) return(x) stop('object does not inherit the class "phylo": found no appropriate method to convert it') } as.phylo.hclust <- function(x, ...) { N <- dim(x$merge)[1] edge <- matrix(0L, 2*N, 2) edge.length <- numeric(2*N) ## `node' gives the number of the node for the i-th row of x$merge node <- integer(N) node[N] <- N + 2L cur.nod <- N + 3L j <- 1L for (i in N:1) { edge[j:(j + 1), 1] <- node[i] for (l in 1:2) { k <- j + l - 1L y <- x$merge[i, l] if (y > 0) { edge[k, 2] <- node[y] <- cur.nod cur.nod <- cur.nod + 1L edge.length[k] <- x$height[i] - x$height[y] } else { edge[k, 2] <- -y edge.length[k] <- x$height[i] } } j <- j + 2L } if (is.null(x$labels)) x$labels <- as.character(1:(N + 1)) obj <- list(edge = edge, edge.length = edge.length / 2, tip.label = x$labels, Nnode = N) class(obj) <- "phylo" reorder(obj) } as.phylo.phylog <- function(x, ...) { tr <- read.tree(text = x$tre) n <- length(tr$tip.label) edge.length <- numeric(dim(tr$edge)[1]) term <- which(tr$edge[, 2] <= n) inte <- which(tr$edge[, 2] > n) edge.length[term] <- x$leaves[tr$tip.label] edge.length[inte] <- x$nodes[tr$node.label][-1] tr$edge.length <- edge.length if (x$nodes["Root"] != 0) { tr$edge.root <- x$nodes["Root"] names(tr$edge.root) <- NULL } tr } as.hclust.phylo <- function(x, ...) { if (!is.ultrametric(x)) stop("the tree is not ultrametric") if (!is.binary.phylo(x)) stop("the tree is not binary") if (!is.rooted(x)) stop("the tree is not rooted") n <- length(x$tip.label) if (n == 1) stop("needs n >= 2 observations for a classification") is_tip <- x$edge[,2] <= n order <- x$edge[is_tip, 2] if (n == 2) { m <- matrix(c(-1L, -2L), 1, 2) bt <- x$edge.length[1] } else { x$node.label <- NULL # by Jinlong Zhang (2010-12-15) bt <- branching.times(x) N <- n - 1L x <- reorder(x, "postorder") m <- matrix(x$edge[, 2], N, 2, byrow = TRUE) anc <- x$edge[c(TRUE, FALSE), 1] bt <- bt[as.character(anc)] # 1st, reorder ## 2nd, sort keeping the root branching time in last (in case of ## rounding error if there zero-lengthed branches nead the root) bt <- c(sort(bt[-N]), bt[N]) o <- match(names(bt), anc) m <- m[o, ] ## first renumber the tips: TIPS <- m <= n m[TIPS] <- -m[TIPS] ## then renumber the nodes: oldnodes <- as.numeric(names(bt))[-N] m[match(oldnodes, m)] <- 1:(N - 1) names(bt) <- NULL } obj <- list(merge = m, height = 2*bt, order = order, labels = x$tip.label, call = match.call(), method = "unknown") class(obj) <- "hclust" obj } if (getRversion() >= "2.15.1") utils::globalVariables(c("network", "network.vertex.names<-")) as.network.phylo <- function(x, directed = is.rooted(x), ...) { if (is.null(x$node.label)) x <- makeNodeLabel(x) res <- network(x$edge, directed = directed, ...) network.vertex.names(res) <- c(x$tip.label, x$node.label) res } as.igraph.phylo <- function(x, directed = is.rooted(x), use.labels = TRUE, ...) { ## local copy because x will be changed before evaluating is.rooted(x): directed <- directed if (use.labels) { if (is.null(x$node.label)) x <- makeNodeLabel(x) ## check added by Klaus: if (anyDuplicated(c(x$tip.label, x$node.label))) stop("Duplicated labels!") x$edge <- matrix(c(x$tip.label, x$node.label)[x$edge], ncol = 2) } igraph::graph_from_edgelist(x$edge, directed = directed) } ape/R/MoranI.R0000644000176200001440000001562213223162137012536 0ustar liggesusers## MoranI.R (2008-01-14) ## Moran's I Autocorrelation Index ## Copyright 2004 Julien Dutheil, 2007-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ## code cleaned-up by EP (Dec. 2007) Moran.I <- function(x, weight, scaled = FALSE, na.rm = FALSE, alternative = "two.sided") { if(dim(weight)[1] != dim(weight)[2]) stop("'weight' must be a square matrix") n <- length(x) if(dim(weight)[1] != n) stop("'weight' must have as many rows as observations in 'x'") ## Expected mean: ei <- -1/(n - 1) nas <- is.na(x) if (any(nas)) { if (na.rm) { x <- x[!nas] n <- length(x) weight <- weight[!nas, !nas] } else { warning("'x' has missing values: maybe you wanted to set na.rm = TRUE?") return(list(observed = NA, expected = ei, sd = NA, p.value = NA)) } } ## normalizing the weights: ## Note that we normalize after possibly removing the ## missing data. ROWSUM <- rowSums(weight) ## the following is useful if an observation has no "neighbour": ROWSUM[ROWSUM == 0] <- 1 weight <- weight/ROWSUM # ROWSUM is properly recycled s <- sum(weight) m <- mean(x) y <- x - m # centre the x's cv <- sum(weight * y %o% y) v <- sum(y^2) obs <- (n/s) * (cv/v) ## Scaling: if (scaled) { i.max <- (n/s) * (sd(rowSums(weight) * y)/sqrt(v/(n - 1))) obs <- obs/i.max } ## Expected sd: S1 <- 0.5 * sum((weight + t(weight))^2) S2 <- sum((apply(weight, 1, sum) + apply(weight, 2, sum))^2) ## the above is the same than: ##S2 <- 0 ##for (i in 1:n) ## S2 <- S2 + (sum(weight[i, ]) + sum(weight[, i]))^2 s.sq <- s^2 k <- (sum(y^4)/n) / (v/n)^2 sdi <- sqrt((n*((n^2 - 3*n + 3)*S1 - n*S2 + 3*s.sq) - k*(n*(n - 1)*S1 - 2*n*S2 + 6*s.sq))/ ((n - 1)*(n - 2)*(n - 3)*s.sq) - 1/((n - 1)^2)) alternative <- match.arg(alternative, c("two.sided", "less", "greater")) pv <- pnorm(obs, mean = ei, sd = sdi) if (alternative == "two.sided") pv <- if (obs <= ei) 2*pv else 2*(1 - pv) if (alternative == "greater") pv <- 1 - pv list(observed = obs, expected = ei, sd = sdi, p.value = pv) } weight.taxo <- function(x) { d <- outer(x, x, "==") diag(d) <- 0 # implicitly converts 'd' into numeric d } weight.taxo2 <- function(x, y) { d <- outer(x, x, "==") & outer(y, y, "!=") diag(d) <- 0 d } correlogram.formula <- function(formula, data = NULL, use = "all.obs") { err <- 'formula must be of the form "y1+...+yn ~ x1/x2/../xn"' use <- match.arg(use, c("all.obs", "complete.obs", "pairwise.complete.obs")) if (formula[[1]] != "~") stop(err) lhs <- formula[[2]] y.nms <- if (length(lhs) > 1) unlist(strsplit(as.character(as.expression(lhs)), " \\+ ")) else as.character(as.expression(lhs)) rhs <- formula[[3]] gr.nms <- if (length(rhs) > 1) rev(unlist(strsplit(as.character(as.expression(rhs)), "/"))) else as.character(as.expression(rhs)) if (is.null(data)) { ## we 'get' the variables in the .GlobalEnv: y <- as.data.frame(sapply(y.nms, get)) gr <- as.data.frame(sapply(gr.nms, get)) } else { y <- data[y.nms] gr <- data[gr.nms] } if (use == "all.obs") { na.fail(y) na.fail(gr) } if (use == "complete.obs") { sel <- complete.cases(y, gr) y <- y[sel] gr <- gr[sel] } na.rm <- use == "pairwise.complete.obs" foo <- function(x, gr, na.rm) { res <- data.frame(obs = NA, p.values = NA, labels = colnames(gr)) for (i in 1:length(gr)) { sel <- if (na.rm) !is.na(x) & !is.na(gr[, i]) else TRUE xx <- x[sel] g <- gr[sel, i] w <- if (i > 1) weight.taxo2(g, gr[sel, i - 1]) else weight.taxo(g) o <- Moran.I(xx, w, scaled = TRUE) res[i, 1] <- o$observed res[i, 2] <- o$p.value } ## We need to specify the two classes; if we specify ## only "correlogram", 'res' is coerced as a list ## (data frames are of class "data.frame" and mode "list") structure(res, class = c("correlogram", "data.frame")) } if (length(y) == 1) foo(y[[1]], gr, na.rm) else structure(lapply(y, foo, gr = gr, na.rm = na.rm), names = y.nms, class = "correlogramList") } plot.correlogram <- function(x, legend = TRUE, test.level = 0.05, col = c("grey", "red"), type = "b", xlab = "", ylab = "Moran's I", pch = 21, cex = 2, ...) { BG <- col[(x$p.values < test.level) + 1] if (pch > 20 && pch < 26) { bg <- col col <- CO <- "black" } else { CO <- BG BG <- bg <- NULL } plot(1:length(x$obs), x$obs, type = type, xaxt = "n", xlab = xlab, ylab = ylab, col = CO, bg = BG, pch = pch, cex = cex, ...) axis(1, at = 1:length(x$obs), labels = x$labels) if (legend) legend("top", legend = paste(c("P >=", "P <"), test.level), pch = pch, col = col, pt.bg = bg, pt.cex = cex, horiz = TRUE) } plot.correlogramList <- function(x, lattice = TRUE, legend = TRUE, test.level = 0.05, col = c("grey", "red"), xlab = "", ylab = "Moran's I", type = "b", pch = 21, cex = 2, ...) { n <- length(x) obs <- unlist(lapply(x, "[[", "obs")) pval <- unlist(lapply(x, "[[", "p.values")) gr <- factor(unlist(lapply(x, "[[", "labels")), ordered = TRUE, levels = x[[1]]$labels) vars <- gl(n, nlevels(gr), labels = names(x)) BG <- col[(pval < test.level) + 1] if (lattice) { ## trellis.par.set(list(plot.symbol=list(pch=19))) xyplot(obs ~ gr | vars, xlab = xlab, ylab = ylab, panel = function(x, y) { panel.lines(x, y, lty = 2) panel.points(x, y, cex = cex, pch = 19, col = BG) ##lattice::panel.abline(h = 0, lty = 3) }) } else { if (pch > 20 && pch < 26) { bg <- col CO <- rep("black", length(obs)) col <- "black" } else { CO <- BG BG <- bg <- NULL } plot(as.numeric(gr), obs, type = "n", xlab = xlab, ylab = ylab, xaxt = "n") for (i in 1:n) { sel <- as.numeric(vars) == i lines(as.numeric(gr[sel]), obs[sel], type = type, lty = i, col = CO[sel], bg = BG[sel], pch = pch, cex = cex, ...) } axis(1, at = 1:length(x[[i]]$obs), labels = x[[i]]$labels) if (legend) { legend("topright", legend = names(x), lty = 1:n, bty = "n") legend("top", legend = paste(c("P >=", "P <"), test.level), pch = pch, col = col, pt.bg = bg, pt.cex = cex, horiz = TRUE) } } } ape/R/print.parafit.R0000644000176200001440000000111712465112403014122 0ustar liggesusers'print.parafit' <- function(x, ...) { cat("\nTest of host-parasite coevolution",'\n','\n') cat("Global test: ParaFitGlobal =",x$ParaFitGlobal,", p-value =", x$p.global, "(", x$nperm,"permutations)",'\n','\n') n.links <- nrow(x$link.table) cat("There are",n.links,"host-parasite links in matrix HP",'\n','\n') cat("Test of individual host-parasite links", "(", x$nperm, "permutations)",'\n','\n') print(x$link.table) cat('\n',"Number of parasites per host",'\n') print(x$para.per.host) cat('\n',"Number of hosts per parasite",'\n') print(x$host.per.para) invisible(x) } ape/R/collapse.singles.R0000644000176200001440000000416114456450026014620 0ustar liggesusers## collapse.singles.R (2017-07-27) ## Collapse "Single" Nodes ## Copyright 2015 Emmanuel Paradis, 2017 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. has.singles <- function(tree) { fun <- function(x) { tab <- tabulate(x$edge[, 1]) if (any(tab == 1L)) return(TRUE) FALSE } if (inherits(tree, "phylo")) return(fun(tree)) if (inherits(tree, "multiPhylo")) return(sapply(tree, fun)) } collapse.singles <- function(tree, root.edge = FALSE) { n <- length(tree$tip.label) if (n == 0) { return(tree) } tree <- reorder(tree) # this works now e1 <- tree$edge[, 1] e2 <- tree$edge[, 2] tab <- tabulate(e1) if (all(tab[-c(1:n)] > 1)) return(tree) # tips are zero if (is.null(tree$edge.length)) { root.edge <- FALSE wbl <- FALSE } else { wbl <- TRUE el <- tree$edge.length } if (root.edge) ROOTEDGE <- 0 ## start with the root node: ROOT <- n + 1L while (tab[ROOT] == 1) { i <- which(e1 == ROOT) ROOT <- e2[i] if (wbl) { if (root.edge) ROOTEDGE <- ROOTEDGE + el[i] el <- el[-i] } e1 <- e1[-i] e2 <- e2[-i] } singles <- which(tabulate(e1) == 1) if (length(singles) > 0) { ii <- sort(match(singles, e1), decreasing = TRUE) jj <- match(e1[ii], e2) for (i in 1:length(singles)) { e2[jj[i]] <- e2[ii[i]] if (wbl) el[jj[i]] <- el[jj[i]] + el[ii[i]] } e1 <- e1[-ii] e2 <- e2[-ii] if (wbl) el <- el[-ii] } Nnode <- length(e1) - n + 1L oldnodes <- unique(e1) if (!is.null(tree$node.label)) tree$node.label <- tree$node.label[oldnodes - n] newNb <- integer(max(oldnodes)) newNb[ROOT] <- n + 1L sndcol <- e2 > n e2[sndcol] <- newNb[e2[sndcol]] <- n + 2:Nnode e1 <- newNb[e1] tree$edge <- cbind(e1, e2, deparse.level = 0) tree$Nnode <- Nnode if (wbl) { if (root.edge) tree$root.edge <- ROOTEDGE tree$edge.length <- el } tree } ape/R/CDF.birth.death.R0000644000176200001440000005440514207642475014155 0ustar liggesusers## CDF.birth.death.R (2022-03-02) ## Functions to Simulate and Fit Time-Dependent Birth-Death Models ## Copyright 2010-2022 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. integrateTrapeze <- function(FUN, from, to, nint = 10) ## compute an integral with a simple trapeze method ## (apparently, Vectorize doesn't give faster calculation) { x <- seq(from = from, to = to, length.out = nint + 1) ## reorganized to minimize the calls to FUN: out <- FUN(x[1]) + FUN(x[length(x)]) for (i in 2:nint) out <- out + 2 * FUN(x[i]) (x[2] - x[1]) * out/2 # (x[2] - x[1]) is the width of the trapezes } ## case: ## 1: birth and death rates constant ## 2: no primitive available ## 3: primitives are available ## 4: death rate constant, no primitive available ## 5: birth rate constant, no primitive available ## 6: death rate constant, primitive available for birth(t) ## 7: birth rate constant, primitive available for death(t) .getCase <- function(birth, death, BIRTH = NULL, DEATH = NULL) { if (is.numeric(birth)) { if (is.numeric(death)) 1 else { if (is.null(DEATH)) 5 else 7 } } else { if (is.numeric(death)) { if (is.null(BIRTH)) 4 else 6 } else if (is.null(BIRTH) || is.null(DEATH)) 2 else 3 } } ## if (getRversion() >= "2.15.1") -- R 3.2.0 is required for ape utils::globalVariables("Tmax") .getRHOetINT <- function(birth, death, BIRTH = NULL, DEATH = NULL, case, fast) { ## build the RHO(), \rho(t), and INT(), I(t), functions switch (case, { # case 1: RHO <- function(t1, t2) (t2 - t1)*(death - birth) INT <- function(t) { rho <- death - birth death*(exp(rho*(Tmax - t)) - 1)/rho } },{ # case 2: if (fast) { RHO <- function(t1, t2) integrateTrapeze(function(t) death(t) - birth(t), t1, t2) INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrateTrapeze(FOO, t, Tmax) } } else { RHO <- function(t1, t2) integrate(function(t) death(t) - birth(t), t1, t2)$value INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrate(Vectorize(FOO), t, Tmax)$value # Vectorize required } } },{ # case 3: RHO <- function(t1, t2) DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1) INT <- function(t) { # vectorized FOO <- function(u) exp(RHO(tt, u)) * death(u) out <- t for (i in 1:length(t)) { tt <- t[i] out[i] <- integrate(FOO, tt, Tmax)$value } out } },{ # case 4: if (fast) { RHO <- function(t1, t2) death * (t2 - t1) - integrateTrapeze(birth, t1, t2) INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death integrateTrapeze(Vectorize(FOO), t, Tmax) } } else { RHO <- function(t1, t2) death * (t2 - t1) - integrate(birth, t1, t2)$value INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death integrate(Vectorize(FOO), t, Tmax)$value } } },{ # case 5: RHO <- function(t1, t2) integrate(death, t1, t2)$value - birth * (t2 - t1) if (fast) { INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrateTrapeze(FOO, t, Tmax) } } else { INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrate(Vectorize(FOO), t, Tmax)$value } } },{ # case 6: RHO <- function(t1, t2) death * (t2 - t1) - BIRTH(t2) + BIRTH(t1) INT <- function(t) { # vectorized FOO <- function(u) exp(RHO(tt, u)) * death out <- t for (i in 1:length(t)) { tt <- t[i] out[i] <- integrate(FOO, tt, Tmax)$value } out } },{ # case 7: RHO <- function(t1, t2) DEATH(t2) - DEATH(t1) - birth * (t2 - t1) if (fast) { INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrateTrapeze(FOO, t, Tmax) } } else { INT <- function(t) { FOO <- function(u) exp(RHO(t, u)) * death(u) integrate(Vectorize(FOO), t, Tmax)$value } } }) list(RHO, INT) } CDF.birth.death <- function(birth, death, BIRTH = NULL, DEATH = NULL, Tmax, x, case, fast = FALSE) { ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, fast) RHO <- ff[[1]] INT <- ff[[2]] environment(INT) <- environment() # so that INT() can find Tmax .CDF.birth.death2(RHO, INT, birth, death, BIRTH, DEATH, Tmax, x, case, fast) } .CDF.birth.death2 <- function(RHO, INT, birth, death, BIRTH, DEATH, Tmax, x, case, fast) { Pi <- if (case %in% c(1, 5, 7)) function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth else function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth(t) if (!case %in% c(1, 3, 6)) Pi <- Vectorize(Pi) denom <- if (fast) integrateTrapeze(Pi, 0, Tmax) else integrate(Pi, 0, Tmax)$value n <- length(x) p <- numeric(n) if (fast) { for (i in 1:n) p[i] <- integrateTrapeze(Pi, 0, x[i]) } else { for (i in 1:n) p[i] <- integrate(Pi, 0, x[i])$value } p/denom } .makePhylo <- function(edge, edge.length, i) { NODES <- edge > 0 edge[NODES] <- edge[NODES] + i + 1L edge[!NODES] <- 1:(i + 1L) phy <- list(edge = edge, edge.length = edge.length, tip.label = paste("t", 1:(i + 1), sep = ""), Nnode = i) class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } rlineage <- function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6) { case <- .getCase(birth, death, BIRTH, DEATH) rTimeToEvent <- function(t) { ## CDF of the times to event (speciation or extinction): switch (case, { # case 1: Foo <- function(t, x) 1 - exp(-(birth + death)*(x - t)) },{ # case 2: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-integrate(function(t) birth(t) + death(t), t, x)$value) } },{ # case 3: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-(BIRTH(x) - BIRTH(t) + DEATH(x) - DEATH(t))) } },{ # case 4: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-(integrate(function(t) birth(t), t, x)$value + death*(x - t))) } },{ # case 5: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-(birth*(x - t) + integrate(function(t) death(t), t, x)$value)) } },{ # case 6: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-(BIRTH(x) - BIRTH(t) + death*(x - t))) } },{ # case 7: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-(birth*(x - t) + DEATH(x) - DEATH(t))) } }) ## generate a random time to event by the inverse method: P <- runif(1) ## in case speciation probability is so low ## that time to speciation is infinite: if (Foo(t, Tmax) < P) return(Tmax + 1) inc <- 10 x <- t + inc while (inc > eps) { # la precision influe sur le temps de calcul if (Foo(t, x) > P) { x <- x - inc inc <- inc/10 } else x <- x + inc } x - t } if (case == 1) speORext <- function(t) birth/(birth + death) if (case == 2 || case == 3) speORext <- function(t) birth(t)/(birth(t) + death(t)) if (case == 4 || case == 6) speORext <- function(t) birth(t)/(birth(t) + death) if (case == 5 || case == 7) speORext <- function(t) birth/(birth + death(t)) ## the recursive function implementing algorithm 1 foo <- function(node) { for (k in 0:1) { X <- rTimeToEvent(t[node]) tmp <- t[node] + X ## is the event a speciation or an extinction? if (tmp >= Tmax) { Y <- 0 tmp <- Tmax } else Y <- rbinom(1, size = 1, prob = speORext(tmp)) j <<- j + 1L edge.length[j] <<- tmp - t[node] if (Y) { i <<- i + 1L t[i] <<- tmp ## set internal edge: edge[j, ] <<- c(node, i) foo(i) } else ## set terminal edge: edge[j, ] <<- c(node, 0L) } } edge <- matrix(0L, 1e5, 2) edge.length <- numeric(1e5) j <- 0L; i <- 1; t <- 0 foo(1L) .makePhylo(edge[1:j, ], edge.length[1:j], i) } drop.fossil <- function(phy, tol = 1e-8) { n <- Ntip(phy) x <- dist.nodes(phy)[n + 1, ][1:n] drop.tip(phy, which(x < max(x) - tol)) } rbdtree <- function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6) { case <- .getCase(birth, death, BIRTH, DEATH) ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, FALSE) RHO <- ff[[1]] INT <- ff[[2]] ## so that RHO() and INT() can find Tmax: environment(RHO) <- environment(INT) <- environment() rtimetospe <- function(t) { ## CDF of the times to speciation: Foo <- if (case %in% c(1, 5, 7)) function(t, x) 1 - exp(-birth*(x - t)) else { if (case %in% c(3, 6)) function(t, x) 1 - exp(-(BIRTH(x) - BIRTH(t))) else { function(t, x) { if (t == x) return(0) 1 - exp(-integrate(birth, t, x)$value) } } } ## generate a random time to speciation by the inverse method: P <- runif(1) ## in case speciation probability is so low ## that time to speciation is infinite: if (Foo(t, Tmax) < P) return(Tmax + 1) inc <- 10 x <- t + inc while (inc > eps) { # la precision influe sur le temps de calcul if (Foo(t, x) > P) { x <- x - inc inc <- inc/10 } else x <- x + inc } x - t } ## the recursive function implementing algorithm 2 foo <- function(node, start) { node <- node # make a local copy for (k in 0:1) { tau <- start # because tau is changed below NoDesc <- TRUE X <- rtimetospe(tau) while (X < Tmax - tau) { tau <- tau + X ## does the new lineage survive until Tmax? Y <- rbinom(1, size = 1, prob = 1/(1 + INT(tau))) if (Y) { i <<- i + 1L t[i] <<- tau ## set internal edge: j <<- j + 1L edge[j, ] <<- c(node, i) edge.length[j] <<- tau - t[node] foo(i, t[i]) NoDesc <- FALSE break } X <- rtimetospe(tau) } ## set terminal edge: if (NoDesc) { j <<- j + 1L edge[j, 1] <<- node # the 2nd column is already set to 0 edge.length[j] <<- Tmax - t[node] } } } edge <- matrix(0L, 1e5, 2) edge.length <- numeric(1e5) j <- 0L; i <- 1L; t <- 0 foo(1L, 0) .makePhylo(edge[1:j, ], edge.length[1:j], i) } bd.time <- function(phy, birth, death, BIRTH = NULL, DEATH = NULL, ip, lower, upper, fast = FALSE, boot = 0, trace = 0) { guess.bounds <- if (missing(lower)) TRUE else FALSE BIG <- 1e10 PrNt <- function(t, T, x) { tmp <- exp(-RHO(t, T)) Wt <- tmp * (1 + INT(t)) out <- numeric(length(x)) zero <- x == 0 if (length(zero)) { out[zero] <- 1 - tmp/Wt out[!zero] <- (tmp/Wt^2)*(1 - 1/Wt)^(x[!zero] - 1) } else out[] <- (tmp/Wt^2)*(1 - 1/Wt)^(x - 1) out } case <- .getCase(birth, death, BIRTH, DEATH) if (is.function(birth)) { paranam <- names(formals(birth)) if (guess.bounds) { upper <- rep(BIG, length(paranam)) lower <- -upper } formals(birth) <- alist(t=) environment(birth) <- environment() if (!is.null(BIRTH)) environment(BIRTH) <- environment() } else { paranam <- "birth" if (guess.bounds) { upper <- 1 lower <- 0 } } if (is.function(death)) { tmp <- names(formals(death)) np2 <- length(tmp) if (guess.bounds) { upper <- c(upper, rep(BIG, np2)) lower <- c(lower, rep(-BIG, np2)) } paranam <- c(paranam, tmp) formals(death) <- alist(t=) environment(death) <- environment() if (!is.null(DEATH)) environment(DEATH) <- environment() } else { paranam <- c(paranam, "death") if (guess.bounds) { upper <- c(upper, .1) lower <- c(lower, 0) } } np <- length(paranam) ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case = case, fast = fast) RHO <- ff[[1]] INT <- ff[[2]] environment(RHO) <- environment(INT) <- environment() x <- branching.times(phy) n <- length(x) Tmax <- x[1] x <- Tmax - x # change the time scale so the root is t=0 x <- sort(x) foo <- function(para) { for (i in 1:np) assign(paranam[i], para[i], pos = sys.frame(1)) p <- CDF.birth.death(birth, death, BIRTH, DEATH, Tmax = Tmax, x = x, case = case, fast = fast) ## w is the probability of the observed tree size (= number of tips) w <- PrNt(0, Tmax, Ntip(phy)) ## p is the expected CDF of branching times ## ecdf(x)(x) is the observed CDF sum((1:n/n - p)^2)/w # faster than sum((ecdf(x)(x) - p)^2)/w } if (missing(ip)) ip <- (upper - lower)/2 out <- nlminb(ip, foo, control = list(trace = trace, eval.max = 500), upper = upper, lower = lower) names(out$par) <- paranam names(out)[2] <- "SS" if (boot) { # nonparametric version PAR <- matrix(NA, boot, np) i <- 1L while (i <= boot) { cat("\rDoing bootstrap no.", i, "\n") x <- sort(sample(x, replace = TRUE)) o <- try(nlminb(ip, foo, control = list(trace = 0, eval.max = 500), upper = upper, lower = lower)) if (inherits(o, "list")) { PAR[i, ] <- o$par i <- i + 1L } } out$boot <- PAR } out } LTT <- function(birth = 0.1, death = 0, N = 100, Tmax = 50, PI = 95, scaled = TRUE, eps = 0.1, add = FALSE, backward = TRUE, ltt.style = list("black", 1, 1), pi.style = list("blue", 1, 2), ...) { case <- .getCase(birth, death, NULL, NULL) Time <- seq(0, Tmax, eps) F <- CDF.birth.death(birth, death, BIRTH = NULL, DEATH = NULL, Tmax = Tmax, x = Time, case = case, fast = TRUE) if (PI) { i <- (1 - PI/100)/2 Flow <- qbinom(i, N - 2, F) Fup <- qbinom(1 - i, N - 2, F) if (scaled) { Flow <- Flow/N Fup <- Fup/N } } if (!scaled) F <- F * N if (backward) Time <- Time - Tmax if (add) lines(Time, F, "l", col = ltt.style[[1]], lwd = ltt.style[[2]], lty = ltt.style[[3]]) else plot(Time, F, "l", col = ltt.style[[1]], lwd = ltt.style[[2]], lty = ltt.style[[3]], ylab = "Number of lineages", ...) if (PI) lines(c(Time, NA, Time), c(Flow, NA, Fup), col = pi.style[[1]], lwd = pi.style[[2]], lty = pi.style[[3]]) } rphylo <- function(n, birth, death, BIRTH = NULL, DEATH = NULL, T0 = 50, fossils = FALSE, eps = 1e-6) { case <- .getCase(birth, death, BIRTH, DEATH) ## Foo(): CDF of the times to event (speciation or extinction) ## rTimeToEvent(): generate a random time to event by the inverse method switch(case, { # case 1: rTimeToEvent <- function(t) t - rexp(1, N * (birth + death)) # much faster than using Foo() speORext <- function(t) birth/(birth + death) ## Foo <- function(t, x) ## 1 - exp(-N*(birth + death)*(x - t)) },{ # case 2: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-integrate(function(t) birth(t) + death(t), t, x)$value * N) } speORext <- function(t) birth(t)/(birth(t) + death(t)) },{ # case 3: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-N*(BIRTH(x) - BIRTH(t) + DEATH(x) - DEATH(t))) } speORext <- function(t) birth(t)/(birth(t) + death(t)) },{ # case 4: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-N*(integrate(function(t) birth(t), t, x)$value + death*(x - t))) } speORext <- function(t) birth(t)/(birth(t) + death) },{ # case 5: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-N*(birth*(x - t) + integrate(function(t) death(t), t, x)$value)) } speORext <- function(t) birth/(birth + death(t)) },{ # case 6: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-N*(BIRTH(x) - BIRTH(t) + death*(x - t))) } speORext <- function(t) birth(t)/(birth(t) + death) },{ # case 7: Foo <- function(t, x) { if (t == x) return(0) 1 - exp(-N*(birth*(x - t) + DEATH(x) - DEATH(t))) } speORext <- function(t) birth/(birth + death(t)) }) if (case != 1) { rTimeToEvent <- function(t) { P <- runif(1) inc <- 10 x <- t - inc while (inc > eps) { if (Foo(x, t) > P) { # fixed by Niko Yasui (2016-07-06) + fixed 2019-11-07 x <- x + inc inc <- inc/10 } x <- x - inc } x } } storage.mode(n) <- "integer" N <- n t <- T0 j <- 0L # number of edges already created POOL <- seq_len(N) # initial pool (only tips at start) if (!fossils) { Nedge <- 2L * N - 2L nextnode <- 2L * N - 1L e1 <- integer(Nedge) e2 <- integer(Nedge) TIME <- numeric(nextnode) # record the times TIME[POOL] <- T0 # the times of the n tips are the present time while (j < Nedge) { X <- rTimeToEvent(t) ## is the event a speciation or an extinction? Y <- rbinom(1, size = 1, prob = speORext(X)) if (Y) { # speciation i <- sample.int(N, 2) fossil <- POOL[i] == 0 if (any(fossil)) { ## we drop the fossil lineage, or the first one if both are fossils POOL <- POOL[-i[which(fossil)[1]]] } else { # create a node and an edge j <- j + 2L k <- c(j - 1, j) e1[k] <- nextnode e2[k] <- POOL[i] TIME[nextnode] <- X POOL <- c(POOL[-i], nextnode) nextnode <- nextnode - 1L } N <- N - 1L } else { # extinction => create a tip, store it in POOL but don't create an edge ## fossil lineages are numbered 0 to find them if Y = 1 N <- N + 1L POOL <- c(POOL, 0L) } t <- X } Nnode <- n - 1L } else { # fossils = TRUE nextnode <- -1L # nodes are numbered with negatives nexttip <- N + 1L # tips are numbered with positives e1 <- integer(1e5) e2 <- integer(1e5) time.tips <- numeric(1e5) # accessed with positive indices time.nodes <- numeric(1e5) # accessed with negative indices time.tips[POOL] <- T0 # the times of the n living tips are the present time while (N > 1) { X <- rTimeToEvent(t) ## is the event a speciation or an extinction? Y <- rbinom(1, size = 1, prob = speORext(X)) if (Y) { # speciation => create a node i <- sample.int(N, 2) j <- j + 2L k <- c(j - 1, j) e1[k] <- nextnode e2[k] <- POOL[i] time.nodes[-nextnode] <- X POOL <- c(POOL[-i], nextnode) nextnode <- nextnode - 1L N <- N - 1L } else { # extinction => create a tip N <- N + 1L time.tips[nexttip] <- X POOL <- c(POOL, nexttip) nexttip <- nexttip + 1L } t <- X } n <- nexttip - 1L # update n Nnode <- n - 1L EDGE <- seq_len(j) e1 <- e1[EDGE] e2 <- e2[EDGE] e1 <- e1 + n + Nnode + 1L # e1 has only nodes... NODES <- e2 < 0 # ... so this is needed only on e2 e2[NODES] <- e2[NODES] + n + Nnode + 1L ## concatenate the vectors of times after dropping the extra 0's: TIME <- c(time.tips[seq_len(n)], rev(time.nodes[seq_len(Nnode)])) } obj <- list(edge = cbind(e1, e2, deparse.level = 0), edge.length = TIME[e2] - TIME[e1], tip.label = paste0("t", seq_len(n)), Nnode = Nnode) class(obj) <- "phylo" reorder(obj) } ape/R/ltt.plot.R0000644000176200001440000001220514020062153013114 0ustar liggesusers## ltt.plot.R (2021-03-01) ## Lineages Through Time Plot ## Copyright 2002-2021 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ltt.plot.coords <- function(phy, backward = TRUE, tol = 1e-6, type = "S") { if (!type %in% c("S", "s")) { stop("argument type be \"S\" or \"s\".") } if (has.singles(phy)) phy <- collapse.singles(phy, root.edge = TRUE) # added 2021-03-01 if (is.ultrametric(phy, tol)) { if (is.binary.phylo(phy)) { N <- numeric(phy$Nnode + 1) N[] <- 1 } else { node.order <- tabulate(phy$edge[, 1]) N <- node.order[-(1:length(phy$tip.label))] - 1 } bt <- branching.times(phy) names(bt) <- NULL o <- order(bt, decreasing = TRUE) time <- c(-bt[o], 0) if (!is.binary.phylo(phy)) N <- c(1, N[o]) } else { if (!is.binary.phylo(phy)) phy <- multi2di(phy) n <- Ntip(phy) m <- phy$Nnode ROOT <- n + 1L event <- time.event <- numeric(n + m) time.event[ROOT] <- 0 phy <- reorder(phy) for (i in 1:nrow(phy$edge)) time.event[phy$edge[i, 2]] <- time.event[phy$edge[i, 1]] + phy$edge.length[i] present <- max(time.event) event[1:n] <- -1 event[ROOT:(n + m)] <- 1 ## delete the events that are too close to present: past.event <- present - time.event > tol event <- event[past.event] time.event <- time.event[past.event] ## reorder wrt time: o <- order(time.event) time.event <- time.event[o] event <- event[o] time <- c(time.event - present, 0) N <- c(1, event) } N <- cumsum(N) if (!is.null(phy$root.edge)) { time <- c(time[1] - phy$root.edge, time) N <- c(1, N) } if (!backward) time <- time - time[1] if (type == "s") { N <- c(N[2:length(N)], N[length(N)]) } cbind(time, N) } ltt.plot <- function(phy, xlab = "Time", ylab = "N", backward = TRUE, tol = 1e-6, ...) { if (!inherits(phy, "phylo")) stop("object \"phy\" is not of class \"phylo\"") xy <- ltt.plot.coords(phy, backward, tol, type = "S") plot.default(xy, xlab = xlab, ylab = ylab, xaxs = "r", yaxs = "r", type = "S", ...) } ltt.lines <- function(phy, backward = TRUE, tol = 1e-6, ...) { xy <- ltt.plot.coords(phy, backward, tol, type = "S") lines(xy, type = "S", ...) } mltt.plot <- function(phy, ..., dcol = TRUE, dlty = FALSE, legend = TRUE, xlab = "Time", ylab = "N", log = "", backward = TRUE, tol = 1e-6) { if (inherits(phy, "phylo")) { # if a tree of class "phylo" TREES <- list(ltt.plot.coords(phy, backward, tol, type = "S")) names(TREES) <- deparse(substitute(phy)) } else { # a list of trees TREES <- lapply(phy, ltt.plot.coords, backward = backward, tol = tol, type = "S") names(TREES) <- names(phy) if (is.null(names(TREES))) names(TREES) <- paste(deparse(substitute(phy)), "-", 1:length(TREES)) } dts <- list(...) n <- length(dts) if (n) { mc <- as.character(match.call())[-(1:2)] nms <- mc[1:n] for (i in 1:n) { if (inherits(dts[[i]], "phylo")) { a <- list(ltt.plot.coords(dts[[i]], backward, tol, type = "S")) names(a) <- nms[i] } else { # a list of trees a <- lapply(dts[[i]], ltt.plot.coords, backward = backward, tol = tol, type = "S") names(a) <- names(dts[[i]]) if (is.null(names(a))) names(a) <- paste(deparse(substitute(phy)), "-", seq_along(a)) } TREES <- c(TREES, a) } } n <- length(TREES) range.each.tree <- sapply(TREES, function(x) range(x[, 1])) xl <- range(range.each.tree) yl <- c(1, max(sapply(TREES, function(x) max(x[, 2])))) ## if backward is FALSE, we have to rescale the time scales of each tree: if (!backward) { for (i in seq_along(TREES)) { tmp <- TREES[[i]] tmp[, 1] <- tmp[, 1] + xl[2] - range.each.tree[2, i] TREES[[i]] <- tmp } } plot.default(NA, type = "n", xlim = xl, ylim = yl, xaxs = "r", yaxs = "r", xlab = xlab, ylab = ylab, log = log) lty <- if (!dlty) rep(1, n) else 1:n col <- if (!dcol) rep(1, n) else topo.colors(n) for (i in 1:n) lines(TREES[[i]], col = col[i], lty = lty[i], type = "S") if (legend) legend(xl[1], yl[2], legend = names(TREES), lty = lty, col = col, bty = "n") } ltt.coplot <- function(phy, backward = TRUE, ...) { layout(matrix(1:2, 2)) par(mar = c(0, 3, 0.5, 0.5)) o <- plot(phy, root.edge = TRUE, ...) par(mar = c(3, 3, 0, 0.5)) ltt.plot(phy, xlim = o$x.lim, backward = FALSE, xaxt = "n") if (backward) axisPhylo() else axis(1) } ape/R/read.GenBank.R0000644000176200001440000001262114605156500013566 0ustar liggesusers## read.GenBank.R (2024-02-13) ## Read DNA Sequences and Annotations from GenBank ## Copyright 2002-2022 Emmanuel Paradis and Klaus Schliep 2024 ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. read.GenBank <- function(access.nb, seq.names = access.nb, species.names = TRUE, as.character = FALSE, chunk.size = 400, quiet = TRUE, type = "DNA") { type <- match.arg(type, c("DNA", "AA")) db <- ifelse(type == "DNA", "nucleotide", "protein") chunk.size <- as.integer(chunk.size) N <- length(access.nb) ## if more than 400 sequences, we break down the requests a <- 1L b <- if (N > chunk.size) chunk.size else N fl <- paste0(tempfile(), ".fas") if (!quiet) cat("Note: chunk.size =", chunk.size, "(max nb of sequences downloaded together)\n") repeat { if (!quiet) cat("\rDownloading sequences:", b, "/", N, "...") URL <- paste0("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=", db, "&id=", paste(access.nb[a:b], collapse = ","), "&rettype=fasta&retmode=text") X <- scan(file = URL, what = "", sep = "\n", quiet = TRUE) cat(X, sep = "\n", file = fl, append = TRUE) if (b == N) break a <- b + 1L b <- b + chunk.size if (b > N) b <- N } if (!quiet) { cat(" Done.\nNote: the downloaded sequences are in file:", fl) cat("\nReading sequences...") } res <- read.FASTA(fl, type = type) if (is.null(res)) return(NULL) attr(res, "description") <- names(res) if (length(access.nb) != length(res)) { names(res) <- gsub("\\..*$", "", names(res)) failed <- paste(access.nb[! access.nb %in% names(res)], collapse = ", ") warning(paste0("cannot get the following sequence(s):\n", failed)) } else names(res) <- access.nb if (as.character) res <- as.character(res) if (!quiet) cat("\n") if (species.names) { a <- 1L b <- if (N > chunk.size) chunk.size else N sp <- character(0) repeat { if (!quiet) cat("\rDownloading species names:", b, "/", N) URL <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=", db, "&id=", paste(access.nb[a:b], collapse = ","), "&rettype=gb&retmode=text", sep = "") X <- scan(file = URL, what = "", sep = "\n", quiet = TRUE, n = -1) sp <- c(sp, gsub(" +ORGANISM +", "", grep("ORGANISM", X, value = TRUE))) if (b == N) break a <- b + 1L b <- b + chunk.size if (b > N) b <- N } if (!quiet) cat(".\n") attr(res, "species") <- gsub(" ", "_", sp) } if (!quiet) cat("Done.\n") res } .parse.annotations.file <- function(file) { get.product.others.gene <- function(a, b) { res <- rep(NA_character_, 3L) if (a > b) return(res) y <- x[a:b] li <- length(i <- grep("product\t", y)) lj <- length(j <- grep("gene\t", y)) if (li) res[1L] <- gsub("product\t", "", y[i]) if (lj) res[2L] <- gsub("gene\t", "", y[j]) if (length(y) > li + lj) res[3L] <- paste(y[-c(i, j)], collapse = "; ") res } convert.with.incomplete <- function(vec) { i <- grep("<|>", vec) if (length(i)) { incomplete <<- c(incomplete, i) vec[i] <- gsub("<|>", "", vec[i]) } as.integer(vec) } x <- scan(file, what = "", sep = "\n", quiet = TRUE, skip = 1) n <- length(x) i <- grep("^\t\t\t", x) i2 <- seq_len(n)[-i] Y <- strsplit(x[i2], "\t") incomplete <- NULL start <- convert.with.incomplete(sapply(Y, "[", 1L)) end <- convert.with.incomplete(sapply(Y, "[", 2L)) if (!is.null(incomplete)) { incomplete <- sort(unique(incomplete)) warning(paste("features were incomplete in row(s):", paste(incomplete, collapse = ", "))) } res <- data.frame(start, end) sel <- which(!duplicated.data.frame(res)) res <- res[sel, ] res$type <- sapply(Y, "[", 3L)[sel] i3 <- i2[sel] from <- i3 + 1L to <- c(i3[-1L] - 1L, n) x[i] <- gsub("^\t\t\t", "", x[i]) Z <- mapply(get.product.others.gene, from, to) res$product <- Z[1L, ] gene <- Z[2L, ] res$others <- gsub("\t", ": ", Z[3L, ]) if (!all(is.na(gene))) res$gene <- gene row.names(res) <- as.character(seq_len(nrow(res))) res } getAnnotationsGenBank <- function(access.nb, quiet = TRUE) { N <- length(access.nb) res <- setNames(vector("list", N), access.nb) notfound <- NULL s1 <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=" s3 <- "&rettype=ft&retmode=text" for (i in 1:N) { if (!quiet) cat("\rDownloading annotations:", i, "/", N) URL <- paste0(s1, access.nb[i], s3) fl <- tempfile() ans <- try(download.file(URL, fl, quiet = TRUE), silent = TRUE) if (inherits(ans, "try-error")) { notfound <- c(notfound, access.nb[i]) next } res[[i]] <- .parse.annotations.file(fl) } if (!quiet) cat(". Done.\n") if (!is.null(notfound)) { warning(paste0("cannot get features for the following accession(s):\n", paste(notfound, collapse = ", "))) if (length(notfound) == N) return(NULL) } if (N == 1) res[[1L]] else res } ape/R/reconstruct.R0000644000176200001440000003635314533612366013741 0ustar liggesusers## reconstruct.R (2022-06-02) ## Ancestral Character Estimation ## Copyright 2014-2022 Manuela Royer-Carenzi, Didier Gilles ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. #renvoie la racine d'arbre racine <- function(arbre) { Ntip(arbre) + 1 } # renvoie une liste dont la premiere composante est l'arbre renumerote # de telle sorte que l'index d'un enfant est superieur a celui de son pere, # la seconde compopsante est la fonction de l'index initial vers le second, # et la troisieme son inverse # (attention probleme pour l'image de 0 mise a l'image du max) # renumeroteArbre <- function(arbre) { m <- Ntip(arbre) + Nnode(arbre) v<-numeric(m) t<-numeric(m) stack<-numeric(m) istack<-1 stack[istack]<-racine(arbre) codeI<-1 codeL<-Nnode(arbre)+1 while(istack>0){ cour<-stack[istack] istack<-istack-1 l <- which(arbre$edge[, 1] == cour) if(length(l)>0){ v[cour] <- codeI t[codeI] <- cour codeI <- codeI+1 for(i in 1:length(l)) { istack<-istack+1 stack[istack] <- arbre$edge[l[i], 2] } } else { v[cour] <- codeL t[codeL] <- cour codeL <- codeL+1 } } arbrebis<-arbre #renumeroter les noms for(i in 1:Nedge(arbre)) { arbrebis$edge[i,1] <- v[arbre$edge[i,1]] arbrebis$edge[i,2] <- v[arbre$edge[i,2]] } l <- list(arbre = arbrebis, cod = v, dec = t) l } #calcule la matrice C selon le modele BM ou ABM # calculeC_ABM <- function(arbre) { m <- max(arbre[["edge"]]) C <- matrix(0,nrow=m,ncol=m) for(i in 1:(m)) { l <- which(arbre$edge[, 2] == i) if(length(l)>0){ for(j in 1:(m)) { C[j,i] <- C[j, arbre$edge[l[1], 1]] } } C[i,i]<-1; } t(C) } #calcule la matrice C selon le modele OU ou OU* # calculeC_OU <- function(arbre, a) { m <- max(arbre[["edge"]]) C <- matrix(0,nrow=m,ncol=m) for(i in 1:(m)) { l <- which(arbre$edge[, 2] == i) if(length(l)>0){ for(j in 1:(m)) { C[j,i] <- C[j, arbre$edge[l[1], 1]]*exp(-a*arbre$edge.length[l[1]]) } } C[i,i]<-1; } t(C) } #calcule la matrice C selon le modele type qui vaut ABM ou OU calculeC <- function(type, arbre, a) { switch(type, ABM = calculeC_ABM(arbre), OU = calculeC_OU(arbre, a)) } ### calcul Variance getSumSquare <- function(value, arbre) { sum <- 0. for(eu in 1:Nedge(arbre)) sum <- sum + (value[arbre$edge[eu,2]]-value[arbre$edge[eu,1]])^2/arbre$edge.length[eu] sum } getMLHessian <- function(value, arbre) { sumSqu <- getSumSquare(value, arbre) nI <- Nnode(arbre) nT <- length(arbre$tip.label) nE <- nI+nT-1 sizeH<-nI+1 hessian <- matrix(0., nrow=sizeH, ncol=sizeH) var <- sumSqu/nE sd <- sqrt(var) hessian[1,1] <- -nE/(2*var^2)+sumSqu/var^3 for(i in 1:nI) { child <- which(arbre$edge[, 1] == nT+i) if(length(child)>0) { for(j in 1:length(child)) { hessian[1,i+1] <- hessian[1,i+1]-(value[arbre$edge[child[j],2]]-value[nT+i])/arbre$edge.length[child[j]] hessian[i+1,i+1] <- hessian[i+1,i+1]+1./arbre$edge.length[child[j]] if(arbre$edge[child[j],2]>nT) hessian[i+1,arbre$edge[child[j],2]-nT+1] <- -1./(var*arbre$edge.length[child[j]]) } } anc <- which(arbre$edge[, 2] == nT+i) if(length(anc)>0) { for(j in 1:length(anc)) { hessian[1,i+1] <- hessian[1,i+1]+(value[nT+i]-value[arbre$edge[anc[j],1]])/arbre$edge.length[anc[j]] hessian[i+1,i+1] <- hessian[i+1,i+1]+1./arbre$edge.length[anc[j]] hessian[i+1,arbre$edge[anc[j],1]-nT+1] <- -1./(var*arbre$edge.length[anc[j]]) } } hessian[1,i+1] <- -hessian[1,i+1]/sd^2 hessian[i+1,1] <- hessian[1,i+1] hessian[i+1,i+1] <- hessian[i+1,i+1]/var } hessian } getREMLHessian <- function(value, arbre, sigma2) { nI <- Nnode(arbre) nT <- length(arbre$tip.label) sizeH<-nI hessian <- matrix(0., nrow=sizeH, ncol=sizeH) for(i in 1:nI) { child <- which(arbre$edge[, 1] == nT+i) if(length(child)>0) { for(j in 1:length(child)) { hessian[i,i] <- hessian[i,i]+1./arbre$edge.length[child[j]] if(arbre$edge[child[j],2]>nT) hessian[i,arbre$edge[child[j],2]-nT] <- -1./(sigma2*arbre$edge.length[child[j]]) } } anc <- which(arbre$edge[, 2] == nT+i) if(length(anc)>0) { for(j in 1:length(anc)) { hessian[i,i] <- hessian[i,i]+1./arbre$edge.length[anc[j]] hessian[i,arbre$edge[anc[j],1]-nT] <- -1./(sigma2*arbre$edge.length[anc[j]]) } } hessian[i,i] <- hessian[i,i]/sigma2 } hessian } glsBM <- function (phy, x, CI=TRUE) { obj <- list() nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode nbTotN <- nb.tip+nb.node sigmaMF <- 1 TsTemps <- dist.nodes(phy) TempsRacine <- TsTemps[(nb.tip+1),] IndicesMRCA <- mrca(phy, full=T) M <- matrix(NA, ncol=nbTotN, nrow=nbTotN) for (i in 1:nbTotN) { for (j in 1:nbTotN) { M[i,j] <- sigmaMF^2 * TempsRacine[IndicesMRCA[i,j]] } } # M = SigmaZ varAL <- M[-(1:nb.tip), 1:nb.tip] varAA <- M[-(1:nb.tip), -(1:nb.tip)] varLL <- M[(1:nb.tip), 1:nb.tip] invVarLL <- solve(varLL) UL <- rep(1, length=nb.tip) UA <- rep(1, length=nb.node) TL <- TempsRacine[1:nb.tip] TA <- TempsRacine[(nb.tip+1):(nb.tip+nb.node)] # IVL_Z <- invVarLL %*% x IVL_T <- invVarLL %*% TL IVL_U <- invVarLL %*% UL U_IVL_U <- t(UL) %*% IVL_U U_IVL_Z <- t(UL) %*% IVL_Z DeltaU <- UA - varAL %*% IVL_U # Racine_chap <- solve(U_IVL_U) %*% U_IVL_Z Racine_chap <- as.numeric(Racine_chap) Anc_chap <- Racine_chap * DeltaU + varAL %*% IVL_Z Anc_chap <- as.vector(Anc_chap) obj$ace <- Anc_chap names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) # if (CI) { Vec <- x - Racine_chap Num <- t(Vec) %*% invVarLL %*% Vec Num <- as.numeric(Num) sigma2_chap <- Num / (nb.tip-1) obj$sigma2 <- sigma2_chap se <- sqrt((varAA - varAL %*% invVarLL %*% t(varAL))[cbind(1:nb.node, 1:nb.node)]) se <- se * sqrt(sigma2_chap) tmp <- se * qt(0.025, df=nb.tip-1) obj$CI95 <- cbind(lower=obj$ace + tmp, upper=obj$ace - tmp) } obj } glsABM <- function (phy, x, CI=TRUE) { obj <- list() nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode nbTotN <- nb.tip+nb.node sigmaMF <- 1 TsTemps <- dist.nodes(phy) TempsRacine <- TsTemps[(nb.tip+1),] IndicesMRCA <- mrca(phy, full=T) M <- matrix(NA, ncol=nbTotN, nrow=nbTotN) for (i in 1:nbTotN) { for (j in 1:nbTotN) { M[i,j] <- sigmaMF^2 * TempsRacine[IndicesMRCA[i,j]] } } # M = SigmaZ varAL <- M[-(1:nb.tip), 1:nb.tip] varAA <- M[-(1:nb.tip), -(1:nb.tip)] varLL <- M[(1:nb.tip), 1:nb.tip] invVarLL <- solve(varLL) UL <- rep(1, length=nb.tip) UA <- rep(1, length=nb.node) TL <- TempsRacine[1:nb.tip] TA <- TempsRacine[(nb.tip+1):(nb.tip+nb.node)] # IVL_Z <- invVarLL %*% x IVL_T <- invVarLL %*% TL IVL_U <- invVarLL %*% UL U_IVL_U <- t(UL) %*% IVL_U T_IVL_T <- t(TL) %*% IVL_T U_IVL_T <- t(UL) %*% IVL_T U_IVL_Z <- t(UL) %*% IVL_Z T_IVL_Z <- t(TL) %*% IVL_Z DeltaT <- TA - varAL %*% IVL_T DeltaU <- UA - varAL %*% IVL_U # Den <- U_IVL_U * T_IVL_T - U_IVL_T^2 Den <- as.numeric(Den) Mu_chap <- (U_IVL_U * T_IVL_Z - U_IVL_T * U_IVL_Z) / Den Mu_chap <- as.numeric(Mu_chap) Racine_chap <- (T_IVL_T * U_IVL_Z - U_IVL_T * T_IVL_Z) / Den Racine_chap <- as.numeric(Racine_chap) Anc_chap <- Mu_chap * DeltaT + Racine_chap * DeltaU + varAL %*% IVL_Z Anc_chap <- as.vector(Anc_chap) obj$ace <- Anc_chap names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) obj$mu <- Mu_chap # if (CI) { Vec <- x - Racine_chap - Mu_chap * TL Num <- t(Vec) %*% invVarLL %*% Vec Num <- as.numeric(Num) sigma2_chap <- Num / (nb.tip-2) obj$sigma2 <- sigma2_chap se <- sqrt((varAA - varAL %*% invVarLL %*% t(varAL))[cbind(1:nb.node, 1:nb.node)]) se <- se * sqrt(sigma2_chap) tmp <- se * qt(0.025, df=nb.tip-2) obj$CI95 <- cbind(lower=obj$ace + tmp, upper=obj$ace - tmp) } obj } # theta = z0 glsOUv1 <- function (phy, x, alpha, CI=TRUE) { obj <- list() nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode nbTotN <- nb.tip+nb.node sigmaMF <- 1 alphaM <- alpha nbTotN <- nb.tip+nb.node TsTemps <- dist.nodes(phy) TempsRacine <- TsTemps[(nb.tip+1),] IndicesMRCA <- mrca(phy, full=T) M <- matrix(NA, ncol=nbTotN, nrow=nbTotN) for (i in 1:nbTotN) { for (j in 1:nbTotN) { Tempsm <- TempsRacine[IndicesMRCA[i,j]] Tempsi <- TempsRacine[i] Tempsj <- TempsRacine[j] M[i,j] <- sigmaMF^2 * exp(-alphaM * (Tempsi+Tempsj-2*Tempsm)) * (1-exp(-2*alphaM * Tempsm)) / (2 * alphaM) } } # M = SigmaZ varAL <- M[-(1:nb.tip), 1:nb.tip] varAA <- M[-(1:nb.tip), -(1:nb.tip)] varLL <- M[(1:nb.tip), 1:nb.tip] invVarLL <- solve(varLL) UL <- rep(1, length=nb.tip) UA <- rep(1, length=nb.node) TL <- TempsRacine[1:nb.tip] TA <- TempsRacine[(nb.tip+1):(nb.tip+nb.node)] # IVL_Z <- invVarLL %*% x IVL_T <- invVarLL %*% TL IVL_U <- invVarLL %*% UL U_IVL_U <- t(UL) %*% IVL_U U_IVL_Z <- t(UL) %*% IVL_Z DeltaU <- UA - varAL %*% IVL_U # Racine_chap <- solve(U_IVL_U) %*% U_IVL_Z Racine_chap <- as.numeric(Racine_chap) Anc_chap <- Racine_chap * DeltaU + varAL %*% IVL_Z Anc_chap <- as.vector(Anc_chap) obj$ace <- Anc_chap names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) # # vraisemblance # mL <- Racine_chap Num <- t(x-mL) %*% invVarLL %*% (x-mL) Num <- as.numeric(Num) sigma2_chap <- Num / (nb.tip-1) obj$sigma <- sqrt(sigma2_chap) VL <- sigma2_chap * varLL invVL <- invVarLL / sigma2_chap LVrais <- - t(x-mL) %*% invVL %*% (x-mL) /2 - nb.tip * log(2*pi)/2 - log(det(VL))/2 obj$LLik <- as.numeric(LVrais) # if (CI) { se <- sqrt((varAA - varAL %*% invVarLL %*% t(varAL))[cbind(1:nb.node, 1:nb.node)]) se <- se * sqrt(sigma2_chap) tmp <- se * qt(0.025, df=nb.tip-1) obj$CI95 <- cbind(lower=obj$ace + tmp, upper=obj$ace - tmp) } obj } # theta pas egal a z0 glsOUv2 <- function (phy, x, alpha, CI=TRUE) { obj <- list() nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode nbTotN <- nb.tip+nb.node sigmaMF <- 1 nbTotN <- nb.tip+nb.node TsTemps <- dist.nodes(phy) TempsRacine <- TsTemps[(nb.tip+1),] IndicesMRCA <- mrca(phy, full=T) M <- matrix(NA, ncol=nbTotN, nrow=nbTotN) for (i in 1:nbTotN) { for (j in 1:nbTotN) { Tempsm <- TempsRacine[IndicesMRCA[i,j]] Tempsi <- TempsRacine[i] Tempsj <- TempsRacine[j] M[i,j] <- sigmaMF^2 * exp(-alpha * (Tempsi+Tempsj-2*Tempsm)) * (1-exp(-2*alpha * Tempsm)) / (2 * alpha) } } # M = SigmaZ varAL <- M[-(1:nb.tip), 1:nb.tip] varAA <- M[-(1:nb.tip), -(1:nb.tip)] varLL <- M[(1:nb.tip), 1:nb.tip] invVarLL <- solve(varLL) vecW <- exp(-alpha * TempsRacine) UL <- vecW[1:nb.tip] UA <- vecW[(nb.tip+1):(nb.tip+nb.node)] TL <- 1-UL TA <- 1-UA # # IVL_Z <- invVarLL %*% x IVL_T <- invVarLL %*% TL IVL_U <- invVarLL %*% UL U_IVL_U <- t(UL) %*% IVL_U T_IVL_T <- t(TL) %*% IVL_T U_IVL_T <- t(UL) %*% IVL_T U_IVL_Z <- t(UL) %*% IVL_Z T_IVL_Z <- t(TL) %*% IVL_Z DeltaT <- TA - varAL %*% IVL_T DeltaU <- UA - varAL %*% IVL_U # Den <- U_IVL_U * T_IVL_T - U_IVL_T^2 Den <- as.numeric(Den) Theta_chap <- (U_IVL_U * T_IVL_Z - U_IVL_T * U_IVL_Z) / Den Theta_chap <- as.numeric(Theta_chap) Racine_chap <- (T_IVL_T * U_IVL_Z - U_IVL_T * T_IVL_Z) / Den Racine_chap <- as.numeric(Racine_chap) Anc_chap <- Theta_chap * DeltaT + Racine_chap * DeltaU + varAL %*% IVL_Z Anc_chap <- as.vector(Anc_chap) obj$ace <- Anc_chap names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) obj$theta <- Theta_chap # # vraisemblance # mL <- (Racine_chap * UL + Theta_chap * TL) Num <- t(x-mL) %*% invVarLL %*% (x-mL) Num <- as.numeric(Num) sigma2_chap <- Num / (nb.tip-2) obj$sigma <- sqrt(sigma2_chap) VL <- sigma2_chap * varLL invVL <- invVarLL / sigma2_chap LVrais <- - t(x-mL) %*% invVL %*% (x-mL) /2 - nb.tip * log(2*pi)/2 - log(det(VL))/2 obj$LLik <- as.numeric(LVrais) # if (CI) { se <- sqrt((varAA - varAL %*% invVarLL %*% t(varAL))[cbind(1:nb.node, 1:nb.node)]) se <- se * sqrt(sigma2_chap) tmp <- se * qt(0.025, df=nb.tip-2) obj$CI95 <- cbind(lower=obj$ace + tmp, upper=obj$ace - tmp) } obj } reconstruct <- function (x, phyInit, method = "ML", alpha = NULL, low_alpha=0.0001, up_alpha=1, CI = TRUE) { if(!is.null(alpha)) { if(alpha<=0) stop("alpha has to be positive.") } if(up_alpha<=0) stop("alpha has to be positive.") if (!inherits(phyInit, "phylo")) stop("object \"phy\" is not of class \"phylo\"") if (is.null(phyInit$edge.length)) stop("tree has no branch lengths") nN <- phyInit$Nnode nT <- length(x) switch(method, ML = { Intern <- glsBM(phy=phyInit, x=x, CI=F)$ace Value <- c(x, Intern) Hessian <- getMLHessian(Value, phyInit) se <- sqrt(diag(solve(Hessian))) se <- se[-1] tmp <- se*qt(0.025, nN) CI95 <- cbind(lower=Intern+tmp, upper=Intern-tmp) }, REML={ minusLogLik <- function(sig2) { if (sig2 < 0) return(1e+100) V <- sig2 * vcv(phyInit) distval <- stats::mahalanobis(x, center = mu, cov = V) logdet <- sum(log(eigen(V, symmetric = TRUE, only.values = TRUE)$values)) (nT * log(2 * pi) + logdet + distval)/2 } Intern <- glsBM(phy=phyInit, x=x, CI=F)$ace Value <- c(x, Intern) GM <- Intern[1] mu <- rep(GM, nT) out <- nlm(minusLogLik, 1, hessian = FALSE) sigma2 <- out$estimate Hessian <- getREMLHessian(Value, phyInit, sigma2) se <- sqrt(diag(solve(Hessian))) tmp <- se*qt(0.025, nN) CI95 <- cbind(lower=Intern+tmp, upper=Intern-tmp) }, GLS = { result <- glsBM(phy=phyInit, x=x, CI=T) Intern <- result$ace CI95 <- result$CI95 }, GLS_ABM = { result <- glsABM(phy=phyInit, x=x, CI=T) Intern <- result$ace CI95 <- result$CI95 }, GLS_OUS = { if(is.null(alpha)) { funOpt1 <- function(alpha) { -glsOUv1(phy=phyInit, x=x, alpha, CI=F)$LLik } calOp <- optim(par=0.25, fn=funOpt1, method="Brent", lower=low_alpha, upper=up_alpha) if (calOp$convergence == 0) { alpha <- calOp$par } else { stop("Estimation error for alpha") } } result <- glsOUv1(phy=phyInit, x=x, alpha=alpha, CI=T) Intern <- result$ace CI95 <- result$CI95 }, GLS_OU = { if(is.null(alpha)) { funOpt2 <- function(alpha) { -glsOUv2(phy=phyInit, x=x, alpha, CI=F)$LLik } calOp <- optim(par=0.25, fn=funOpt2, method="Brent", lower=low_alpha, upper=up_alpha) if (calOp$convergence == 0) { alpha <- calOp$par } else { stop("Estimation error for alpha") } } result <- glsOUv2(phy=phyInit, x=x, alpha=alpha, CI=T) Intern <- result$ace CI95 <- result$CI95 } ) if (CI==TRUE) list(ace=Intern, CI95=CI95) else list(ace=Intern) } ape/R/coalescent.intervals.R0000644000176200001440000000273014356737452015513 0ustar liggesusers## coalescent.intervals.R (2002-09-12) ## Constructs objects with information on coalescent intervals ## Copyright 2002 Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. coalescent.intervals <- function(x) UseMethod("coalescent.intervals") # set up coalescent interval object (from NH tree) coalescent.intervals.phylo <- function(x) { if (!inherits(x, "phylo")) stop("object \"x\" is not of class \"phylo\"") # ensure we have a BINARY tree if (!is.binary.phylo(x)) stop("object \"x\" is not a binary tree") # ordered branching times t <- sort(branching.times(x)) lt <- length(t) # interval widths w <- numeric(lt) w[1] <- t[1] for (i in 2:lt) w[i] <- t[i] - t[i - 1] l <- (lt+1):2 # number of lineages obj <- list( lineages=l, interval.length=w, interval.count=lt, total.depth =sum(w)) class(obj) <- "coalescentIntervals" return(obj) } # set up coalescent interval object from vector of interval length coalescent.intervals.default <- function(x) { if (!is.vector(x)) stop("argument \"x\" is not a vector of interval lengths") # x = list of the widths of each interval lt <- length(x) l <- (lt+1):2 # number of lineages at the beginning of each interval obj <- list( lineages=l, interval.length=x, interval.count=lt, total.depth =sum(x)) class(obj) <- "coalescentIntervals" return(obj) } ape/R/read.caic.R0000644000176200001440000000416112465112403013154 0ustar liggesusers## read.caic.R (2005-09-21) ## Read Tree File in CAIC Format ## Copyright 2005 Julien Dutheil ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. read.caic <- function(file, brlen=NULL, skip = 0, comment.char="#", ...) { text <- scan(file = file, what = character(), sep="\n", skip = skip, comment.char = comment.char, ...) # Parse the whole file: n <- length(text) / 2 nodes <- 1:n; leaf.names <- character(n) patterns <- character(n) lengths <- numeric(n) for(i in 1:n) { leaf.names[i] <- text[2*i] patterns[i] <- text[2*i-1] lengths[i] <- nchar(patterns[i]) } # Sort all patterns if not done: i <- order(patterns); leaf.names <- leaf.names[i] patterns <- patterns[i] lengths <- lengths[i] # This inner function compares two patterns: test.patterns <- function(p1, p2) { t1 <- strsplit(p1, split="")[[1]] t2 <- strsplit(p2, split="")[[1]] if(length(t1) == length(t2)) { l <- length(t1) if(l==1) return(TRUE) return(all(t1[1:(l-1)]==t2[1:(l-1)]) & t1[l] != t2[l]) } return(FALSE) } # The main loop: while(length(nodes) > 1) { # Recompute indexes: index <- logical(length(nodes)) maxi <- max(lengths) for(i in 1:length(nodes)) { index[i] <- lengths[i] == maxi } i <- 1 while(i <= length(nodes)) { if(index[i]) { p <- paste("(",nodes[i],sep="") c <- i+1 while(c <= length(nodes) && index[c] && test.patterns(patterns[i], patterns[c])) { p <- paste(p, nodes[c], sep=",") c <- c+1 } if(c-i < 2) stop("Unvalid format.") p <- paste(p, ")", sep="") nodes[i] <- p patterns[i]<- substr(patterns[i],1,nchar(patterns[i])-1) lengths[i] <- lengths[i]-1 nodes <- nodes [-((i+1):(c-1))] lengths <- lengths [-((i+1):(c-1))] patterns <- patterns[-((i+1):(c-1))] index <- index [-((i+1):(c-1))] } i <- i+1 } } # Create a 'phylo' object and return it: phy <- read.tree(text=paste(nodes[1],";", sep="")) phy$tip.label <- leaf.names; if(!is.null(brlen)) { br <- read.table(file=brlen) phy$edge.length <- br[,1] } return(phy) } ape/R/write.phyloXML.R0000644000176200001440000000562614675025204014226 0ustar liggesusers## write.phyloXML.R (2024-06-17) ## Write Tree File in PhyloXML Format ## Copyright 2024 Federico Marotta ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # Save a "phylo" object to a file in phyloXML format # # tree: An object of class "phylo" or "multiPhylo". write.phyloXML <- function(phy, file = "", tree.names = FALSE) { phyloxml <- phylo_to_xml(phy, tree.names) cat(as.character(phyloxml), file = file) } phylo_to_xml <- function(phy, tree.names = FALSE) { if (!requireNamespace("xml2", quietly = TRUE)) { stop("Please install the `xml2` package if you want to write phyloXML files.") } if (inherits(phy, "phylo")) { phy <- c(phy) } n_trees <- length(phy) if (is.null(attr(phy, "TipLabel"))) { for (i in seq_len(n_trees)) { phy[[i]]$tip.label <- checkLabel(phy[[i]]$tip.label) } } else { attr(phy, "TipLabel") <- checkLabel(attr(phy, "TipLabel")) phy <- .uncompressTipLabel(phy) } if (is.logical(tree.names)) { if (tree.names) { tree.names <- if (is.null(names(phy))) { paste0("tree", seq_len(n_trees)) } else { names(phy) } } else { tree.names <- character(n_trees) } } phyloxml <- xml2::xml_new_root("phyloxml", `xmlns:xsi` = "http://www.w3.org/2001/XMLSchema-instance", xmlns = "http://www.phyloxml.org", `xsi:schemaLocation` = "http://www.phyloxml.org http://www.phyloxml.org/1.20/phyloxml.xsd" ) lapply(seq_len(n_trees), function(i) { root_idx <- unique( phy[[i]]$edge[! phy[[i]]$edge[, 1] %in% phy[[i]]$edge[, 2], 1] ) stopifnot(length(root_idx) == 1) clades <- .phylo_to_xml_clades(root_idx, phy[[i]]) if (!is.null(phy[[i]]$root.edge)) { xml2::xml_set_attr(clades, "branch_length", phy[[i]]$root.edge) } phylogeny <- xml2::read_xml("") xml2::xml_set_attr(phylogeny, "rooted", tolower(is.rooted(phy[[i]]))) if (nchar(tree.names[i])) { xml2::xml_add_child(phylogeny, "name", tree.names[i]) } xml2::xml_add_child(phylogeny, clades) xml2::xml_add_child(phyloxml, phylogeny) }) return(phyloxml) } .phylo_to_xml_clades <- function(parent_idx, tree) { parent <- xml2::read_xml("") node_name <- if (parent_idx <= length(tree$tip.label)) { tree$tip.label[parent_idx] } else if (!is.null(tree$node.label)) { tree$node.label[parent_idx - length(tree$tip.label)] } else { parent_idx } xml2::xml_add_child(parent, "name", node_name) which_children <- which(tree$edge[, 1] == parent_idx) lapply(which_children, function(which_child) { child_idx <- tree$edge[which_child, 2] child <- .phylo_to_xml_clades(child_idx, tree) if (!is.null(tree$edge.length)) { branch_length <- tree$edge.length[which_child] xml2::xml_set_attr(child, "branch_length", branch_length) } xml2::xml_add_child(parent, child) }) return(parent) } ape/R/njs.R0000644000176200001440000000325314430325572012145 0ustar liggesusers## njs.R (2023-05-15) ## Tree Reconstruction from Incomplete Distances With NJ* or bio-NJ* ## Copyright 2011-2013 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. njs <- function(X, fs = 15) { if (fs < 1) stop("argument 'fs' must be a non-zero positive integer") if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 N <- attr(X, "Size") if (N < 3) stop("cannot build an NJ* tree with less than 3 observations") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_njs, as.double(X), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), as.integer(fs), NAOK = TRUE) obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } bionjs <- function(X, fs = 15) { if (fs < 1) stop("argument 'fs' must be a non-zero positive integer") if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 N <- attr(X, "Size") if (N < 3) stop("cannot build a BIONJ* tree with less than 3 observations") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_bionjs, as.double(X), as.integer(N), integer(2*N - 3), integer(2*N - 3), double(2*N - 3), as.integer(fs), NAOK = TRUE) obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], tip.label = labels, Nnode = N - 2L) class(obj) <- "phylo" reorder(obj) } ape/R/compar.ou.R0000644000176200001440000000573212465112403013253 0ustar liggesusers## compar.ou.R (2010-11-04) ## Ornstein--Uhlenbeck Model for Continuous Characters ## Copyright 2005-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. compar.ou <- function(x, phy, node = NULL, alpha = NULL) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo".') if (!is.numeric(x)) stop("'x' must be numeric.") if (!is.null(names(x))) { if (all(names(x) %in% phy$tip.label)) x <- x[phy$tip.label] else warning('the names of argument "x" and the tip labels of the tree did not match: the former were ignored in the analysis.') } n <- length(phy$tip.label) root <- n + 1L if (is.null(node)) node <- numeric(0) if (is.character(node)) { if (is.null(phy$node.label)) stop("argument 'node' is character but 'phy' has no node labels") node <- match(node, phy$node.label) + n phy$node.label <- NULL } if (root %in% node) node <- node[-1] bt <- branching.times(phy) Tmax <- bt[1] Wend <- matrix(0, n, length(node) + 1) colnames(Wend) <- c(names(sort(bt[node - n])), as.character(root)) Wstart <- Wend Wstart[, ncol(Wstart)] <- Tmax root2tip <- .Call(seq_root2tip, phy$edge, n, phy$Nnode) for (i in 1:n) { last.change <- names(Tmax) for (j in root2tip[[i]]) { if (j %in% node) { jb <- as.character(j) Wend[i, last.change] <- Wstart[i, jb] <- bt[jb] last.change <- jb } } } W <- cophenetic.phylo(phy) dev <- function(p) { alpha <- p[1] sigma2 <- p[2] if (sigma2 <= 0) return(1e100) theta <- p[-(1:2)] ## fixed a bug below: must be '%*% theta' instead of '* theta' (2010-03-15) M <- rowSums((exp(-alpha * Wend) - exp(-alpha * Wstart)) %*% theta) V <- exp(-alpha * W) * (1 - exp(-2 * alpha * (Tmax - W/2))) R <- chol(V) # correction by Cecile Ane (2010-11-04) n * log(2 * pi * sigma2) + 2 * sum(log(diag(R))) + (t(x - M) %*% chol2inv(R) %*% (x - M)) / sigma2 } out <- if (is.null(alpha)) nlm(function(p) dev(p), p = c(0.1, 1, rep(mean(x), ncol(Wstart))), hessian = TRUE) else nlm(function(p) dev(c(alpha, p)), p = c(1, rep(mean(x), ncol(Wstart))), hessian = TRUE) ## if alpha is estimated it may be that the Hessian matrix has the ## corresponding column and row filled with 0, making solve() fail se <- if (is.null(alpha) && all(out$hessian[1, ] == 0)) c(NA, sqrt(diag(solve(out$hessian[-1, -1])))) else sqrt(diag(solve(out$hessian))) para <- cbind(out$estimate, se) nms <- c("sigma2", paste("theta", 1:ncol(Wstart), sep = "")) if (is.null(alpha)) nms <- c("alpha", nms) dimnames(para) <- list(nms, c("estimate", "stderr")) obj <- list(deviance = out$minimum, para = para, call = match.call()) class(obj) <- "compar.ou" obj } ape/R/is.monophyletic.R0000644000176200001440000000316214204600672014472 0ustar liggesusers## is.monophyletic.R (2022-02-21) ## Test Monophyly ## Copyright 2009-2022 Johan Nylander and Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.monophyletic <- function(phy, tips, reroot = !is.rooted(phy), plot = FALSE, ...) { if (!inherits(phy, "phylo")) stop("object 'phy' is not of class 'phylo'") n <- length(phy$tip.label) ROOT <- n + 1L if (is.numeric(tips)) { if (any(tips > n)) stop("incorrect tip #: should not be greater than the number of tips") tips <- as.integer(tips) } if (is.character(tips)) { tips <- which(phy$tip.label %in% tips) if (anyNA(tips)) stop("some tip label(s) not found in the tree") } tips <- sort(tips) # fix (2020-07-10) if (length(tips) == 1L || length(tips) == n) return(TRUE) if (reroot) { outgrp <- phy$tip.label[-tips][1] phy <- root(phy, outgroup = outgrp, resolve.root = TRUE) rerooted <- TRUE } else rerooted <- FALSE phy <- reorder(phy) seq.nod <- .Call(seq_root2tip, phy$edge, n, phy$Nnode) sn <- seq.nod[tips] newroot <- ROOT i <- 2 repeat { x <- unique(unlist(lapply(sn, "[", i))) if (length(x) != 1) break newroot <- x i <- i + 1 } desc <- which(unlist(lapply(seq.nod, function(x) any(x %in% newroot)))) if (plot) { zoom(phy, tips, subtree = FALSE, ...) if (rerooted) mtext("Input tree arbitrarily rerooted", side = 1, cex = 0.9) } ## assuming that both vectors are sorted: identical(tips, desc) } ape/R/SDM.R0000644000176200001440000001644712725765216012017 0ustar liggesusers## SDM.R (2012-04-02) ## Construction of Consensus Distance Matrix With SDM ## Copyright 2011-2012 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. SDM <- function(...) { st <- list(...) # first half contains matrices, second half s_p k <- length(st)/2 ONEtoK <- seq_len(k) ## make sure we have only matrices: for (i in ONEtoK) st[[i]] <- as.matrix(st[[i]]) ## store the rownames of each matrix in a list because they are often called: ROWNAMES <- lapply(st[ONEtoK], rownames) ## the number of rows of each matrix: NROWS <- lengths(ROWNAMES) tot <- sum(NROWS) labels <- unique(unlist(ROWNAMES)) sp <- unlist(st[k + ONEtoK]) astart <- numeric(tot) # start of aip, astart[p] is start of aip astart[1] <- k for (i in 2:k) astart[i] <- astart[i - 1] + NROWS[i - 1] ## apparently erased by the operation below so no need to initialize: ## a <- mat.or.vec(1, k + tot + k + length(labels)) ## first k are alphas, subsequent ones aip ## each matrix p starting at astart[p], next are ## Lagrange multipliers, miu, niu, lambda in that order n <- length(labels) miustart <- k + tot niustart <- miustart + n lambstart <- niustart + k - 1 X <- matrix(0, n, n, dimnames = list(labels, labels)) V <- w <- X tmp <- 2 * k + tot + n col <- numeric(tmp) # free terms of system for (i in 1:(n - 1)) { for (j in (i + 1):n) { for (p in ONEtoK) { ## d <- st[[p]] # not needed anymore if (is.element(labels[i], ROWNAMES[[p]]) && is.element(labels[j], ROWNAMES[[p]])) { w[i, j] <- w[j, i] <- w[i, j] + sp[p] } } } } ONEtoN <- seq_len(n) Q <- matrix(0, tmp, tmp) ## first decompose first sum in paper for (p in ONEtoK) { d_p <- st[[p]] for (l in ONEtoK) { # first compute coefficients of alphas d <- st[[l]] sum <- 0 dijp <- -1 if (l == p) { # calculate alpha_p for (i in ONEtoN) { for (j in ONEtoN) { # check if {i,j}\subset L_l if (i == j) next # make sure i != j ## d <- st[[l]] # <- moved-up pos <- match(labels[c(i, j)], ROWNAMES[[l]]) # <- returns NA if not in this matrix if (all(!is.na(pos))) { ipos <- pos[1L] jpos <- pos[2L] dij <- d[ipos, jpos] sum <- sum + dij * dij - sp[l] * dij * dij / w[i,j] tmp2 <- dij - sp[l] * dij / w[i,j] Q[p, astart[l] + ipos] <- Q[p, astart[l] + ipos] + tmp2 Q[p, astart[l] + jpos] <- Q[p, astart[l] + jpos] + tmp2 } } } } else { for (i in ONEtoN) { for (j in ONEtoN) { # check if {i,j}\subset L_l if (i == j) next ## d <- st[[l]] # <- moved-up pos <- match(labels[c(i, j)], ROWNAMES[[l]]) posp <- match(labels[c(i, j)], ROWNAMES[[p]]) if (all(!is.na(pos)) && all(!is.na(posp))) { ipos <- pos[1L] jpos <- pos[2L] dij <- d[ipos, jpos] dijp <- d_p[posp[1L], posp[2L]] sum <- sum - sp[l] * dij * dijp / w[i, j] tmp2 <- sp[l] * dijp / w[i, j] Q[p,astart[l] + ipos] <- Q[p, astart[l] + ipos] - tmp2 Q[p,astart[l] + jpos] <- Q[p, astart[l] + jpos] - tmp2 } } } } Q[p, l] <- sum } Q[p, lambstart + 1] <- 1 } r <- k for (p in ONEtoK) { dp <- st[[p]] for (i in ONEtoN) { if (is.element(labels[i], ROWNAMES[[p]])) { r <- r + 1 for (l in ONEtoK) { d <- st[[l]] if (l == p) { ipos <- match(labels[i], ROWNAMES[[p]]) for (j in ONEtoN) { if (i == j) next jpos <- match(labels[j], ROWNAMES[[p]]) if (!is.na(jpos)) { dij <- d[ipos, jpos] Q[r, l] <- Q[r, l] + dij - sp[l] * dij / w[i, j] tmp2 <- 1 - sp[l] / w[i, j] Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] + tmp2 Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] + tmp2 } } } else { for (j in ONEtoN) { if (i == j) next if (!is.element(labels[j], rownames(dp))) next pos <- match(labels[c(i, j)], ROWNAMES[[l]]) if (all(!is.na(pos))) { ipos <- pos[1L] jpos <- pos[2L] dij <- d[ipos, jpos] Q[r, l] <- Q[r, l] - sp[l] * dij / w[i, j] tmp2 <- sp[l]/w[i, j] Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] - tmp2 Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] - tmp2 } } } } if (p < k) Q[r, ] <- Q[r, ] * sp[p] Q[r, miustart + i] <- 1 if (p < k) Q[r, niustart + p] <- 1 } } } r <- r + 1 col[r] <- k Q[r, ONEtoK] <- 1 ## for (i in 1:k) Q[r, i] <- 1 for (i in ONEtoN) { r <- r + 1 for (p in ONEtoK) { ## d <- st[[p]] # not needed ipos <- match(labels[i], ROWNAMES[[p]]) if (!is.na(ipos)) Q[r, astart[p] + ipos] <- 1 } } for (p in 1:(k - 1)) { r <- r + 1 for (i in ONEtoN) { ## d <- st[[p]] ipos <- match(labels[i], ROWNAMES[[p]]) if (!is.na(ipos)) Q[r, astart[p] + ipos] <- 1 } } a <- solve(Q, col, 1e-19) for (i in ONEtoN) { for (j in ONEtoN) { if (i == j) { X[i, j] <- V[i, j] <- 0 next } sum <- 0 sumv <- 0 for (p in ONEtoK) { d <- st[[p]] pos <- match(labels[c(i, j)], ROWNAMES[[p]]) if (all(!is.na(pos))) { ipos <- pos[1L] jpos <- pos[2L] dij <- d[ipos, jpos] sum <- sum + sp[p] * (a[p] * dij + a[astart[p] + ipos] + a[astart[p] + jpos]) sumv <- sumv + sp[p] * (a[p] * dij)^2 } } X[i, j] <- sum / w[i, j] V[i, j] <- sumv / (w[i, j])^2 } } list(X, V) } ape/R/mrca.R0000644000176200001440000000643113136640222012270 0ustar liggesusers## mrca.R (2017-07-28) ## Find Most Recent Common Ancestors Between Pairs ## Copyright 2005-2017 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. mrca <- function(phy, full = FALSE) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') ## Get all clades: n <- length(phy$tip.label) m <- phy$Nnode phy <- reorder.phylo(phy, "postorder") BP <- bipartition2(phy$edge, n) N <- n + m ROOT <- n + 1L ## In the following matrix, numeric indexing will be used: M <- numeric(N * N) dim(M) <- c(N, N) e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] ## We start at the root: next.node <- ROOT while (length(next.node)) { tmp <- numeric(0) for (anc in next.node) { ## Find the branches which `anc' is the ancestor...: id <- which(e1 == anc) ## ... and get their descendants: desc <- e2[id] ## `anc' is itself the MRCA of its direct descendants: M[anc, desc] <- M[desc, anc] <- anc ## Find all 2-by-2 combinations of `desc': `anc' ## is their MRCA: for (i in 1:length(desc)) M[cbind(desc[i], desc[-i])] <- anc ## If one element of `desc' is a node, then the tips it ## leads to and the other elements of `desc' have also ## `anc' as MRCA! for (i in 1:length(desc)) { if (desc[i] < ROOT) next ## (get the tips:) tips <- BP[[desc[i] - n]] ## Same thing for the nodes... node.desc <- numeric(0) for (k in 1:m) { if (k == desc[i] - n) next ## If the clade of the current node is a ## subset of desc[i], then it is one of its ## descendants: if (all(BP[[k]] %in% tips)) node.desc <- c(node.desc, k) } ## all nodes and tips which are descendants of ## `desc[i]': ALLDESC <- c(tips, node.desc + n) M[ALLDESC, desc[-i]] <- M[desc[-i], ALLDESC] <- anc for (j in 1:length(desc)) { if (j == i || desc[j] < ROOT) next tips2 <- BP[[desc[j] - n]] node.desc <- numeric(0) for (k in 1:m) { if (k == desc[j] - n) next if (all(BP[[k]] %in% tips2)) node.desc <- c(node.desc, k) } ALLDESC2 <- c(tips2, node.desc + n) M[ALLDESC, ALLDESC2] <- M[ALLDESC2, ALLDESC] <- anc } ## `anc' is also the MRCA of itself and its descendants: M[ALLDESC, anc] <- M[anc, ALLDESC] <- anc } ## When it is done, `desc' i stored to become ## the new `next.node', if they are nodes: tmp <- c(tmp, desc[desc > n]) } next.node <- tmp } M[cbind(1:N, 1:N)] <- 1:N if (full) dimnames(M)[1:2] <- list(as.character(1:N)) else { M <- M[1:n, 1:n] dimnames(M)[1:2] <- list(phy$tip.label) } M } ape/R/clustal.R0000644000176200001440000003035414254545714013032 0ustar liggesusers## clustal.R (2022-06-22) ## Multiple Sequence Alignment with External Applications ## Copyright 2011-2022 Emmanuel Paradis, 2018 Franz Krah ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .errorAlignment <- function(exec, prog) { dirs <- strsplit(Sys.getenv("PATH"), .Platform$path.sep)[[1]] paste0("\n cannot find executable ", sQuote(exec), " on your computer.\n", " It is recommended that you place the executable of ", prog, "\n", " in a directory on the PATH of your computer which is:\n", paste(sort(dirs), collapse = "\n")) } clustalomega <- function (x, y, guide.tree, exec = NULL, MoreArgs = "", quiet = TRUE, original.ordering = TRUE, file) { os <- Sys.info()[1] if (is.null(exec)) { exec <- switch(os, Linux = "clustalo", Darwin = "clustalo", Windows = "clustalo.exe") } if (missing(x)) { out <- system(paste(exec, "-h")) if (out == 127) stop(.errorAlignment(exec, "Clustal-Omega")) return(invisible(NULL)) } type <- if (inherits(x, "DNAbin")) "DNA" else "AA" if (type == "AA" && !inherits(x, "AAbin")) stop("'x' should be of class \"DNAbin\" or \"AAbin\"") noy <- missing(y) fns <- character(4) for (i in 1:3) fns[i] <- tempfile(pattern = "clustal", tmpdir = tempdir(), fileext = ".fas") fns[4] <- tempfile(pattern = "guidetree", tmpdir = tempdir(), fileext = ".nwk") unlink(fns[file.exists(fns)]) x <- as.list(x) labels.bak <- names(x) names(x) <- paste0("Id", 1:length(x)) write.FASTA(x, fns[1]) if (noy) { opts <- paste("-i", fns[1], "-o", fns[3], "--force") ## add input guide tree if (!missing(guide.tree)) { if (!inherits(guide.tree, "phylo")) stop("object 'guide.tree' is not of class \"phylo\"") if (length(setdiff(labels.bak, guide.tree$tip.label))) stop("guide tree does not match sequence names") guide.tree$tip.label[match(guide.tree$tip.label, labels.bak)] <- names(x) if (!is.binary(guide.tree)) guide.tree <- multi2di(guide.tree) if (is.null(guide.tree$edge.length)) guide.tree$edge.length <- rep(1, Nedge(guide.tree)) write.tree(guide.tree, fns[4]) opts <- paste(opts, paste("--guidetree-in", fns[4])) } } else { y <- as.list(y) labels.baky <- names(y) names(y) <- paste0("Id", length(x) + 1:length(y)) write.FASTA(y, fns[2]) if (length(y) == 1) { opts <- paste("-i", fns[1],"--profile1", fns[2], "-o", fns[3], "--force") } else { opts <- paste("--profile1", fns[1],"--profile2", fns[2], "-o", fns[3], "--force") } } opts <- paste(opts, MoreArgs) if (!quiet) opts <- paste(opts, "-v") out <- system(paste(exec, opts), ignore.stdout = quiet) if (out == 127) stop(.errorAlignment(exec, "Clustal-Omega")) res <- as.matrix(read.FASTA(fns[3], type)) if (noy) { if (original.ordering) res <- res[labels(x), ] rownames(res) <- labels.bak } else { if (original.ordering) res <- res[c(labels(x), labels(y)), ] rownames(res) <- c(labels.bak, labels.baky) } unlink(fns[file.exists(fns)]) if (missing(file)) return(res) else write.FASTA(res, file) } clustal <- function(x, y, guide.tree, pw.gapopen = 10, pw.gapext = 0.1, gapopen = 10, gapext = 0.2, exec = NULL, MoreArgs = "", quiet = TRUE, original.ordering = TRUE, file) { os <- Sys.info()[1] if (is.null(exec)) { exec <- switch(os, Linux = "clustalw", Darwin = "clustalw2", Windows = "clustalw2.exe") } if (missing(x)) { out <- system(paste(exec, "-help")) if (out == 127) stop(.errorAlignment(exec, "Clustal")) return(invisible(NULL)) } type <- if (inherits(x, "DNAbin")) "DNA" else "AA" if (type == "AA" && !inherits(x, "AAbin")) stop("'x' should be of class \"DNAbin\" or \"AAbin\"") noy <- missing(y) fns <- character(4) for (i in 1:3) fns[i] <- tempfile(pattern = "clustal", tmpdir = tempdir(), fileext = ".fas") fns[4] <- tempfile(pattern = "guidetree", tmpdir = tempdir(), fileext = ".nwk") unlink(fns[file.exists(fns)]) x <- as.list(x) labels.bak <- names(x) names(x) <- paste0("Id", 1:length(x)) write.FASTA(x, fns[1]) if (noy) { prefix <- c("-INFILE", "-PWGAPOPEN", "-PWGAPEXT", "-GAPOPEN","-GAPEXT", "-OUTFILE","-OUTPUT") suffix <- c(fns[1], pw.gapopen, pw.gapext, gapopen, gapext, fns[3], "FASTA") ## add input guide tree if (!missing(guide.tree)) { if (!inherits(guide.tree, "phylo")) stop("object 'guide.tree' is not of class \"phylo\"") if (length(setdiff(labels.bak, guide.tree$tip.label))) stop("guide tree does not match sequence names") guide.tree$tip.label[match(guide.tree$tip.label, labels.bak)] <- names(x) if (!is.binary(guide.tree)) guide.tree <- multi2di(guide.tree) if (is.null(guide.tree$edge.length)) guide.tree$edge.length <- rep(1, Nedge(guide.tree)) write.tree(guide.tree, fns[4]) prefix <- c(prefix, "-USETREE") suffix <- c(suffix, fns[4]) } } else { y <- as.list(y) labels.baky <- names(y) names(y) <- paste0("Id", length(x) + 1:length(y)) write.FASTA(y, fns[2]) prefix <- c("-PROFILE1", "-PROFILE2", "-PWGAPOPEN", "-PWGAPEXT", "-GAPOPEN","-GAPEXT", "-OUTFILE","-OUTPUT") suffix <- c(fns[1], fns[2], pw.gapopen, pw.gapext, gapopen, gapext, fns[3], "FASTA") } opts <- paste(prefix, suffix, sep = "=", collapse = " ") opts <- paste(opts, MoreArgs) out <- system(paste(exec, opts), ignore.stdout = quiet) if (out == 127) stop(.errorAlignment(exec, "Clustal")) res <- as.matrix(read.FASTA(fns[3], type)) if (noy) { if (original.ordering) res <- res[labels(x), ] rownames(res) <- labels.bak } else { if (original.ordering) res <- res[c(labels(x), labels(y)), ] rownames(res) <- c(labels.bak, labels.baky) } unlink(fns[file.exists(fns)]) if (missing(file)) return(res) else write.FASTA(res, file) } muscle <- function (x, y, guide.tree, exec = "muscle", MoreArgs = "", quiet = TRUE, original.ordering = TRUE, file) { if (missing(x)) { out <- system(exec) if (out == 127) stop(.errorAlignment(exec, "MUSCLE")) return(invisible(NULL)) } type <- if (inherits(x, "DNAbin")) "DNA" else "AA" if (type == "AA" && !inherits(x, "AAbin")) stop("'x' should be of class \"DNAbin\" or \"AAbin\"") noy <- missing(y) ## Produce TEMP files fns <- character(4) for (i in 1:3) fns[i] <- tempfile(pattern = "muscle", tmpdir = tempdir(), fileext = ".fas") fns[4] <- tempfile(pattern = "guidetree", tmpdir = tempdir(), fileext = ".nwk") unlink(fns[file.exists(fns)]) ## Write input sequences x to file x <- as.list(x) labels.bak <- names(x) names(x) <- paste0("Id", 1:length(x)) write.FASTA(x, fns[1]) ## Run muscle for X if (noy) { opts <- paste("-in", fns[1], "-out", fns[3]) ## add input guide tree if (!missing(guide.tree)) { if (!inherits(guide.tree, "phylo")) stop("object 'guide.tree' is not of class \"phylo\"") if (length(setdiff(labels.bak, guide.tree$tip.label))) stop("guide tree does not match sequence names") guide.tree$tip.label[match(guide.tree$tip.label, labels.bak)] <- names(x) if (!is.binary(guide.tree)) guide.tree <- multi2di(guide.tree) if (is.null(guide.tree$edge.length)) guide.tree$edge.length <- rep(1, Nedge(guide.tree)) write.tree(guide.tree, fns[4]) opts <- paste(opts, paste("-usetree_nowarn", fns[4])) } } else { y <- as.list(y) labels.baky <- names(y) names(y) <- paste0("Id", length(x) + 1:length(y)) write.FASTA(y, fns[2]) opts <- paste("-profile", "-in1", fns[1],"-in2", fns[2], "-out", fns[3]) } if (quiet) opts <- paste(opts, "-quiet") opts <- paste(opts, MoreArgs) out <- system(paste(exec, opts)) if (out == 127) stop(.errorAlignment(exec, "MUSCLE")) res <- as.matrix(read.FASTA(fns[3], type)) if (noy) { if (original.ordering) res <- res[labels(x), ] rownames(res) <- labels.bak } else { if (original.ordering) res <- res[c(labels(x), labels(y)), ] rownames(res) <- c(labels.bak, labels.baky) } unlink(fns[file.exists(fns)]) if (missing(file)) return(res) else write.FASTA(res, file) } tcoffee <- function(x, exec = "t_coffee", MoreArgs = "", quiet = TRUE, original.ordering = TRUE) { if (missing(x)) { out <- system(exec) if (out == 127) stop(.errorAlignment(exec, "T-Coffee")) return(invisible(NULL)) } x <- as.list(x) labels.bak <- names(x) names(x) <- paste0("Id", 1:length(x)) d <- tempdir() od <- setwd(d) on.exit(setwd(od)) inf <- "input_tcoffee.fas" write.dna(x, inf, "fasta") opts <- paste(inf, MoreArgs) if (quiet) opts <- paste(opts, "-quiet=nothing") out <- system(paste(exec, opts)) if (out == 127) stop(.errorAlignment(exec, "T-Coffee")) res <- read.dna("input_tcoffee.aln", "clustal") if (original.ordering) res <- res[labels(x), ] rownames(res) <- labels.bak res } ## not called so far ##.getMUSCLEversion <- function(exec) ##{ ## o <- system2(exec, "-version", stdout = TRUE) ## ver <- 0L ## if (any(grepl("3\\.", o))) ver <- 3L ## if (any(grepl("5\\.", o))) ver <- 5L ## ver ##} .write.efa <- function(x, file) { N <- length(x) HDR <- names(x) if (is.null(HDR)) HDR <- paste("alignment", 1:N, sep = "_") for (i in 1:N) { hdr <- paste0("<", HDR[i]) write.FASTA(x[[i]], file, hdr, TRUE) } } muscle5 <- function(x, exec = "muscle", MoreArgs = "", quiet = FALSE, file, super5 = FALSE, mc.cores = 1) { if (missing(x)) { out <- system(exec) if (out == 127) stop(.errorAlignment(exec, "MUSCLE")) return(invisible(NULL)) } type <- if (inherits(x, "DNAbin")) "DNA" else "AA" if (type == "AA" && !inherits(x, "AAbin")) stop("'x' should be of class \"DNAbin\" or \"AAbin\"") ## Write input sequences x to file x <- as.list(x) labels.bak <- names(x) names(x) <- paste0("Id", 1:length(x)) ifl <- tempfile() ofl <- tempfile() on.exit(unlink(c(ifl, ofl))) write.FASTA(x, ifl) args <- paste(ifelse(super5, "-super5", "-align"), ifl, "-output", ofl, "-threads", mc.cores) if (MoreArgs != "") args <- paste(args, MoreArgs) if (!quiet) quiet <- "" o <- system2(exec, args, stdout = quiet, stderr = quiet) res <- read.FASTA(ofl, type) ## original.ordering is always TRUE res <- res[labels(x)] names(res) <- labels.bak if (missing(file)) return(as.matrix(res)) else write.FASTA(res, file) } efastats <- function(X, exec = "muscle", quiet = FALSE) { N <- length(X) ifl <- tempfile() ofl <- tempfile() on.exit(unlink(c(ifl, ofl))) .write.efa(X, ifl) args <- paste("-efastats", ifl, "-log", ofl) o <- system2(exec, args, stdout = TRUE, stderr = TRUE) if (!quiet) cat(scan(ofl, what = "", n = 1L, skip = 5L + N, sep = "\n", quiet = TRUE), sep = "\n") read.table(ofl, header = TRUE, skip = 4, nrows = N) } letterconf <- function(X, exec = "muscle") { N <- length(X) ifl <- tempfile() rfl <- tempfile() ofl <- tempfile() hfl <- tempfile() on.exit(unlink(c(ifl, rfl, ofl))) .write.efa(X, ifl) write.FASTA(X[[1]], rfl) args <- paste("-letterconf", ifl, "-ref", rfl, "-output", ofl, "-html", hfl) o <- system2(exec, args, stdout = TRUE, stderr = TRUE) browseURL(hfl) } ape/R/dist.gene.R0000644000176200001440000000306112465112403013221 0ustar liggesusers## dist.gene.R (2012-04-02) ## Pairwise Distances from Genetic Data ## Copyright 2002-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. dist.gene <- function(x, method = "pairwise", pairwise.deletion = FALSE, variance = FALSE) { if (is.data.frame(x)) x <- as.matrix(x) else { # suggestion by Markus Schlegel if (!is.matrix(x)) stop("'x' should be a matrix or a data.frame") } method <- match.arg(method, c("pairwise", "percentage")) if (!pairwise.deletion) { ## delete the columns with at least one NA: del <- apply(x, 2, function(xx) any(is.na(xx))) x <- x[, !del] } n <- dim(x) L <- n[2] n <- n[1] D <- double(n * (n - 1)/2) if (pairwise.deletion) L <- D k <- 1L for (i in 1:(n - 1)) { for (j in (i + 1):n) { y <- x[i, ] != x[j, ] if (pairwise.deletion) L[k] <- sum(!is.na(y)) D[k] <- sum(y, na.rm = TRUE) k <- k + 1L } } ## L is either a single integer value if pairwise.deletion = FALSE, ## or a vector of integers if pairwise.deletion = TRUE if (method == "percentage") D <- D/L attr(D, "Size") <- n attr(D, "Labels") <- dimnames(x)[[1]] attr(D, "Diag") <- attr(D, "Upper") <- FALSE attr(D, "call") <- match.call() attr(D, "method") <- method class(D) <- "dist" if (variance) { y <- if (method == "pairwise") L else 1 attr(D, "variance") <- D * (y - D)/L } D } ape/R/as.phylo.formula.R0000644000176200001440000000363314533610710014551 0ustar liggesusers## as.phylo.formula.R (2018-09-17) ## Conversion from Taxonomy Variables to Phylogenetic Trees ## Copyright 2005-2018 Julien Dutheil, 2018 Eric Marcon ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. as.phylo.formula <- function(x, data = parent.frame(), collapse = TRUE, ...) { ## Testing formula syntax: err <- "Formula must be of the kind ~A1/A2/.../An." if (length(x) != 2) stop(err) if (x[[1]] != "~") stop(err) f <- x[[2]] taxo <- list() while (length(f) == 3) { if (f[[1]] != "/") stop(err) f3.txt <- deparse(f[[3]]) if (!is.factor(data[[f3.txt]])) stop(paste("Variable", f3.txt, "must be a factor")) taxo[[f3.txt]] <- data[[f3.txt]] if (length(f) > 1) f <- f[[2]] } f.txt <- deparse(f) if (!is.factor(data[[f.txt]])) stop(paste("Variable", f.txt, "must be a factor.")) taxo[[f.txt]] <- data[[f.txt]] taxo.data <- as.data.frame(taxo) leaves.names <- as.character(taxo.data[, 1]) taxo.data[, 1] <- 1:nrow(taxo.data) ## Now builds the phylogeny: f.rec <- function(subtaxo) { # Recurrent utility function u <- ncol(subtaxo) levels <- unique(subtaxo[,u]) if (u == 1) { if (length(levels) != nrow(subtaxo)) warning("leaves names are not unique.") return(as.character(subtaxo[, 1])) } t <- character(length(levels)) for (l in 1:length(levels)) { x <- f.rec(subtaxo[subtaxo[,u] == levels[l], ][1:(u - 1)]) t[l] <- paste0("(", paste(x, collapse=","), ")", as.character(levels[l])) } t } string <- paste0("(", paste(f.rec(taxo.data), collapse = ","), ");") phy <- read.tree(text = string) if (collapse) phy <- collapse.singles(phy) phy$tip.label <- leaves.names[as.numeric(phy$tip.label)] phy } ape/R/additive.R0000644000176200001440000000173312465112403013136 0ustar liggesusers## additive.R (2013-10-04) ## Incomplete Distance Matrix Filling ## Copyright 2011-2013 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. additive <- function(X) { if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) m <- sum(X == -1) ans <- .C(C_additive, as.double(X), as.integer(N), as.integer(m), double(N*N)) matrix(ans[[4]], N, N) } ultrametric <- function(X) { if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) m <- sum(X == -1) ans <- .C(C_ultrametric, as.double(X), as.integer(N), as.integer(m), double(N*N)) matrix(ans[[4]], N, N) } ape/R/branching.times.R0000644000176200001440000000171413227431045014422 0ustar liggesusers## branching.times.R (2018-01-16) ## Branching Times of a Phylogenetic Tree ## Copyright 2002-2018 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. branching.times <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') phy <- reorder(phy) n <- length(phy$tip.label) e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] EL <- phy$edge.length if (is.null(EL)) { warning("no branch length in tree") return(numeric()) } N <- length(e1) xx <- numeric(phy$Nnode) interns <- which(e2 > n) ## we loop only on the internal edges, this assumes ## that `xx' is already set with 0 for (i in interns) xx[e2[i] - n] <- xx[e1[i] - n] + EL[i] depth <- xx[e1[N] - n] + EL[N] xx <- depth - xx names(xx) <- if (is.null(phy$node.label)) (n + 1):(n + phy$Nnode) else phy$node.label xx } ape/R/ace.R0000644000176200001440000003600714674530552012114 0ustar liggesusers## ace.R (2024-09-24) ## Ancestral Character Estimation ## Copyright 2005-2024 Emmanuel Paradis and 2005 Ben Bolker ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .getSEs <- function(out) { h <- out$hessian if (any(diag(h) == 0)) { warning("The likelihood gradient seems flat in at least one dimension (gradient null):\ncannot compute the standard-errors of the transition rates.\n") se <- rep(NaN, nrow(h)) } else { se <- sqrt(diag(solve(h))) } se } ace <- function(x, phy, type = "continuous", method = if (type == "continuous") "REML" else "ML", CI = TRUE, model = if (type == "continuous") "BM" else "ER", scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1, use.expm = FALSE, use.eigen = TRUE, marginal = FALSE) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') if (is.null(phy$edge.length)) stop("tree has no branch lengths") type <- match.arg(type, c("continuous", "discrete")) nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode if (nb.node != nb.tip - 1) stop('"phy" is not rooted AND fully dichotomous.') if (length(x) != nb.tip) stop("length of phenotypic and of phylogenetic data do not match.") if (!is.null(names(x))) { if(all(names(x) %in% phy$tip.label)) x <- x[phy$tip.label] else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.") } obj <- list() if (kappa != 1) phy$edge.length <- phy$edge.length^kappa if (type == "continuous") { switch(method, "REML" = { minusLogLik <- function(sig2) { if (sig2 < 0) return(1e100) V <- sig2 * vcv(phy) ## next three lines borrowed from dmvnorm() in 'mvtnorm' distval <- mahalanobis(x, center = mu, cov = V) logdet <- sum(log(eigen(V, symmetric = TRUE, only.values = TRUE)$values)) (nb.tip * log(2 * pi) + logdet + distval)/2 } mu <- rep(ace(x, phy, method="pic")$ace[1], nb.tip) out <- nlm(minusLogLik, 1, hessian = TRUE) sigma2 <- out$estimate se_sgi2 <- sqrt(1/out$hessian) tip <- phy$edge[, 2] <= nb.tip minus.REML.BM <- function(p) { x1 <- p[phy$edge[, 1] - nb.tip] x2 <- numeric(length(x1)) x2[tip] <- x[phy$edge[tip, 2]] x2[!tip] <- p[phy$edge[!tip, 2] - nb.tip] -(-sum((x1 - x2)^2/phy$edge.length)/(2 * sigma2) - nb.node * log(sigma2)) } out <- nlm(function(p) minus.REML.BM(p), p = rep(mu[1], nb.node), hessian = TRUE) obj$resloglik <- -out$minimum obj$ace <- out$estimate names(obj$ace) <- nb.tip + 1:nb.node obj$sigma2 <- c(sigma2, se_sgi2) if (CI) { se <- .getSEs(out) tmp <- se * qt(0.025, nb.node) obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } }, "pic" = { if (model != "BM") stop('the "pic" method can be used only with model = "BM".') ## See pic.R for some annotations. phy <- reorder(phy, "postorder") phenotype <- numeric(nb.tip + nb.node) phenotype[1:nb.tip] <- if (is.null(names(x))) x else x[phy$tip.label] contr <- var.con <- numeric(nb.node) ans <- .C(C_pic, as.integer(nb.tip), as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), as.double(phy$edge.length), as.double(phenotype), as.double(contr), as.double(var.con), as.integer(CI), as.integer(scaled)) obj$ace <- ans[[5]][-(1:nb.tip)] names(obj$ace) <- nb.tip + 1:nb.node if (CI) { se <- sqrt(ans[[7]]) tmp <- se * qnorm(0.025) obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } }, "ML" = { if (model == "BM") { tip <- phy$edge[, 2] <= nb.tip dev.BM <- function(p) { if (p[1] < 0) return(1e100) # in case sigma^2 is negative x1 <- p[-1][phy$edge[, 1] - nb.tip] x2 <- numeric(length(x1)) x2[tip] <- x[phy$edge[tip, 2]] x2[!tip] <- p[-1][phy$edge[!tip, 2] - nb.tip] -2 * (-sum((x1 - x2)^2/phy$edge.length)/(2*p[1]) - nb.node * log(p[1])) } out <- nlm(function(p) dev.BM(p), p = c(1, rep(mean(x), nb.node)), hessian = TRUE) obj$loglik <- -out$minimum / 2 obj$ace <- out$estimate[-1] names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) se <- .getSEs(out) obj$sigma2 <- c(out$estimate[1], se[1]) if (CI) { tmp <- se[-1] * qt(0.025, nb.node) obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } } }, "GLS" = { if (is.null(corStruct)) stop('you must give a correlation structure if method = "GLS".') if (class(corStruct)[1] == "corMartins") M <- corStruct[1] * dist.nodes(phy) if (class(corStruct)[1] == "corGrafen") phy <- compute.brlen(attr(corStruct, "tree"), method = "Grafen", power = exp(corStruct[1])) if (class(corStruct)[1] %in% c("corBrownian", "corGrafen")) { dis <- dist.nodes(attr(corStruct, "tree")) MRCA <- mrca(attr(corStruct, "tree"), full = TRUE) M <- dis[as.character(nb.tip + 1), MRCA] dim(M) <- rep(sqrt(length(M)), 2) } one2n <- 1:nb.tip varAY <- M[-one2n, one2n] varA <- M[-one2n, -one2n] DF <- data.frame(x) V <- corMatrix(Initialize(corStruct, DF), corr = FALSE) invV <- solve(V) o <- gls(x ~ 1, DF, correlation = corStruct) GM <- o$coefficients obj$ace <- drop(varAY %*% invV %*% (x - GM) + GM) names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) if (CI) { se <- sqrt((varA - varAY %*% invV %*% t(varAY))[cbind(1:nb.node, 1:nb.node)]) tmp <- se * qnorm(0.025) obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } }) } else { # type == "discrete" if (method != "ML") stop("only ML estimation is possible for discrete characters.") if (any(phy$edge.length < 0)) stop("some branches have negative length") if (!is.factor(x)) x <- factor(x) nl <- nlevels(x) lvls <- levels(x) x <- as.integer(x) if (is.character(model)) { rate <- matrix(NA, nl, nl) switch(model, "ER" = np <- rate[] <- 1, "ARD" = { np <- nl*(nl - 1) rate[col(rate) != row(rate)] <- 1:np }, "SYM" = { np <- nl * (nl - 1)/2 sel <- col(rate) < row(rate) rate[sel] <- 1:np rate <- t(rate) rate[sel] <- 1:np }) } else { if (ncol(model) != nrow(model)) stop("the matrix given as 'model' is not square") if (ncol(model) != nl) stop("the matrix 'model' must have as many rows as the number of categories in 'x'") rate <- model np <- max(rate) } index.matrix <- rate tmp <- cbind(1:nl, 1:nl) index.matrix[tmp] <- NA rate[tmp] <- 0 rate[rate == 0] <- np + 1 # to avoid 0's since we will use this as numeric indexing liks <- matrix(0, nb.tip + nb.node, nl) TIPS <- 1:nb.tip liks[cbind(TIPS, x)] <- 1 if (anyNA(x)) liks[which(is.na(x)), ] <- 1 phy <- reorder(phy, "postorder") Q <- matrix(0, nl, nl) e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] EL <- phy$edge.length if (use.eigen) { dev <- function(p, output.liks = FALSE) { if (any(is.nan(p)) || any(is.infinite(p))) return(1e+50) comp <- numeric(nb.tip + nb.node) Q[] <- c(p, 0)[rate] diag(Q) <- -rowSums(Q) decompo <- eigen(Q) lambda <- decompo$values GAMMA <- decompo$vectors invGAMMA <- solve(GAMMA) for (i in seq(from = 1, by = 2, length.out = nb.node)) { j <- i + 1L anc <- e1[i] des1 <- e2[i] des2 <- e2[j] v.l <- GAMMA %*% diag(exp(lambda * EL[i])) %*% invGAMMA %*% liks[des1, ] v.r <- GAMMA %*% diag(exp(lambda * EL[j])) %*% invGAMMA %*% liks[des2, ] v <- v.l * v.r comp[anc] <- sum(v) liks[anc, ] <- v/comp[anc] } if (output.liks) return(liks[-TIPS, , drop = FALSE]) dev <- -2 * sum(log(comp[-TIPS])) if (is.na(dev)) Inf else dev } } else { if (!requireNamespace("expm", quietly = TRUE) && use.expm) { warning("package 'expm' not available; using function 'matexpo' from 'ape'") use.expm <- FALSE } E <- if (use.expm) expm::expm # to avoid Matrix::expm else matexpo dev <- function(p, output.liks = FALSE) { if (any(is.nan(p)) || any(is.infinite(p))) return(1e50) comp <- numeric(nb.tip + nb.node) # from Rich FitzJohn Q[] <- c(p, 0)[rate] diag(Q) <- -rowSums(Q) for (i in seq(from = 1, by = 2, length.out = nb.node)) { j <- i + 1L anc <- e1[i] des1 <- e2[i] des2 <- e2[j] v.l <- E(Q * EL[i]) %*% liks[des1, ] v.r <- E(Q * EL[j]) %*% liks[des2, ] v <- v.l * v.r comp[anc] <- sum(v) liks[anc, ] <- v/comp[anc] } if (output.liks) return(liks[-TIPS, , drop = FALSE]) dev <- -2 * sum(log(comp[-TIPS])) if (is.na(dev)) Inf else dev } } out <- nlminb(rep(ip, length.out = np), function(p) dev(p), lower = rep(0, np), upper = rep(1e50, np)) obj$loglik <- -out$objective/2 obj$rates <- out$par oldwarn <- options("warn") options(warn = -1) out.nlm <- try(nlm(function(p) dev(p), p = obj$rates, iterlim = 1, stepmax = 0, hessian = TRUE), silent = TRUE) options(oldwarn) obj$se <- if (inherits(out.nlm, "try-error")) { warning("model fit suspicious: gradients apparently non-finite") rep(NaN, np) } else .getSEs(out.nlm) obj$index.matrix <- index.matrix if (CI) { lik.anc <- dev(obj$rates, TRUE) if (!marginal) { Q[] <- c(obj$rates, 0)[rate] diag(Q) <- -rowSums(Q) for (i in seq(to = 1, by = -2, length.out = nb.node)) { anc <- e1[i] - nb.tip des1 <- e2[i] - nb.tip if (des1 > 0) { P <- matexpo(Q * EL[i]) tmp <- lik.anc[anc, ] / (lik.anc[des1, ] %*% P) lik.anc[des1, ] <- (tmp %*% P) * lik.anc[des1, ] } j <- i + 1L des2 <- e2[j] - nb.tip if (des2 > 0) { P <- matexpo(Q * EL[j]) tmp <- lik.anc[anc, ] / (lik.anc[des2, ] %*% P) lik.anc[des2, ] <- (tmp %*% P) * lik.anc[des2, ] } lik.anc <- lik.anc / rowSums(lik.anc) } } rownames(lik.anc) <- nb.tip + 1:nb.node colnames(lik.anc) <- lvls obj$lik.anc <- lik.anc } } ## edited from Thomas G (PR #106 and PR #127): if (!is.null(phy$node.label)) { if (!is.null(obj$ace)) names(obj$ace) <- phy$node.label if (!is.null(obj$CI95)) rownames(obj$CI95) <- phy$node.label if (!is.null(obj$lik.anc)) rownames(obj$lik.anc) <- phy$node.label } obj$call <- match.call() class(obj) <- "ace" obj } logLik.ace <- function(object, ...) object$loglik deviance.ace <- function(object, ...) -2*object$loglik AIC.ace <- function(object, ..., k = 2) { if (is.null(object$loglik)) return(NULL) ## Trivial test of "type"; may need to be improved ## if other models are included in ace(type = "c") np <- if (!is.null(object$sigma2)) 1 else length(object$rates) -2*object$loglik + np*k } ### by BB: anova.ace <- function(object, ...) { X <- c(list(object), list(...)) df <- lengths(lapply(X, "[[", "rates")) ll <- sapply(X, "[[", "loglik") ## check if models are in correct order dev <- c(NA, 2*diff(ll)) ddf <- c(NA, diff(df)) table <- data.frame(ll, df, ddf, dev, pchisq(dev, ddf, lower.tail = FALSE)) dimnames(table) <- list(1:length(X), c("Log lik.", "Df", "Df change", "Resid. Dev", "Pr(>|Chi|)")) structure(table, heading = "Likelihood Ratio Test Table", class = c("anova", "data.frame")) } print.ace <- function(x, digits = 4, ...) { cat("\n Ancestral Character Estimation\n\n") cat("Call: ") print(x$call) cat("\n") if (!is.null(x$loglik)) cat(" Log-likelihood:", x$loglik, "\n\n") if (!is.null(x$resloglik)) cat(" Residual log-likelihood:", x$resloglik, "\n\n") ratemat <- x$index.matrix if (is.null(ratemat)) { # to be improved class(x) <- NULL x$resloglik <- x$loglik <- x$call <- NULL print(x) } else { dimnames(ratemat)[1:2] <- dimnames(x$lik.anc)[2] cat("Rate index matrix:\n") print(ratemat, na.print = ".") cat("\n") npar <- length(x$rates) estim <- data.frame(1:npar, round(x$rates, digits), round(x$se, digits)) cat("Parameter estimates:\n") names(estim) <- c("rate index", "estimate", "std-err") print(estim, row.names = FALSE) if (!is.null(x$lik.anc)) { cat("\nScaled likelihoods at the root (type '...$lik.anc' to get them for all nodes):\n") print(x$lik.anc[1, ]) } } } ape/R/write.nexus.R0000644000176200001440000000440414714422363013646 0ustar liggesusers## write.nexus.R (2017-11-11) ## Write Tree File in Nexus Format ## Copyright 2003-2024 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. write.nexus <- function(..., file = "", translate = TRUE, digits = 10) { obj <- .getTreesFromDotdotdot(...) ntree <- length(obj) cat("#NEXUS\n", file = file) cat(paste("[R-package APE, ", date(), "]\n\n", sep = ""), file = file, append = TRUE) N <- length(obj[[1]]$tip.label) cat("BEGIN TAXA;\n", file = file, append = TRUE) cat(paste("\tDIMENSIONS NTAX = ", N, ";\n", sep = ""), file = file, append = TRUE) cat("\tTAXLABELS\n", file = file, append = TRUE) cat(paste("\t\t", obj[[1]]$tip.label, sep = ""), sep = "\n", file = file, append = TRUE) cat("\t;\n", file = file, append = TRUE) cat("END;\n", file = file, append = TRUE) cat("BEGIN TREES;\n", file = file, append = TRUE) if (translate) { cat("\tTRANSLATE\n", file = file, append = TRUE) obj <- .compressTipLabel(obj) X <- paste("\t\t", 1:N, "\t", attr(obj, "TipLabel"), ",", sep = "") ## We remove the last comma: X[length(X)] <- gsub(",", "", X[length(X)]) cat(X, file = file, append = TRUE, sep = "\n") cat("\t;\n", file = file, append = TRUE) class(obj) <- NULL for (i in 1:ntree) obj[[i]]$tip.label <- as.character(1:N) } else { if (is.null(attr(obj, "TipLabel"))) { for (i in 1:ntree) obj[[i]]$tip.label <- checkLabel(obj[[i]]$tip.label) } else { attr(obj, "TipLabel") <- checkLabel(attr(obj, "TipLabel")) obj <- .uncompressTipLabel(obj) } } title <- names(obj) if (is.null(title)) title <- rep("UNTITLED", ntree) else { if (any(s <- title == "")) title[s] <- "UNTITLED" } for (i in 1:ntree) { if (!inherits(obj[[i]], "phylo")) next root.tag <- if (is.rooted(obj[[i]])) "= [&R] " else "= [&U] " cat("\tTREE *", title[i], root.tag, file = file, append = TRUE) cat(write.tree(obj[[i]], file = "", digits = digits), "\n", sep = "", file = file, append = TRUE) } cat("END;\n", file = file, append = TRUE) } ape/R/def.R0000644000176200001440000000122312465112403012075 0ustar liggesusers## def.R (2014-10-24) ## Definition of Vectors for Plotting or Annotating ## Copyright 2014 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. def <- function(x, ..., default = NULL, regexp = FALSE) { dots <- list(...) if (is.null(default)) { if (is.numeric(dots[[1L]])) default <- 1 if (is.character(dots[[1L]])) default <- "black" } foo <- if (regexp) function(vec, y) grep(y, vec) else function(vec, y) which(vec == y) res <- rep(default, length(x)) nms <- names(dots) for (i in seq_along(nms)) res[foo(x, nms[i])] <- dots[[i]] res } ape/R/mcmc.popsize.R0000644000176200001440000003601414533611775013772 0ustar liggesusers## mcmc.popsize.R (2013-07-19) ## Run reversible jump MCMC to sample demographic histories ## Copyright 2004-2013 Rainer Opgen-Rhein and Korbinian Strimmer ## Portions of this function are adapted from rjMCMC code by ## Karl W Broman (see http://www.biostat.wisc.edu/~kbroman/) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # run rjMCMC chain if (getRversion() >= "2.15.1") utils::globalVariables(c("loglik", "b.lin", "popsize")) mcmc.popsize <- function(tree, nstep, thinning = 1, burn.in = 0, progress.bar = TRUE, method.prior.changepoints = c("hierarchical", "fixed.lambda"), max.nodes = 30, lambda = 0.5, # "fixed.lambda" method.prior.changepoints gamma.shape = 0.5, gamma.scale = 2, # gamma distribution from which lambda is drawn (for "hierarchical" method) method.prior.heights = c("skyline", "constant", "custom"), prior.height.mean, prior.height.var) { method.prior.changepoints <- match.arg(method.prior.changepoints) method.prior.heights <- match.arg(method.prior.heights) ## Calculate skylineplot, coalescent intervals ## and estimated population sizes if (inherits(tree, "phylo")) { ci <- coalescent.intervals(tree) sk1 <- skyline(ci) } else if (inherits(tree, "coalescentIntervals")) { ci <- tree sk1 <- skyline(ci) } else stop("tree must be an object of class phylo or coalescentIntervals") ## consider possibility of more than one lineage ci$lineages <- ci$lineages[sk1$interval.length > 0] ci$interval.length <- ci$interval.length[sk1$interval.length > 0] data <- sk1$time <- sk1$time[sk1$interval.length > 0] sk1$population.size <- sk1$population.size[sk1$interval.length > 0] sk1$interval.length <- sk1$interval.length[sk1$interval.length > 0] ## constant prior for heights if (method.prior.heights == "constant") { prior.height.mean <- function(position) mean(sk1$population.size) prior.height.var <- function(position) (mean(sk1$population.size))^2 } ## skyline plot prior for heights if (method.prior.heights == "skyline") { TIME <- sk1$time numb.interv <- 10 prior.change.times <- abs((0:numb.interv) * max(TIME)/numb.interv) prior.height.mean.all <- prior.height.var.all <- vector(length = numb.interv) for (p.int in 1:(numb.interv)) { left <- p.int right <- p.int + 1 sample.pop <- sk1$population.size[sk1$time >= prior.change.times[left] & sk1$time <= prior.change.times[right]] while (length(sample.pop) < 10) { if (left > 1) left <- left - 1 if (right < length(prior.change.times)) right <- right + 1 sample.pop <- sk1$population.size[sk1$time >= prior.change.times[left] & sk1$time <= prior.change.times[right]] } prior.height.mean.all[p.int] <- sum(sample.pop)/length(sample.pop) prior.height.var.all[p.int] <- sum((sample.pop-prior.height.mean.all[p.int])^2)/(length(sample.pop) - 1) } prior.height.mean <- function(position) { j <- sum(prior.change.times <= position) if (j >= length(prior.height.mean.all)) j <- length(prior.height.mean.all) prior.mean <- prior.height.mean.all[j] prior.mean } prior.height.var <- function(position) { j <- sum(prior.change.times <= position) if (j >= length(prior.height.var.all)) j <- length(prior.height.var.all) prior.var <- prior.height.var.all[j] prior.var } } if (method.prior.heights == "custom") { if (missing(prior.height.mean) || missing(prior.height.var)) stop("custom priors not specified") } ## set prior prior <- vector(length = 4) prior[4] <- max.nodes ## set initial position of markov chain and likelihood pos <- c(0, max(data)) h <- c(rep(mean(sk1$population.size), 2)) b.lin <- choose(ci$lineages, 2) ## loglik <<- loglik.pop # modified by EP ## set lists for data count.it <- floor((nstep - burn.in)/thinning) save.pos <- save.h <- vector("list", count.it) save.loglik <- 1:count.it save.steptype <- 1:count.it save.accept <- 1:count.it ## calculate jump probabilities for given lambda of the prior if (method.prior.changepoints == "fixed.lambda") { prior[1] <- lambda jump.prob <- matrix(ncol = 4, nrow = prior[4] + 1) p <- dpois(0:prior[4], prior[1])/ppois(prior[4] + 1, prior[1]) bk <- c(p[-1]/p[-length(p)], 0) bk[bk > 1] <- 1 dk <- c(0, p[-length(p)]/p[-1]) dk[dk > 1] <- 1 mx <- max(bk + dk) bk <- bk/mx*0.9 dk <- dk/mx*0.9 bk[is.na(bk)] <- 0 # added dk[is.na(dk)] <- 0 # added jump.prob[, 3] <- bk jump.prob[, 4] <- dk jump.prob[1, 2] <- 0 jump.prob[1, 1] <- 1 - bk[1] - dk[1] jump.prob[-1, 1] <- jump.prob[-1, 2] <- (1 - jump.prob[-1, 3] - jump.prob[-1, 4])/2 } ## calculate starting loglik curloglik <- loglik.pop(data, pos, h, b.lin, sk1, ci) count.i <- 1 ## set progress bar if (progress.bar == TRUE) { dev.new(width = 3, height = 0.7) par(mar = c(0.5, 0.5, 2, 0.5)) plot(x = c(0, 0), y = c(0, 1), type = "l", xlim = c(0, 1), ylim = c(0, 1), main = "rjMCMC in progress", ylab = "", xlab = "", xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n") } ## BEGIN CALCULATION for (i in (1:nstep + 1)) { if (progress.bar == TRUE) { if (i %% 100 == 0) { z <- i/nstep zt <- (i - 100)/(nstep) polygon(c(zt, zt, z, z), c(1, 0, 0, 1), col = "black") } } ## calculate jump probabilities without given lamda if (method.prior.changepoints == "hierarchical") { prior[1] <- rgamma(1, shape = gamma.shape, scale = gamma.scale) jump.prob <- matrix(ncol = 4, nrow = prior[4] + 1) p <- dpois(0:prior[4], prior[1]) / ppois(prior[4] + 1, prior[1]) bk <- c(p[-1]/p[-length(p)], 0) bk[bk > 1] <- 1 dk <- c(0, p[-length(p)]/p[-1]) dk[dk > 1] <- 1 mx <- max(bk + dk) bk <- bk/mx*0.9 dk <- dk/mx*0.9 bk[is.na(bk)] <- 0 # added dk[is.na(dk)] <- 0 # added jump.prob[, 3] <- bk jump.prob[, 4] <- dk jump.prob[1, 2] <- 0 jump.prob[1, 1] <- 1 - bk[1] - dk[1] jump.prob[-1, 1] <- jump.prob[-1, 2] <- (1 - jump.prob[-1, 3] - jump.prob[-1, 4])/2 } ## determine what type of jump to make wh <- sample(1:4, 1, prob = jump.prob[length(h)-1, ]) if (i %% thinning == 0 & i > burn.in) save.steptype[[count.i]] <- wh if (wh == 1) { step <- ht.move(data, pos, h, curloglik, prior, b.lin, sk1, ci, prior.height.mean, prior.height.var) h <- step[[1]] curloglik <- step[[2]] if (i %% thinning == 0 & i > burn.in) { save.pos[[count.i]] <- pos save.h[[count.i]] <- h save.loglik[[count.i]] <- step[[2]] save.accept[[count.i]] <- step[[3]] } } else if (wh == 2) { step <- pos.move(data, pos, h, curloglik, b.lin, sk1, ci) pos <- step[[1]] curloglik <- step[[2]] if (i %% thinning == 0 & i > burn.in) { save.pos[[count.i]] <- pos save.h[[count.i]] <- h save.loglik[[count.i]] <- step[[2]] save.accept[[count.i]] <- step[[3]] } } else if (wh == 3) { step <- birth.step(data, pos, h, curloglik, prior, jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var) pos <- step[[1]] h <- step[[2]] curloglik <- step[[3]] if (i %% thinning == 0 & i > burn.in) { save.pos[[count.i]] <- pos save.h[[count.i]] <- h save.loglik[[count.i]] <- step[[3]] save.accept[[count.i]] <- step[[4]] } } else { step <- death.step(data, pos, h, curloglik, prior, jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var) pos <- step[[1]] h <- step[[2]] curloglik <- step[[3]] if (i %% thinning == 0 & i > burn.in) { save.pos[[count.i]] <- pos save.h[[count.i]] <- h save.loglik[[count.i]] <- step[[3]] save.accept[[count.i]] <- step[[4]] } } if (i %% thinning == 0 & i > burn.in) count.i <- count.i + 1 } if (progress.bar == TRUE) dev.off() list(pos = save.pos, h = save.h, loglik = save.loglik, steptype = save.steptype, accept = save.accept) } ## private functions ht.move <- function(data, pos, h, curloglik, prior, b.lin, sk1, ci, prior.height.mean, prior.height.var) { j <- sample(1:length(h), 1) prior.mean <- prior.height.mean(pos[j]) prior.var <- prior.height.var(pos[j]) prior[3] <- prior.mean/prior.var prior[2] <- (prior.mean^2)/prior.var newh <- h newh[j] <- h[j] * exp(runif(1, -0.5, 0.5)) newloglik <- loglik.pop(data, pos, newh, b.lin, sk1, ci) lr <- newloglik - curloglik ratio <- exp(lr + prior[2] * (log(newh[j]) - log(h[j])) - prior[3] * (newh[j] - h[j])) if (runif(1, 0, 1) < ratio) return(list(newh, newloglik, 1)) else return(list(h, curloglik, 0)) } pos.move <- function(data, pos, h, curloglik, b.lin, sk1, ci) { j <- if (length(pos) == 3) 2 else sample(2:(length(pos)-1), 1) newpos <- pos left <- pos[j - 1] right <- pos[j + 1] newpos[j] <- runif(1, left, right) newloglik <- loglik.pop(data, newpos, h, b.lin, sk1, ci) lr <- newloglik - curloglik ratio <- exp(lr) * (right - newpos[j])*(newpos[j]- left)/ (right - pos[j])/(pos[j] - left) if (runif(1, 0, 1) < ratio) return(list(newpos, newloglik, 1)) else return(list(pos, curloglik, 0)) } birth.step <- function(data, pos, h, curloglik, prior, jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var) { newpos <- runif(1, 0, pos[length(pos)]) j <- sum(pos < newpos) left <- pos[j] right <- pos[j + 1] prior.mean <- prior.height.mean(pos[j]) prior.var <- prior.height.var(pos[j]) prior[3] <- prior.mean/prior.var prior[2] <- (prior.mean^2)/prior.var u <- runif(1, -0.5, 0.5) oldh <- (((newpos - left)/(right - left))*(h[j + 1] - h[j]) + h[j]) newheight <- oldh*(1 + u) ## ratio ## recall that prior = (lambda, alpha, beta, maxk) k <- length(pos) - 2 L <- max(pos) prior.logratio <- log(prior[1]) - log(k+1) + log((2*k + 3)*(2*k + 2)) - 2*log(L) + log(newpos - left) + log(right - newpos) - log(right - left) + prior[2]*log(prior[3]) - lgamma(prior[2]) + (prior[2] - 1) * log(newheight) + prior[3]*(newheight) proposal.ratio <- jump.prob[k + 2, 4]*L/jump.prob[k + 1, 3]/(k + 1) jacobian <- (((newpos - left)/(right - left))*(h[j + 1] - h[j])) + h[j] ## form new parameters newpos <- sort(c(pos, newpos)) newh <- c(h[1:j], newheight, h[(j + 1):length(h)]) newloglik <- loglik.pop(data, newpos, newh, b.lin, sk1, ci) lr <- newloglik - curloglik ratio <- exp(lr + prior.logratio) * proposal.ratio * jacobian if (runif(1, 0, 1) < ratio) return(list(newpos, newh, newloglik, 1)) else return(list(pos, h, curloglik, 0)) } death.step <- function(data, pos, h, curloglik, prior, jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var) { ## position to drop if (length(pos) == 3) j <- 2 else j <- sample(2:(length(pos) - 1), 1) left <- pos[j - 1] right <- pos[j + 1] prior.mean <- prior.height.mean(pos[j]) prior.var <- prior.height.var(pos[j]) prior[3] <- prior.mean/prior.var prior[2] <- (prior.mean^2)/prior.var ## get new height h.left <- h[j - 1] h.right <- h[j + 1] newheight <- (((pos[j] - left)/(right - left))*(h.right - h.left) + h.left) ## ratio ## recall that prior = (lambda, alpha, beta, maxk) k <- length(pos) - 3 L <- max(pos) prior.logratio <- log(k+1) - log(prior[1]) - log(2*(k + 1)*(2*k + 3)) + 2*log(L) - log(pos[j] - left) - log(right - pos[j]) + log(right - left) - prior[2]*log(prior[3]) + lgamma(prior[2]) - (prior[2]-1) * log(newheight) - prior[3]*(newheight) proposal.ratio <- (k + 1)*jump.prob[k + 1, 3]/jump.prob[k + 2, 4]/L jacobian <- ((pos[j] - left)/(right - left))*(h[j + 1] - h[j - 1]) + h[j - 1] ## form new parameters newpos <- pos[-j] newh <- h[-j] newloglik <- loglik.pop(data, newpos, newh, b.lin, sk1, ci) lr <- newloglik - curloglik ratio <- exp(lr + prior.logratio) * proposal.ratio * (jacobian^(-1)) if (runif(1, 0, 1) < ratio) return(list(newpos, newh, newloglik, 1)) else return(list(pos, h, curloglik, 0)) } # calculate the log likelihood for a set of data loglik.pop <- function(time = sk1$time, pos = c(0, max(sk1$time)), h = mean(sk1$population.size), b = b.lin, sk1, ci) { data.time <- c(0, time) leftside <- 0 i <- 1 h1 <- c(h, h[length(h)]) pos1 <- c(pos, pos[length(pos)]) while (i < length(time)) { left.pos <- sum(data.time[i + 1] >= pos) right.pos <- left.pos + 1 h.mix <- (((data.time[i + 1] - pos[left.pos])/(pos[right.pos] - pos[left.pos]))*(h[right.pos] - h[left.pos])) + h[left.pos] leftside <- leftside + log(b[i]/h.mix) i <- i + 1 } rightside <- 0 time1 <- c(0, time) time.count <- 1 ## heigths of jumps jumps <- sort(c(time1, pos)) h.jumps <- jumps while (time.count <= length(jumps)) { left.pos <- sum(jumps[time.count] >= pos) right.pos <- left.pos + 1 h.jumps[time.count] <- (((jumps[time.count] - pos[left.pos])/(pos[right.pos] - pos[left.pos]))*(h[right.pos] - h[left.pos])) + h[left.pos] if (is.na(h.jumps[time.count])) h.jumps[time.count] <- h[left.pos] time.count <- time.count + 1 } ## Vector for lineages i <- 1 lineages.jumps <- jumps while (i <= length(jumps)) { lineages.jumps[i] <- sum(jumps[i] >= time) if (lineages.jumps[i] == 0) lineages.jumps[i] <- 1 i <- i + 1 } lineage <- ci$lineages[lineages.jumps] b1 <- choose(lineage, 2) ## Integral a <- (h.jumps[-1] - h.jumps[-length(h.jumps)])/(jumps[-1] - jumps[-length(jumps)]) c <- h.jumps[-1] - jumps[-1] * a area <- (1/a) * log(a*jumps[-1] + c) - (1/a)*log(a * jumps[-length(jumps)] + c) stepfunction <- (jumps[-1] - jumps[-length(jumps)])/h.jumps[-1] area[is.na(area)] <- stepfunction[is.na(area)] rightside <- sum(area * b1[-1]) loglik <- leftside - rightside loglik } ape/R/compar.lynch.R0000644000176200001440000000433412465112403013742 0ustar liggesusers## compar.lynch.R (2002-08-28) ## Lynch's Comparative Method ## Copyright 2002 Julien Claude ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. compar.lynch <- function(x, G, eps = 1e-4) { if (is.vector(x) || is.data.frame(x)) x <- as.matrix(x) alea <- runif(1, 0, 1) z <- as.vector(x) uz <- apply(x, 2, mean) vcvz <- var(x) vz <- diag(vcvz) nsp <- nrow(x) k <- ncol(x) X1 <- matrix(0, k, k) diag(X1) <- 1 I <- matrix(0, nsp, nsp) diag(I) <- 1 vara <- trvare <- matrix(NA, k, k) nsp1 <- rep(1, nsp) X <- X1 %x% nsp1 compteur <- 0 vara <- A0 <- alea * vcvz vare <- E0 <- (1 - alea) * vcvz newu <- u0 <- uz Ginv <- solve(G) V0 <- vcvz %x% G a0 <- e0 <- matrix(0, nsp, k) a1 <- e1 <- matrix(1, nsp, k) while (any(abs((rbind(a1, e1) - rbind(a0, e0))) > eps)) { a1 <- a0 e1 <- e0 compteur <- compteur + 1 Rinv <- solve(E0 %x% I) Dinv <- solve(A0 %x% G) info <- solve(Rinv + Dinv) newa <- solve(Rinv + Dinv) %*% Rinv %*% (z - X %*% u0) newe <- z - X %*% u0 - newa e0 <- mnewe <- matrix(newe, nsp, k) a0 <- mnewa <- matrix(newa, nsp, k) for (i in 1:k) { for (j in 1:k) { trvare[i, j] <- sum(diag(info[(((i - 1) * nsp) + 1):(i * nsp), (((j - 1) * nsp) + 1):(j * nsp)]))} } vare <- ((nsp - 1) * var(mnewe) + trvare) / nsp for (i in 1:k) { for (j in 1:k) { vara[i, j] <- (t(mnewa[, i]) %*% Ginv %*% mnewa[, j] + sum(diag(Ginv %*% info[(((i - 1) * nsp) + 1):(i * nsp), (((j - 1) * nsp) + 1):(j * nsp)]))) / nsp } } newu <- apply(x - mnewa, 2, mean) V <- vara %x% G + vare %x% I p <- (2 * pi)^(-nsp) * det(V)^(-0.5) * exp(-0.5 * t(z - (X %*% newu)) %*% solve(V) %*% (z - (X %*% newu))) E0 <- vare A0 <- vara u0 <- newu } dimnames(vare) <- dimnames(vara) list(vare = vare, vara = vara, A = mnewa, E = mnewe, u = newu, lik = log(p)) } ape/R/howmanytrees.R0000644000176200001440000000437614566023557014117 0ustar liggesusers## howmanytrees.R (2022-10-10) ## Calculate Numbers of Phylogenetic Trees ## Copyright 2004-2022 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. LargeNumber <- function(a, b) { c <- b * log10(a) n <- floor(c) x <- 10^(c - n) structure(c(x = x, n = n), class = "LargeNumber") } print.LargeNumber <- function(x, latex = FALSE, digits = 1, ...) { if (latex) { cat("$\\approx ", round(x["x"], digits), " \\times 10^{", x["n"], "}$\n", sep = "") } else { cat("approximately ", x["x"], " * 10^", x["n"], "\n", sep = "") } } howmanytrees <- function(n, rooted = TRUE, binary = TRUE, labeled = TRUE, detail = FALSE) { if (!labeled && !(rooted & binary)) stop("can compute number of unlabeled trees only for rooted binary cases.") if (n < 3) N <- 1 else { if (labeled) { if (!rooted) n <- n - 1 if (binary) { if (n < 152) { N <- prod(seq(1, 2*n - 3, by = 2)) # double factorial } else { ldfac <- lfactorial(2 * n - 3) - (n - 2) * log(2) - lfactorial(n - 2) N <- LargeNumber(exp(1), ldfac) } } else { N <- matrix(0, n, n - 1) N[1:n, 1] <- 1 for (i in 3:n) for (j in 2:(i - 1)) N[i, j] <- (i + j - 2)*N[i - 1, j - 1] + j*N[i - 1, j] if (detail) { rownames(N) <- 1:n colnames(N) <- 1:(n - 1) } else N <- sum(N[n, ]) } } else { N <- numeric(n) N[1] <- 1 for (i in 2:n) { if (i %% 2) { im1 <- i - 1L x <- N[1:(im1 / 2)] y <- N[im1:((i + 1) / 2)] } else { ion2 <- i / 2 x <- N[1:ion2] y <- N[(i - 1):ion2] ny <- length(y) y[ny] <- (y[ny] + 1) / 2 } N[i] <- sum(x * y) } if (detail) names(N) <- 1:n else N <- N[n] } } N } ape/R/lmorigin.R0000644000176200001440000001204614033475376013202 0ustar liggesusers'lmorigin' <- function(formula, data=NULL, origin=TRUE, nperm=999, method=NULL, silent=FALSE) # # This program computes a multiple linear regression and performs tests # of significance of the equation parameters using permutations. # # origin=TRUE: the regression line can be forced through the origin. Testing # the significance in that case requires a special permutation procedure. # # Permutation methods: raw data or residuals of full model # Default method in regression through the origin: raw data # Default method in ordinary multiple regression: residuals of full model # - In ordinary multiple regression when m = 1: raw data # # Pierre Legendre, March 2009 { if(!is.null(method)) method <- match.arg(method, c("raw", "residuals")) if(is.null(method) & origin==TRUE) method <- "raw" if(is.null(method) & origin==FALSE) method <- "residuals" if(nperm < 0) stop("Incorrect value for 'nperm'") ## From the formula, find the variables and the number of observations 'n' toto <- lm(formula, data) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "offset"), names(mf), 0) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") mf <- eval(mf, parent.frame()) var.names = colnames(mf) # Noms des variables y <- as.matrix(mf[,1]) colnames(y) <- var.names[1] X <- as.matrix(mf[,-1]) n <- nrow(mf) m <- ncol(X) a <- system.time({ mm<- m # No. regression coefficients, possibly including the intercept if(m == 1) method <- "raw" if(nrow(X) != n) stop("Unequal number of rows in y and X") if(origin) { if(!silent) cat("Regression through the origin",'\n') reg <- lm(y ~ 0 + X) } else { if(!silent) cat("Multiple regression with estimation of intercept",'\n') reg <- lm(y ~ X) mm <- mm+1 } if(!silent) { if(nperm > 0) { if(method == "raw") { cat("Permutation method =",method,"data",'\n') } else { cat("Permutation method =",method,"of full model",'\n') } } } t.vec <- summary(reg)$coefficients[,3] p.param.t <- summary(reg)$coefficients[,4] df1 <- summary(reg)$fstatistic[[2]] df2 <- summary(reg)$fstatistic[[3]] F <- summary(reg)$fstatistic[[1]] y.res <- summary(reg)$residuals # b.vec <- summary(reg)$coefficients[,1] # r.sq <- summary(reg)$r.squared # adj.r.sq <- summary(reg)$adj.r.squared # p.param.F <- pf(F, df1, df2, lower.tail=FALSE) if(df1 < m) stop("\nCollinearity among the X variables. Check using 'lm'") # Permutation tests if(nperm > 0) { nGT.F <- 1 nGT1.t <- rep(1,mm) nGT2.t <- rep(1,mm) sign.t <- sign(t.vec) for(i in 1:nperm) # Permute raw data. Always use this method for F-test { if(origin) { # Regression through the origin dia.bin <- diag((rbinom(n,1,0.5)*2)-1) y.perm <- dia.bin %*% sample(y) reg.perm <- lm(y.perm ~ 0 + X) } else { # Multiple linear regression y.perm <- sample(y,n) reg.perm <- lm(y.perm ~ X) } # Permutation test of the F-statistic F.perm <- summary(reg.perm)$fstatistic[1] if(F.perm >= F) nGT.F <- nGT.F+1 # Permutation tests of the t-statistics: permute raw data if(method == "raw") { t.perm <- summary(reg.perm)$coefficients[,3] if(nperm <= 5) cat(t.perm,'\n') for(j in 1:mm) { # One-tailed test in direction of sign if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1 # Two-tailed test if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1 } } } if(method == "residuals") { # Permute residuals of full model for(i in 1:nperm) { if(origin) { # Regression through the origin dia.bin <- diag((rbinom(n,1,0.5)*2)-1) y.perm <- dia.bin %*% sample(y.res) reg.perm <- lm(y.perm ~ 0 + X) } else { # Multiple linear regression y.perm <- sample(y.res,n) reg.perm <- lm(y.perm ~ X) } # Permutation tests of the t-statistics: permute residuals t.perm <- summary(reg.perm)$coefficients[,3] if(nperm <= 5) cat(t.perm,'\n') for(j in 1:mm) { # One-tailed test in direction of sign if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1 # Two-tailed test if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1 } } } # Compute the permutational probabilities p.perm.F <- nGT.F/(nperm+1) p.perm.t1 <- nGT1.t/(nperm+1) p.perm.t2 <- nGT2.t/(nperm+1) ### Do not test intercept by permutation of residuals in multiple regression if(!origin & method=="residuals") { if(silent) { # Note: silent==TRUE in simulation programs p.perm.t1[1] <- p.perm.t2[1] <- 1 } else { p.perm.t1[1] <- p.perm.t2[1] <- NA } } } }) a[3] <- sprintf("%2f",a[3]) if(!silent) cat("Computation time =",a[3]," sec",'\n') # if(nperm == 0) { out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, origin=origin, nperm=nperm, var.names=var.names, call=match.call()) } else { out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, p.perm.t.2tail=p.perm.t2, p.perm.t.1tail=p.perm.t1, p.perm.F=p.perm.F, origin=origin, nperm=nperm, method=method, var.names=var.names, call=match.call()) } # class(out) <- "lmorigin" out } ape/R/compar.gee.R0000644000176200001440000001432412520626433013372 0ustar liggesusers## compar.gee.R (2015-05-01) ## Comparative Analysis with GEEs ## Copyright 2002-2015 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. compar.gee <- function(formula, data = NULL, family = gaussian, phy, corStruct, scale.fix = FALSE, scale.value = 1) { if (requireNamespace("gee", quietly = TRUE)) gee <- gee::gee else stop("package 'gee' not available") if (!missing(corStruct)) { if (!missing(phy)) warning("the phylogeny was ignored because you gave a 'corStruct' object") R <- vcv(corStruct, corr = TRUE) } else { R <- vcv(phy, corr = TRUE) } if (is.null(data)) data <- parent.frame() else { nmsR <- rownames(R) if (!identical(rownames(data), nmsR)) { if (!any(is.na(match(rownames(data), nmsR)))) data <- data[nmsR, ] else { msg <- if (missing(corStruct)) "the tip labels of the tree" else "those of the correlation structure" msg <- paste("the rownames of the data.frame and", msg, "do not match: the former were ignored in the analysis") warning(msg) } } } effect.assign <- attr(model.matrix(formula, data = data), "assign") for (i in all.vars(formula)) { if (any(is.na(eval(parse(text = i), envir = data)))) stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.") } id <- rep(1, dim(R)[1]) geemod <- do.call("gee", list(formula, id, data = data, family = family, R = R, corstr = "fixed", scale.fix = scale.fix, scale.value = scale.value)) W <- geemod$naive.variance fname <- if (is.function(family)) deparse(substitute(family)) else if (is.list(family)) family$family else family if (fname == "binomial") W <- summary(glm(formula, family = quasibinomial, data = data))$cov.scaled N <- geemod$nobs ## ## maybe need to refine below in case of non-Brownian corStruct if (!missing(corStruct)) phy <- attr(corStruct, "tree") dfP <- sum(phy$edge.length)*N / sum(diag(vcv(phy))) # need the variances ## ## compute QIC: Y <- geemod$y MU <- geemod$fitted.values Qlik <- switch(fname, "gaussian" = -sum((Y - MU)^2)/2, "binomial" = sum(Y*log(MU/(1 - MU)) + log(1 - MU)), "poisson" = sum(Y*log(MU) - MU), "Gamma" = sum(Y/MU + log(MU)), "inverse.gaussian" = sum(-Y/(2*MU^2) + 1/MU)) Ai <- do.call("gee", list(formula, id, data = data, family = family, corstr = "independence", scale.fix = scale.fix, scale.value = scale.value))$naive.variance QIC <- -2*Qlik + 2*sum(diag(solve(Ai) %*% W)) obj <- list(call = match.call(), effect.assign = effect.assign, nobs = N, QIC = QIC, coefficients = geemod$coefficients, residuals = geemod$residuals, fitted.values = MU, family = geemod$family$family, link = geemod$family$link, scale = geemod$scale, W = W, dfP = dfP) class(obj) <- "compar.gee" obj } print.compar.gee <- function(x, ...) { nas <- is.na(x$coef) coef <- x$coef[!nas] cnames <- names(coef) coef <- matrix(rep(coef, 4), ncol = 4) dimnames(coef) <- list(cnames, c("Estimate", "S.E.", "t", "Pr(T > |t|)")) df <- x$dfP - dim(coef)[1] coef[, 2] <- sqrt(diag(x$W)) coef[, 3] <- coef[, 1]/coef[, 2] if (df < 0) { warning("not enough degrees of freedom to compute P-values.") coef[, 4] <- NA } else coef[, 4] <- 2 * (1 - pt(abs(coef[, 3]), df)) residu <- quantile(as.vector(x$residuals)) names(residu) <- c("Min", "1Q", "Median", "3Q", "Max") cat("Call: ") print(x$call) cat("Number of observations: ", x$nobs, "\n") cat("Model:\n") cat(" Link:", x$link, "\n") cat(" Variance to Mean Relation:", x$family, "\n") cat("\nQIC:", x$QIC, "\n") cat("\nSummary of Residuals:\n") print(residu) if (any(nas)) cat("\n\nCoefficients: (", sum(nas), " not defined because of singularities)\n", sep = "") else cat("\n\nCoefficients:\n") print(coef) cat("\nEstimated Scale Parameter: ", x$scale) cat("\n\"Phylogenetic\" df (dfP): ", x$dfP, "\n") } drop1.compar.gee <- function(object, scope, quiet = FALSE, ...) { fm <- formula(object$call) trm <- terms(fm) z <- attr(trm, "term.labels") ind <- object$effect.assign n <- length(z) ans <- matrix(NA, n, 3) for (i in 1:n) { wh <- which(ind == i) ans[i, 1] <- length(wh) ans[i, 2] <- t(object$coefficients[wh]) %*% solve(object$W[wh, wh]) %*% object$coefficients[wh] } df <- object$dfP - length(object$coefficients) if (df < 0) warning("not enough degrees of freedom to compute P-values.") else ans[, 3] <- pf(ans[, 2], ans[, 1], df, lower.tail = FALSE) colnames(ans) <- c("df", "F", "Pr(>F)") rownames(ans) <- z if (any(attr(trm, "order") > 1) && !quiet) warning("there is at least one interaction term in your model: you should be careful when interpreting the significance of the main effects.") class(ans) <- "anova" attr(ans, "heading") <- paste("Single term deletions\n\n Model:", as.character(as.expression(fm)), "\n") ans } predict.compar.gee <- function(object, newdata = NULL, type = c("link", "response"), ...) { type <- match.arg(type) pred <- if (is.null(newdata)) object$fitted.values else { frm <- formula(object$call$formula)[-2] X <- model.matrix(frm, data = newdata) beta <- object$coefficients X[, names(beta), drop = FALSE] %*% beta } if (type == "link") return(pred) f <- match.fun(object$family) f(link = object$link)$linkinv(pred) } ape/R/varcomp.R0000644000176200001440000000177414533612577013040 0ustar liggesusers## varcomp.R (2004-10-29) ## Variance Component of Mixed-Effect Linear Model ## Copyright 2004 Julien Dutheil ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. varcomp <- function(x, scale = FALSE, cum = FALSE) { if (!("lme" %in% class(x))) stop("Object \"x\" is not of class \"lme\"") res <- seq(along = x$modelStruct$reStruct) var <- vector(length = length(res) + 1) for(i in res) { var[length(var) - i] <- attr(summary(x$modelStruct$reStruct[[i]]),"stdDev")[1]*x$sigma } var[length(var)] <- x$sigma var <- var^2 if(scale) var <- var/sum(var) if(cum) var <- cumsum(var) names(var) <- c(rev(names(x$modelStruct$reStruct)), "Within") class(var) <- "varcomp" return(var) } plot.varcomp <- function(x, xlab = "Levels", ylab = "Variance", type = "b", ...) { if (!("varcomp" %in% class(x))) stop("Object \"x\" is not of class \"varcomp\"") return(xyplot(x ~ ordered(names(x), levels=rev(names(x))), xlab=xlab, ylab=ylab, type=type, ...)) } ape/R/skylineplot.R0000644000176200001440000000321614356737424013741 0ustar liggesusers## skylineplot.R (2004-07-4) ## Various methods to plot skyline objects (= skyline plots) ## Copyright 2002-2004 Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # plot skyline plot.skyline <- function(x, show.years=FALSE, subst.rate, present.year, ...) { if (!inherits(x, "skyline")) stop("object \"x\" is not of class \"skyline\"") t <- x$time m <- x$population.size lm <- length(m) if (show.years) { plot((-c(0,t))/subst.rate+present.year,c(m,m[lm]),type="s", xlab="time (years)",ylab="effective population size",log="y", ...) } else { plot(c(0,t),c(m,m[lm]),type="s", xlim=c(t[lm],0), xlab="time (past to present in units of substitutions)",ylab="effective population size",log="y", ...) } } # plot another skyline plot on top lines.skyline <- function(x, show.years=FALSE, subst.rate, present.year, ...) { if (!inherits(x, "skyline")) stop("object \"x\" is not of class \"skyline\"") t <- x$time m <- x$population.size lm <- length(m) if (show.years) { lines((-c(0,t))/subst.rate+present.year,c(m,m[lm]),type="s", ...) } else { lines(c(0,t),c(m,m[lm]),type="s", ...) } } # convenience short cut (almost compatible with APE 0.1) skylineplot <- function(z, ...) plot(skyline(z, ...)) #input: phylogenetic tree skylineplot.deluxe <- function(tree, ...) { if (!inherits(tree, "phylo")) stop("object \"tree\" is not of class \"phylo\"") ci <- coalescent.intervals(tree) classic <- skyline(ci) generalized <- skyline(ci, -1) plot(classic,col=grey(.8), ...) lines(generalized, ...) return(generalized) } ape/R/binaryPGLMM.R0000644000176200001440000002017114533611010013417 0ustar liggesusers## binaryPGLMM.R (2015-03-04) ## Phylogenetic Generalized Linear Mixed Model for Binary Data ## Copyright 2015 Anthony R. Ives ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. binaryPGLMM <- function(formula, data = list(), phy, s2.init = 0.1, B.init = NULL, tol.pql = 10^-6, maxit.pql = 200, maxit.reml = 100) { # Begin pglmm.reml pglmm.reml <- function(par, tinvW, tH, tVphy, tX) { n <- dim(tX)[1] p <- dim(tX)[2] ss2 <- abs(Re(par)) Cd <- ss2 * tVphy V <- tinvW + Cd LL <- 10^10 if (sum(is.infinite(V)) == 0) { # & rcond(V) < 10^10) { if (all(eigen(V)$values > 0)) { #if(rcond(V) > 10^-10 & all(eigen(V)$values > 0)) { invV <- solve(V) logdetV <- determinant(V)$modulus[1] if (is.infinite(logdetV)) { cholV <- chol(V) logdetV <- 2 * sum(log(diag(chol(V)))) } LL <- logdetV + t(tH) %*% invV %*% tH + determinant(t(tX) %*% invV %*% tX)$modulus[1] } } return(LL) } # End pglmm.reml if (!inherits(phy, "phylo")) stop("Object \"phy\" is not of class \"phylo\".") if (is.null(phy$edge.length)) stop("The tree has no branch lengths.") if (is.null(phy$tip.label)) stop("The tree has no tip labels.") phy <- reorder(phy, "postorder") n <- length(phy$tip.label) mf <- model.frame(formula = formula, data = data) if (nrow(mf) != length(phy$tip.label)) stop("Number of rows of the design matrix does not match with length of the tree.") if (is.null(rownames(mf))) { warning("No tip labels, order assumed to be the same as in the tree.\n") data.names = phy$tip.label } else data.names = rownames(mf) order <- match(data.names, phy$tip.label) if (sum(is.na(order)) > 0) { warning("Data names do not match with the tip labels.\n") rownames(mf) <- data.names } else { tmp <- mf rownames(mf) <- phy$tip.label mf[order, ] <- tmp[1:nrow(tmp), ] } X <- model.matrix(attr(mf, "terms"), data = mf) y <- model.response(mf) if (sum(!(y %in% c(0, 1)))) { stop("PGLMM.binary requires a binary response (dependent variable).") } if (var(y) == 0) { stop("The response (dependent variable) is always 0 or always 1.") } p <- ncol(X) Vphy <- vcv(phy) Vphy <- Vphy/max(Vphy) Vphy/exp(determinant(Vphy)$modulus[1]/n) # Compute initial estimates if not provided assuming no phylogeny if (!is.null(B.init) & length(B.init) != p) { warning("B.init not correct length, so computed B.init using glm()") } if (is.null(B.init) | (!is.null(B.init) & length(B.init) != p)) { B.init <- t(matrix(glm(formula = formula, data = data, family = "binomial")$coefficients, ncol = p)) } B <- B.init s2 <- s2.init b <- matrix(0, nrow = n) beta <- rbind(B, b) mu <- exp(X %*% B)/(1 + exp(X %*% B)) XX <- cbind(X, diag(1, nrow = n, ncol = n)) C <- s2 * Vphy est.s2 <- s2 est.B <- B oldest.s2 <- 10^6 oldest.B <- matrix(10^6, nrow = length(est.B)) iteration <- 0 exitflag <- 0 rcondflag <- 0 while (((t(est.s2 - oldest.s2) %*% (est.s2 - oldest.s2) > tol.pql^2) | (t(est.B - oldest.B) %*% (est.B - oldest.B)/length(B) > tol.pql^2)) & (iteration <= maxit.pql)) { iteration <- iteration + 1 oldest.s2 <- est.s2 oldest.B <- est.B est.B.m <- B oldest.B.m <- matrix(10^6, nrow = length(est.B)) iteration.m <- 0 # mean component while ((t(est.B.m - oldest.B.m) %*% (est.B.m - oldest.B.m)/length(B) > tol.pql^2) & (iteration.m <= maxit.pql)) { iteration.m <- iteration.m + 1 oldest.B.m <- est.B.m invW <- diag(as.vector((mu * (1 - mu))^-1)) V <- invW + C # This flags cases in which V has a very high condition number, which will cause solve() to fail. if (sum(is.infinite(V)) > 0 | rcond(V) < 10^-10) { rcondflag <- rcondflag + 1 B <- 0 * B.init + 0.001 b <- matrix(0, nrow = n) beta <- rbind(B, b) mu <- exp(X %*% B)/(1 + exp(X %*% B)) oldest.B.m <- matrix(10^6, nrow = length(est.B)) invW <- diag(as.vector((mu * (1 - mu))^-1)) V <- invW + C } invV <- solve(V) Z <- X %*% B + b + (y - mu)/(mu * (1 - mu)) denom <- t(X) %*% invV %*% X num <- t(X) %*% invV %*% Z B <- as.matrix(solve(denom, num)) b <- C %*% invV %*% (Z - X %*% B) beta <- rbind(B, b) mu <- exp(XX %*% beta)/(1 + exp(XX %*% beta)) est.B.m <- B } # variance component H <- Z - X %*% B opt <- optim(fn = pglmm.reml, par = s2, tinvW = invW, tH = H, tVphy = Vphy, tX = X, method = "BFGS", control = list(factr = 1e+12, maxit = maxit.reml)) s2 <- abs(opt$par) C <- s2 * Vphy est.s2 <- s2 est.B <- B } convergeflag <- "converged" if (iteration >= maxit.pql | rcondflag >= 3) { convergeflag <- "Did not converge; try increasing maxit.pql or starting with B.init values of .001" } converge.test.s2 <- (t(est.s2 - oldest.s2) %*% (est.s2 - oldest.s2))^0.5 converge.test.B <- (t(est.B - oldest.B) %*% (est.B - oldest.B))^0.5/length(est.B) # Extract parameters invW <- diag(as.vector((mu * (1 - mu))^-1)) V <- invW + C invV <- solve(V) Z <- X %*% B + b + (y - mu)/(mu * (1 - mu)) denom <- t(X) %*% invV %*% X num <- t(X) %*% invV %*% Z B <- solve(denom, num) b <- C %*% invV %*% (Z - X %*% B) beta <- rbind(B, b) mu <- exp(XX %*% beta)/(1 + exp(XX %*% beta)) H <- Z - X %*% B B.cov <- solve(t(X) %*% invV %*% X) B.se <- as.matrix(diag(B.cov))^0.5 B.zscore <- B/B.se B.pvalue <- 2 * pnorm(abs(B/B.se), lower.tail = FALSE) LL <- opt$value lnlike.cond.reml <- -0.5 * (n - p) * log(2 * pi) + 0.5 * determinant(t(X) %*% X)$modulus[1] - 0.5 * LL LL0 <- pglmm.reml(par = 0, tinvW = invW, tH = H, tVphy = Vphy, tX = X) lnlike.cond.reml0 <- -0.5 * (n - p) * log(2 * pi) + 0.5 * determinant(t(X) %*% X)$modulus[1] - 0.5 * LL0 P.H0.s2 <- pchisq(2 * (lnlike.cond.reml - lnlike.cond.reml0), df = 1, lower.tail = F)/2 results <- list(formula = formula, B = B, B.se = B.se, B.cov = B.cov, B.zscore = B.zscore, B.pvalue = B.pvalue, s2 = s2, P.H0.s2 = P.H0.s2, mu = mu, b = b, B.init = B.init, X = X, H = H, VCV = Vphy, V = V, convergeflag = convergeflag, iteration = iteration, converge.test.s2 = converge.test.s2, converge.test.B = converge.test.B, rcondflag = rcondflag) class(results) <- "binaryPGLMM" results } ### binaryPGLMM.sim binaryPGLMM.sim <- function(formula, data = list(), phy, s2 = NULL, B = NULL, nrep = 1) { if (!inherits(phy, "phylo")) stop("Object \"phy\" is not of class \"phylo\".") if (is.null(phy$edge.length)) stop("The tree has no branch lengths.") if (is.null(phy$tip.label)) stop("The tree has no tip labels.") phy <- reorder(phy, "postorder") n <- length(phy$tip.label) mf <- model.frame(formula = formula, data = data) if (nrow(mf) != length(phy$tip.label)) stop("Number of rows of the design matrix does not match with length of the tree.") if (is.null(rownames(mf))) { warning("No tip labels, order assumed to be the same as in the tree.\n") data.names = phy$tip.label } else data.names = rownames(mf) order <- match(data.names, phy$tip.label) if (sum(is.na(order)) > 0) { warning("Data names do not match with the tip labels.\n") rownames(mf) <- data.names } else { tmp <- mf rownames(mf) <- phy$tip.label mf[order, ] <- tmp[1:nrow(tmp), ] } if (is.null(s2)) stop("You must specify s2") if (is.null(B)) stop("You must specify B") X <- model.matrix(attr(mf, "terms"), data = mf) n <- nrow(X) p <- ncol(X) V <- vcv(phy) V <- V/max(V) V <- vcv(phy) V <- V/max(V) V/exp(determinant(V)$modulus[1]/n) V <- s2 * V if (s2 > 10^-8) { iD <- t(chol(V)) } else { iD <- matrix(0, nrow = n, ncol = n) } Y <- matrix(0, nrow = n, ncol = nrep) y <- matrix(0, nrow = n, ncol = nrep) for (i in 1:nrep) { y[, i] <- X %*% B + iD %*% rnorm(n = n) p <- 1/(1 + exp(-y[, i])) Y[, i] <- as.numeric(runif(n = n) < p) } results <- list(Y = Y, y = y, X = X, s2 = s2, B = B, V = V) return(results) } ### print.binaryPGLMM print.binaryPGLMM <- function(x, digits = max(3, getOption("digits") - 3), ...) { cat("\n\nCall:") print(x$formula) cat("\n") cat("Random effect (phylogenetic signal s2):\n") w <- data.frame(s2 = x$s2, Pr = x$P.H0.s2) print(w, digits = digits) cat("\nFixed effects:\n") coef <- data.frame(Value = x$B, Std.Error = x$B.se, Zscore = x$B.zscore, Pvalue = x$B.pvalue) printCoefmat(coef, P.values = TRUE, has.Pvalue = TRUE) cat("\n") } ape/R/is.ultrametric.R0000644000176200001440000000331012775152333014315 0ustar liggesusers## is.ultrametric.R (2016-10-04) ## Test if a Tree is Ultrametric ## Copyright 2003-2016 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.ultrametric <- function(phy, ...) UseMethod("is.ultrametric") ## the main driver code (n = number of tips): .is.ultrametric_ape <- function(phy, tol, option, n) { if (is.null(phy$edge.length)) stop("the tree has no branch lengths") e1 <- phy$edge[, 1] e2 <- phy$edge[, 2] EL <- phy$edge.length ## xx: distance from a node or a tip to the root xx <- numeric(n + phy$Nnode) ## the following must start at the root and follow the ## edges contiguously; so the tree must be either in cladewise ## order (or in pruningwise but the for loop must start from ## the bottom of the edge matrix) for (i in seq_len(length(e1))) xx[e2[i]] <- xx[e1[i]] + EL[i] xx.tip <- xx[1:n] crit <- switch(option, { mn <- min(xx.tip) mx <- max(xx.tip) (mx - mn)/mx }, var(xx.tip)) isTRUE(all.equal.numeric(crit, 0, tolerance = tol)) } is.ultrametric.phylo <- function(phy, tol = .Machine$double.eps^0.5, option = 1, ...) { phy <- reorder.phylo(phy) .is.ultrametric_ape(phy, tol, option, length(phy$tip.label)) } is.ultrametric.multiPhylo <- function(phy, tol = .Machine$double.eps^0.5, option = 1, ...) { phy <- reorder.multiPhylo(phy) labs <- attr(phy, "TipLabel") if (is.null(labs)) sapply(phy, is.ultrametric.phylo, tol = tol, option = option) else sapply(phy, .is.ultrametric_ape, tol = tol, option = option, n = length(labs)) } ape/R/unique.multiPhylo.R0000644000176200001440000000175014157513250015024 0ustar liggesusers## unique.multiPhylo.R (2021-12-19) ## Revomes Duplicate Trees from a List ## Copyright 2007-2021 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. unique.multiPhylo <- function(x, incomparables = FALSE, use.edge.length = FALSE, use.tip.label = TRUE, ...) { n <- length(x) ## fixed by Martin: if (n == 0L) return(x) if (n == 1L) return(structure(x, old.index = 1L)) keep <- 1L old.index <- seq_len(n) for (i in 2:n) { already.seen <- FALSE for (j in keep) { if (all.equal(x[[j]], x[[i]], use.edge.length = use.edge.length, use.tip.label = use.tip.label)) { already.seen <- TRUE old.index[i] <- j break } } if (!already.seen) keep <- c(keep, i) } res <- x[keep] attr(res, "old.index") <- old.index res } ape/R/multi2di.R0000644000176200001440000001424714230417641013106 0ustar liggesusers## multi2di.R (2022-04-22) ## Collapse or Resolve Multichotomies ## Copyright 2005-2021 Emmanuel Paradis, 2018-2022 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. multi2di <- function(phy, ...) UseMethod("multi2di") .multi2di_ape <- function(phy, random, equiprob, n) { ## n: number of tips of phy degree <- tabulate(phy$edge[, 1]) target <- which(degree > 2) if (!length(target)) return(phy) phy <- .reorder_ape(phy, "postorder", FALSE, n, 2L) # by Klaus pos <- match(target, phy$edge[,1]) nb.edge <- dim(phy$edge)[1] nextnode <- n + phy$Nnode + 1L new.edge <- edge2delete <- NULL wbl <- FALSE if (!is.null(phy$edge.length)) { wbl <- TRUE new.edge.length <- NULL } if (random) { if (equiprob) { FUN <- function(N) { x <- rtopology(N, rooted = TRUE)$edge desc <- x[, 2L] x[desc <= N, 2L] <- seq_len(N) x } } else { FUN <- function(N) rtree(N)$edge } } for (i in seq_along(target)) { node <- target[i] N <- degree[node] ind <- pos[i] : (pos[i]+N-1L) desc <- phy$edge[ind, 2] if (random) { ## if we shuffle the descendants, we need to eventually ## reorder the corresponding branch lenghts (see below) ## so we store the result of sample() tmp <- sample(length(desc)) desc <- desc[tmp] res <- FUN(N) } else { res <- matrix(0L, 2*N - 2, 2) res[, 1] <- N + rep(1:(N - 1), each = 2) res[, 2] <- N + rep(2:N, each = 2) res[seq(1, by = 2, length.out = N - 1), 2] <- 1:(N - 1) res[length(res)] <- N } if (wbl) { ## keep the branch lengths coming from `node' el <- numeric(dim(res)[1]) # initialized with 0's el[res[, 2] <= N] <- if (random) phy$edge.length[ind][tmp] else phy$edge.length[ind] } ## now substitute the nodes in `res' ## `node' stays at the "root" of these new ## edges whereas their "tips" are `desc' Nodes <- c(node, nextnode:(nextnode + N - 3L)) res[, 1] <- Nodes[res[, 1] - N] tmp <- res[, 2] > N res[tmp, 2] <- Nodes[res[tmp, 2] - N] res[!tmp, 2] <- desc[res[!tmp, 2]] new.edge <- rbind(new.edge, res) edge2delete <- c(edge2delete, ind) if (wbl) new.edge.length <- c(new.edge.length, el) nextnode <- nextnode + N - 2L phy$Nnode <- phy$Nnode + N - 2L } phy$edge <- rbind(phy$edge[-edge2delete, ], new.edge) if (wbl) phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length) if (!is.null(attr(phy, "order"))) attr(phy, "order") <- NULL if (!is.null(phy$node.label)) phy$node.label <- c(phy$node.label, rep("", phy$Nnode - length(phy$node.label))) phy <- .reorder_ape(phy, "cladewise", FALSE, n, 1L) # fix by Klaus (2017-01-16) ## the node numbers are not in increasing order in edge[, 2]: this ## will confuse drop.tip and other functions (root), so renumber them newNb <- integer(phy$Nnode) newNb[1] <- n + 1L sndcol <- phy$edge[, 2] > n ## reorder node labels before changing edge: if (!is.null(phy$node.label)) { o <- 1 + rank(phy$edge[sndcol, 2]) ## the root's label is not changed: phy$node.label <- phy$node.label[c(1, o)] } ## executed from right to left, so newNb is modified before phy$edge: phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2] - n] <- n + 2:phy$Nnode phy$edge[, 1] <- newNb[phy$edge[, 1] - n] phy } multi2di.phylo <- function (phy, random = TRUE, equiprob = TRUE, ...) .multi2di_ape(phy, random, equiprob = equiprob, length(phy$tip.label)) multi2di.multiPhylo <- function(phy, random = TRUE, equiprob = TRUE, ...) { labs <- attr(phy, "TipLabel") oc <- oldClass(phy) class(phy) <- NULL if (is.null(labs)) phy <- lapply(phy, multi2di.phylo, random = random, equiprob = equiprob) else { phy <- lapply(phy, .multi2di_ape, random = random, equiprob = equiprob, n = length(labs)) attr(phy, "TipLabel") <- labs } class(phy) <- oc phy } di2multi <- function(phy, ...) UseMethod("di2multi") ## by Klaus (2018-05-28, 2022-03-24) .di2multi_ape <- function(phy, tol = 1e-08, ntips, tip2root = FALSE) { if (is.null(phy$edge.length)) stop("the tree has no branch length") phy <- .reorder_ape(phy, "cladewise", FALSE, ntips, 1L) # by Klaus e1 <- seq_len(max(phy$edge)) ind <- which(phy$edge.length < tol & phy$edge[, 2] > ntips) n <- length(ind) if (!n) return(phy) ## new 24.3.22 if (tip2root) { ## nh <- node.depth.edgelength(phy) phy_tmp <- .reorder_ape(phy, "postorder", FALSE, ntips, 2L) m <- phy_tmp$Nnode nh <- .C(node_depth_edgelength, as.integer(phy_tmp$edge[, 1]), as.integer(phy_tmp$edge[, 2]), as.integer(nrow(phy_tmp$edge)), as.double(phy_tmp$edge.length), double(ntips + m))[[5]] } for (i in ind) e1[phy$edge[i,2]] <- e1[phy$edge[i,1]] phy$edge[, 1] <- e1[phy$edge[, 1]] node2del <- phy$edge[ind, 2] phy$edge <- phy$edge[-ind, ] phy$edge.length <- if (tip2root) nh[phy$edge[, 2]] - nh[phy$edge[, 1]] else phy$edge.length[-ind] phy$Nnode <- phy$Nnode - n e1 <- sort(unique(phy$edge[, 1])) tmp <- integer(max(phy$edge)) tmp[e1] <- ntips + seq_len(phy$Nnode) tmp[1:ntips] <- seq_len(ntips) phy$edge[] <- tmp[phy$edge] if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[-(node2del - ntips)] phy } di2multi.phylo <- function (phy, tol = 1e-08, ...) .di2multi_ape(phy, tol, length(phy$tip.label), ...) di2multi.multiPhylo <- function(phy, tol = 1e-08, ...) { labs <- attr(phy, "TipLabel") oc <- oldClass(phy) class(phy) <- NULL if (is.null(labs)) phy <- lapply(phy, di2multi.phylo, tol = tol, ...) else { phy <- lapply(phy, .di2multi_ape, tol = tol, ntips = length(labs), ...) attr(phy, "TipLabel") <- labs } class(phy) <- oc phy } ape/R/is.compatible.R0000644000176200001440000000145014533611654014104 0ustar liggesusers## is.compatible.R (2017-06-03) ## Check Compatibility of Splits ## Copyright 2011 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.compatible <- function(obj) UseMethod("is.compatible") is.compatible.bitsplits <- function(obj) { m <- obj$matsplit n <- ncol(m) ntaxa <- length(obj$labels) for (i in 1:(n - 1)) for (j in (i + 1):n) if (!arecompatible(m[, i], m[, j], ntaxa)) return(FALSE) TRUE } arecompatible <-function(x, y, n) { msk <- !as.raw(2^(8 - (n %% 8)) - 1) foo <- function(v) { lv <- length(v) v[lv] <- v[lv] & msk as.integer(all(v == as.raw(0))) } nE <- foo(x & y) + foo(x & !y) + foo(!x & y) + foo(!x & !y) nE >= 1 } ape/R/makeLabel.R0000644000176200001440000001402013551115316013216 0ustar liggesusers## makeLabel.R (2019-10-14) ## Label Management ## Copyright 2010-2019 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. makeLabel <- function(x, ...) UseMethod("makeLabel") makeLabel.character <- function(x, len = 99, space = "_", make.unique = TRUE, illegal = "():;,[]", quote = FALSE, ...) { x <- gsub("[[:space:]]", space, x) if (illegal != "") { illegal <- unlist(strsplit(illegal, NULL)) for (i in illegal) x <- gsub(i, "", x, fixed = TRUE) } if (quote) len <- len - 2 nc <- nchar(x) > len if (any(nc)) x[nc] <- substr(x[nc], 1, len) tab <- table(x) if (all(tab == 1)) make.unique <- FALSE if (make.unique) { dup <- tab[which(tab > 1)] nms <- names(dup) for (i in 1:length(dup)) { j <- which(x == nms[i]) end <- nchar(x[j][1]) ## w: number of characters to be added as suffix w <- floor(log10(dup[i])) + 1 suffix <- formatC(1:dup[i], width = w, flag = "0") if (end + w > len) { start <- end - w + 1 substr(x[j], start, end) <- suffix } else x[j] <- paste(x[j], suffix, sep = "") } } if (quote) x <- paste('"', x, '"', sep = "") x } makeLabel.phylo <- function(x, tips = TRUE, nodes = TRUE, ...) { if (tips) x$tip.label <- makeLabel.character(x$tip.label, ...) if (!is.null(x$node.label) && nodes) x$node.label <- makeLabel.character(x$node.label, ...) x } makeLabel.multiPhylo <- function(x, tips = TRUE, nodes = TRUE, ...) { y <- attr(x, "TipLabel") if (is.null(y)) { for (i in 1:length(x)) x[[i]] <- makeLabel.phylo(x[[i]], tips = tips, nodes = nodes, ...) } else { attr(x, "TipLabel") <- makeLabel.character(y, ...) } x } makeLabel.DNAbin <- function(x, ...) { if (is.list(x)) names(x) <- makeLabel.character(names(x), ...) else rownames(x) <- makeLabel.character(rownames(x), ...) x } mixedFontLabel <- function(..., sep = " ", italic = NULL, bold = NULL, parenthesis = NULL, always.upright = c("sp.", "spp.", "ssp.")) { x <- list(...) n <- length(x) if (!is.null(italic)) { for (i in italic) { y <- x[[i]] s <- ! y %in% always.upright y[s] <- paste("italic(\"", y[s], "\")", sep = "") if (any(!s)) y[!s] <- paste("plain(\"", y[!s], "\")", sep = "") x[[i]] <- y } } if (!is.null(bold)) { for (i in bold) { y <- x[[i]] s <- logical(length(y)) s[grep("^italic", y)] <- TRUE y[s] <- sub("^italic", "bolditalic", y[s]) y[!s] <- paste("bold(\"", y[!s], "\")", sep = "") x[[i]] <- y } } k <- which(! 1:n %in% c(italic, bold)) # those in upright if (length(k)) for (i in k) x[[i]] <- paste("plain(\"", x[[i]], "\")", sep = "") if (!is.null(parenthesis)) for (i in parenthesis) x[[i]] <- paste("(", x[[i]], ")", sep = "") res <- x[[1L]] if (n > 1) { sep <- rep(sep, length.out = n - 1L) for (i in 2:n) res <- paste(res, "*\"", sep[i - 1L], "\"*", x[[i]], sep = "") } parse(text = res) } .getSeparatorTaxaLabels <- function(x) { if (length(grep("_", x))) "_" else " " } label2table <- function(x, sep = NULL, as.is = FALSE) { n <- length(x) if (is.null(sep)) sep <- .getSeparatorTaxaLabels(x) x <- strsplit(x, sep) maxlen <- max(lengths(x)) x <- unlist(lapply(x, "[", 1:maxlen)) x <- matrix(x, n, maxlen, byrow = TRUE) x <- as.data.frame(x, as.is = as.is) baselevels <- c("genus", "species", "subspecies") nmx <- if (maxlen <= 3) baselevels[1:maxlen] else c(baselevels, paste0("type", 1:(maxlen - 3))) names(x) <- nmx x } stripLabel <- function(x, species = FALSE, subsp = TRUE, sep = NULL) { if (is.null(sep)) sep <- .getSeparatorTaxaLabels(x) n <- 0 if (species) n <- 1 else if (subsp) n <- 2 if (!n) return(x) x <- strsplit(x, sep) x <- lapply(x, "[", 1:n) sapply(x, paste, collapse = sep) } abbreviateGenus <- function(x, genus = TRUE, species = FALSE, sep = NULL) { if (is.null(sep)) sep <- .getSeparatorTaxaLabels(x) if (genus) x <- sub(paste0("[[:lower:]]{1,}", sep), paste0(".", sep), x) if (!species) return(x) x <- strsplit(x, sep) k <- which(lengths(x, use.names = FALSE) > 1) for (i in k) x[[i]][2] <- paste0(substr(x[[i]][2], 1, 1), ".") sapply(x, paste, collapse = sep) } updateLabel <- function(x, old, new, ...) UseMethod("updateLabel") updateLabel.character <- function(x, old, new, exact = TRUE, ...) { if (length(old) != length(new)) stop("'old' and 'new' not of the same length") if (exact) { for (i in seq_along(old)) x[x == old[i]] <- new[i] } else { for (i in seq_along(old)) x[grep(old[i], x)] <- new[i] } x } updateLabel.DNAbin <- function(x, old, new, exact = TRUE, ...) { labs <- labels(x) labs <- updateLabel.character(labs, old, new, exact, ...) if (is.list(x)) names(x) <- labs else rownames(x) <- labs x } updateLabel.AAbin <- function(x, old, new, exact = TRUE, ...) updateLabel.DNAbin(x, old, new, exact, ...) updateLabel.phylo <- function(x, old, new, exact = TRUE, nodes = FALSE, ...) { x$tip.label <- updateLabel.character(x$tip.label, old, new, exact, ...) if (nodes) x$node.label <- updateLabel.character(x$node.label, old, new, exact, ...) x } updateLabel.evonet <- function(x, old, new, exact = TRUE, nodes = FALSE, ...) updateLabel.phylo(x, old, new, exact, nodes, ...) updateLabel.data.frame <- function(x, old, new, exact = TRUE, ...) { row.names(x) <- updateLabel.character(row.names(x), old, new, exact, ...) x } updateLabel.matrix <- function(x, old, new, exact = TRUE, ...) { rownames(x) <- updateLabel.character(rownames(x), old, new, exact, ...) x } ape/R/checkValidPhylo.R0000644000176200001440000000775613276335105014440 0ustar liggesusers## checkValidPhylo.R (2016-07-26) ## Check the Structure of a "phylo" Object ## Copyright 2015-2016 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. checkValidPhylo <- function(phy) { cat("Starting checking the validity of ", deparse(substitute(phy)), "...\n", sep = "") n <- m <- NULL if (is.null(phy$tip.label)) { cat(" FATAL: no element named 'tip.label' in the tree -- did you extract this tree from a \"multiPhylo\" object?\n") } else { if (!is.vector(phy$tip.label)) { cat(" FATAL: 'tip.label' is not a vector\n") } else { if (!is.character(phy$tip.label)) cat(" MODERATE: 'tip.label' is not of mode \"character\"\n") n <- length(phy$tip.label) cat("Found number of tips: n =", n, "\n") } } if (is.null(n)) cat(" FATAL: cannot determine the number of tips\n") if (is.null(phy$Nnode)) { cat(" FATAL: no element named 'Nnode' in the tree\n") } else { if (!is.vector(phy$Nnode)) cat(" MODERATE: 'Nnode' is not a vector\n") if (length(phy$Nnode) != 1) cat(" FATAL: 'Nnode' is not of length 1\n") if (!is.numeric(phy$Nnode)) { cat(" FATAL: 'Nnode' is not numeric\n") } else { if (storage.mode(phy$Nnode) != "integer") cat(" MODERATE: 'Nnode' is not stored as an integer\n") } if (length(phy$Nnode) == 1 && is.numeric(phy$Nnode)) { m <- phy$Nnode cat("Found number of nodes: m =", m, "\n") } } if (is.null(m)) cat(" FATAL: cannot determine the number of nodes\n") if (is.null(phy$edge)) { cat(" FATAL: no element named 'edge' in the tree\n") } else { if (!is.matrix(phy$edge)) { cat(" FATAL: 'edge' is not a matrix\n") } else { nc <- ncol(phy$edge) if (nc != 2) cat(" FATAL: 'edge' has", nc, "columns: it MUST have 2\n") if (!is.numeric(phy$edge)) { cat(" FATAL: 'edge' is not a numeric matrix\n") } else { if (storage.mode(phy$edge) != "integer") cat(" MODERATE: the matrix 'edge' is not stored as integers\n") if (nc == 2) { if (any(phy$edge <= 0)) cat(" FATAL: some elements in 'edge' are negative or zero\n") if (is.null(n) || is.null(m)) { cat("The number of tips and/or nodes was not found: cannot check completely the 'edge' matrix\n") } else { tab <- tabulate(phy$edge) if (length(tab) > n + m) cat(" FATAL: some numbers in 'edge' are larger than 'n + m'\n") if (length(tab) < n + m) cat(" MODERATE: some nodes are missing in 'edge'\n") if (any(tab[1:n] != 1)) cat(" FATAL: each tip must appear once in 'edge'\n") if (any(tab[n + 1:m] < 2)) cat(" FATAL: all nodes should appear at least twice in 'edge'\n") if (m > 1) if (any(tab[n + 2:m] < 2)) cat(" MODERATE: some nodes are of degree 1 or less\n") if (any(phy$edge[, 1] <= n & phy$edge[, 1] > 0)) cat(" FATAL: tips should not appear in the 1st column of 'edge'\n") if (any(table(phy$edge[, 2]) > 1)) cat(" FATAL: nodes and tips should appear only once in the 2nd column of 'edge'\n") if (any(phy$edge[, 2] == n + 1L)) cat(" FATAL: the root node should not appear in the 2nd column of 'edge'\n") } } } } } cat("Done.\n") } ape/R/treePop.R0000644000176200001440000000132012465112403012753 0ustar liggesusers## treePop.R (2011-10-11) ## Tree Popping ## Copyright 2011 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. treePop <- function(obj) { mf <- obj$matsplit labels <- obj$labels n <- length(labels) imf <- as.integer(mf) freq <- obj$freq mimf <- matrix(imf, nrow(mf), ncol(mf)) ans <- .C(C_treePop, mimf, as.double(freq), as.integer(ncol(mf)), as.integer(n), integer(2*n - 3), integer(2*n - 3), double(2*n - 3), NAOK = TRUE) obj <- list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]], tip.label = labels, Nnode = n - 2L) class(obj) <- "phylo" reorder(obj) } ape/R/mantel.test.R0000644000176200001440000000273613434732467013626 0ustar liggesusers## mantel.test.R (2019-02-25) ## Mantel Test for Similarity of Two Matrices ## Copyright 2002-2011 Ben Bolker and Julien Claude, 2019 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. perm.rowscols <- function(m1, n) { s <- sample(1:n) m1[s, s] } ## calculate the Mantel z-statistic for two square matrices m1 and m2 ## old code: ## mant.zstat <- function(m1, m2) sum(lower.triang(m1 * m2)) ## modified by EP following suggestion by Andrzej Galecki (2018-02-07) mant.zstat <- function(m1, m2) { diag(m1) <- diag(m2) <- 0 # in case the diagonals are not 0 sum(m1 * m2)/2 } mantel.test <- function(m1, m2, nperm = 999, graph = FALSE, alternative = "two.sided", ...) { alternative <- match.arg(alternative, c("two.sided", "less", "greater")) n <- nrow(m1) realz <- mant.zstat(m1, m2) nullstats <- replicate(nperm, mant.zstat(m1, perm.rowscols(m2, n))) pval <- switch(alternative, "two.sided" = 2 * min(sum(nullstats >= realz), sum(nullstats <= realz)), "less" = sum(nullstats <= realz), "greater" = sum(nullstats >= realz)) pval <- (pval + 1) / (nperm + 1) # 'realz' is included in 'nullstats' if (alternative == "two.sided" && pval > 1) pval <- 1 if (graph) { plot(density(nullstats), type = "l", ...) abline(v = realz) } list(z.stat = realz, p = pval, alternative = alternative) } ape/R/diversi.gof.R0000644000176200001440000000455012465112403013564 0ustar liggesusers## diversi.gof.R (2006-10-16) ## Tests of Constant Diversification Rates ## Copyright 2002-2006 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. diversi.gof <- function(x, null = "exponential", z = NULL) { n <- length(x) if (null == "exponential") { delta <- n/sum(x) z <- 1 - exp(-delta * sort(x)) } else { nmsz <- deparse(substitute(z)) z <- sort(z) # utile ??? } i <- 1:n W2 <- sum((z - (2*i - 1)/(2*n))^2) + 1/12*n A2 <- -sum((2*i - 1)*(log(z) + log(1 - rev(z))))/n - n if (null == "exponential") { W2 <- W2*(1 - 0.16/n) A2 <- A2*(1 + 0.6/n) } else W2 <- (W2 - 0.4/n + 0.6/n^2)/(1 + 1/n) cat("\nTests of Constant Diversification Rates\n\n") cat("Data:", deparse(substitute(x)), "\n") cat("Number of branching times:", n, "\n") cat("Null model: ") if (null == "exponential") cat("exponential\n\n") else cat(nmsz, "(user-specified)\n\n") cat("Cramer-von Mises test: W2 =", round(W2, 3)) if (null == "exponential") { if (W2 < 0.177) cat(" P > 0.1\n") if (W2 >= 0.177 && W2 < 0.224) cat(" 0.05 < P < 0.1\n") if (W2 >= 0.224 && W2 < 0.273) cat(" 0.025 < P < 0.05\n") if (W2 >= 0.273 && W2 < 0.337) cat(" 0.01 < P < 0.025\n") if (W2 > 0.337) cat(" P < 0.01\n") } else { if (W2 < 0.347) cat(" P > 0.1\n") if (W2 >= 0.347 && W2 < 0.461) cat(" 0.05 < P < 0.1\n") if (W2 >= 0.461 && W2 < 0.581) cat(" 0.025 < P < 0.05\n") if (W2 >= 0.581 && W2 < 0.743) cat(" 0.01 < P < 0.025\n") if (W2 > 0.743) cat(" P < 0.01\n") } cat("Anderson-Darling test: A2 =", round(A2, 3)) if (null == "exponential") { if (A2 < 1.078) cat(" P > 0.1\n") if (A2 >= 1.078 && A2 < 1.341) cat(" 0.05 < P < 0.1\n") if (A2 >= 1.341 && A2 < 1.606) cat(" 0.025 < P < 0.05\n") if (A2 >= 1.606 && A2 < 1.957) cat(" 0.01 < P < 0.025\n") if (A2 > 1.957) cat(" P < 0.01\n") } else { if (A2 < 1.933) cat(" P > 0.1\n") if (A2 >= 1.933 && A2 < 2.492) cat(" 0.05 < P < 0.1\n") if (A2 >= 2.492 && A2 < 3.070) cat(" 0.025 < P < 0.05\n") if (A2 >= 3.070 && A2 < 3.857) cat(" 0.01 < P < 0.025\n") if (A2 > 3.857) cat(" P < 0.01\n") } } ape/R/rtree.R0000644000176200001440000002525313754005020012467 0ustar liggesusers## rtree.R (2020-11-14) ## Generates Trees ## Copyright 2004-2020 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .N <- unname(howmanytrees(792, labeled = FALSE, detail = TRUE)) .lN <- log10(.N) .xi <- 2.477993 .lxi <- log10(.xi) .log10sumxipow0to9 <- log10(sum(.xi^(0:9))) .getProb4rtree <- function(n) { a <- 1:floor(n/2) b <- n - a x <- .N[a] * .N[b] x <- x / max(x) p <- x / sum(x) if (all(is.finite(p))) return(p) ## use the log10-scale foo <- function(n) 0.3941 * n - 4.153 lNa <- .lN[a] lNa[a > 792] <- foo(a[a > 792]) lNb <- .lN[b] lNb[b > 792] <- foo(b[b > 792]) lx <- lNa + lNb # log10-scale ## we use the first 10 terms of the approximation (see the vignette) sx <- (n - 1) * .lxi + (n - 10) * .lxi + .log10sumxipow0to9 p <- lx - sx p <- p - min(p) # rescale p / sum(p) } rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, equiprob = FALSE, ...) { ## as.integer(runif()) is more efficient than sample.int() but we ## have to keep the latter for the default of 'equiprob' because ## this is used in other packages with set.seed() if (equiprob) { bar <- function(n) { if (n < 4L) return(1L) p <- .getProb4rtree(n) sample.int(floor(n / 2), 1L, FALSE, p, FALSE) ##if (n < 4L) return(1L) ##as.integer(runif(1L, 0, floor(n / 2))) + 1L } } else { bar <- function(n) sample.int(n - 1L, 1L, FALSE, NULL, FALSE) } foo <- function(n, pos) { n1 <- bar(n) n2 <- n - n1 po2 <- pos + 2L * n1 - 1L edge[c(pos, po2), 1L] <<- nod nod <<- nod + 1L if (n1 > 2L) { edge[pos, 2L] <<- nod foo(n1, pos + 1L) } else if (n1 == 2L) { edge[pos + 1:2, 1L] <<- edge[pos, 2L] <<- nod nod <<- nod + 1L } if (n2 > 2L) { edge[po2, 2L] <<- nod foo(n2, po2 + 1L) } else if (n2 == 2L) { edge[po2 + 1:2, 1L] <<- edge[po2, 2L] <<- nod nod <<- nod + 1L } } if (n < 1) stop("a tree must have at least 1 tip") if (n < 3 && !rooted) stop("an unrooted tree must have at least 3 tips") n <- as.integer(n) ## make the tip labels: if (is.null(tip.label)) { tip.label <- paste0("t", 1:n) } else { tip.label <- as.character(tip.label) Nlabs <- length(tip.label) if (!Nlabs) { warning("vector 'tip.label' of length zero: generating tip labels") tip.label <- paste0("t", seq_len(n)) } else if (Nlabs > n) { warning("vector 'tip.label' longer than 'n': was shorten") tip.label <- tip.label[1:n] } else if (Nlabs < n) { warning("vector 'tip.label' shorter than 'n': was recycled") tip.label <- rep(tip.label, length.out = n) } } if (n == 1L) { # rooted case with n = 1 nbr <- 1L edge <- matrix(2:1, 1L, 2L) } else { # all other cases nbr <- 2L * n - 3L + rooted edge <- matrix(NA_integer_, nbr, 2L) } if (rooted) { if (n == 2L) { edge[] <- c(3L, 3L, 1L, 2L) } else if (n == 3L) { edge[] <- c(4L, 5L, 5L, 4L, 5L, 1:3) } else if (n > 3L) { nod <- n + 1L foo(n, 1L) ## slightly more efficient than affecting the tip numbers in foo(): i <- which(is.na(edge[, 2L])) edge[i, 2L] <- 1:n } } else { # unrooted case if (n == 3L) { edge[] <- c(4L, 4L, 4L, 1:3) } else if (n == 4L) { edge[] <- c(5L, 6L, 6L, 5L, 5L, 6L, 1:4) } else if (n == 5L) { edge[] <- c(6L, 6L, 6L, 7L, 7L, 8L, 8L, 1L, 2L, 7L, 3L, 8L, 4L, 5L) } else { # n > 5 ## generate a rooted tree without branch lengths and unroot it phy <- rtree(n, tip.label = tip.label, br = NULL, equiprob = equiprob, ...) phy <- .unroot_ape(phy, n) } } if (!exists("phy", inherits = FALSE)) { phy <- list(edge = edge, tip.label = sample(tip.label)) phy$Nnode <- if (n == 1L) 1L else n - 2L + as.integer(rooted) class(phy) <- "phylo" attr(phy, "order") <- "cladewise" } if (!is.null(br)) { phy$edge.length <- if (is.function(br)) br(nbr, ...) else rep(br, length.out = nbr) } phy } rcoal <- function(n, tip.label = NULL, br = "coalescent", ...) { n <- as.integer(n) nbr <- 2*n - 2 edge <- matrix(NA, nbr, 2) ## coalescence times by default: x <- if (is.character(br)) 2*rexp(n - 1)/(as.double(n:2) * as.double((n - 1):1)) else if (is.numeric(br)) rep(br, length.out = n - 1) else br(n - 1, ...) if (n == 2) { edge[] <- c(3L, 3L, 1:2) edge.length <- rep(x, 2) } else if (n == 3) { edge[] <- c(4L, 5L, 5L, 4L, 5L, 1:3) edge.length <- c(x[c(2, 1, 1)], sum(x)) } else { edge.length <- numeric(nbr) h <- numeric(2*n - 1) node.height <- cumsum(x) pool <- 1:n nextnode <- 2L*n - 1L for (i in 1:(n - 1)) { y <- sample(pool, size = 2) ind <- (i - 1)*2 + 1:2 edge[ind, 2] <- y edge[ind, 1] <- nextnode edge.length[ind] <- node.height[i] - h[y] h[nextnode] <- node.height[i] pool <- c(pool[! pool %in% y], nextnode) nextnode <- nextnode - 1L } } phy <- list(edge = edge, edge.length = edge.length) if (is.null(tip.label)) tip.label <- paste("t", 1:n, sep = "") phy$tip.label <- sample(tip.label) phy$Nnode <- n - 1L class(phy) <- "phylo" phy <- reorder(phy) ## to avoid crossings when converting with as.hclust: phy$edge[phy$edge[, 2] <= n, 2] <- 1:n phy } rmtree <- function(N, n, rooted = TRUE, tip.label = NULL, br = runif, equiprob = FALSE, ...) { a <- replicate(N, rtree(n, rooted = rooted, tip.label = tip.label, br = br, equiprob = equiprob, ...), simplify = FALSE) class(a) <- "multiPhylo" a } stree <- function(n, type = "star", tip.label = NULL) { type <- match.arg(type, c("star", "balanced", "left", "right")) n <- as.integer(n) if (type == "star") { N <- n m <- 1L } else { m <- n - 1L N <- n + m - 1L } edge <- matrix(0L, N, 2) switch(type, "star" = { edge[, 1] <- n + 1L edge[, 2] <- 1:n }, "balanced" = { if (log2(n) %% 1) stop("'n' is not a power of 2: cannot make a balanced tree") foo <- function(node, size) { if (size == 2) { edge[c(i, i + 1L), 1L] <<- node edge[c(i, i + 1L), 2L] <<- c(nexttip, nexttip + 1L) nexttip <<- nexttip + 2L i <<- i + 2L } else { for (k in 1:2) { # do the 2 subclades edge[i, ] <<- c(node, nextnode) nextnode <<- nextnode + 1L i <<- i + 1L foo(nextnode - 1L, size/2) } } } i <- 1L nexttip <- 1L nextnode <- n + 2L foo(n + 1L, n) }, "left" = { edge[c(seq.int(from = 1, to = N - 1, by = 2), N), 2L] <- 1:n nodes <- (n + 1L):(n + m) edge[seq.int(from = 2, to = N - 1, by = 2), 2L] <- nodes[-1] edge[, 1L] <- rep(nodes, each = 2) }, "right" = { nodes <- (n + 1L):(n + m) edge[, 1L] <- c(nodes, rev(nodes)) edge[, 2L] <- c(nodes[-1], 1:n) }) if (is.null(tip.label)) tip.label <- paste("t", 1:n, sep = "") phy <- list(edge = edge, tip.label = tip.label, Nnode = m) class(phy) <- "phylo" attr(phy, "order") <- "cladewise" phy } .check.tip.label <- function(tip.label, n, prefix = "t") { if (is.null(tip.label)) return(paste0(prefix, seq_len(n))) tip.label <- as.character(tip.label) Nlabs <- length(tip.label) if (!Nlabs) { warning("vector 'tip.label' of length zero: generating tip labels", call. = FALSE) return(paste0(prefix, seq_len(n))) } if (Nlabs > n) { warning("vector 'tip.label' longer than 'n': was shorten", call. = FALSE) return(tip.label[1:n]) } if (Nlabs < n) { warning("vector 'tip.label' shorter than 'n': was recycled", call. = FALSE) return(rep(tip.label, length.out = n)) } tip.label } rtopology <- function(n, rooted = FALSE, tip.label = NULL, br = runif, ...) { n <- as.integer(n) if (n < 1) stop("a tree must have at least 1 tip") if (n < 3 && !rooted) stop("an unrooted tree must have at least 3 tips") if (n < 4) return(rtree(n, rooted = rooted, tip.label = tip.label, br = br, ...)) nb <- n - 3L x <- as.integer(runif(nb) * seq(3, by = 2, length.out = nb)) + 1L tip.label <- .check.tip.label(tip.label, n) Nnode <- n - 2L TIPS <- sample.int(n) # permute the tips beforehand N <- 3L * n - 6L edge <- matrix(NA_integer_, N, 2L) alive <- logical(N) alive[1:3] <- TRUE Nalive <- 3L e <- 1:3 ROOT <- n + 1L edge[1:3] <- ROOT nextnode <- ROOT + 1L edge[1:3 + N] <- TIPS[1:3] i <- 4L while (i <= n) { ## draw a branch among the alive ones k <- which(alive)[x[i - 3L]] # find its location in edge alive[k] <- FALSE # delete 1 branch e <- e + 3L # add 3 new branches alive[e] <- TRUE edge[e[1]] <- edge[k] edge[e[1] + N] <- nextnode edge[e[2:3]] <- nextnode edge[e[2] + N] <- edge[k + N] edge[e[3] + N] <- TIPS[i] nextnode <- nextnode + 1L Nalive <- Nalive + 2L i <- i + 1L } edge <- edge[alive, ] phy <- list(edge = edge, tip.label = tip.label, Nnode = Nnode) class(phy) <- "phylo" phy <- reorder(phy) if (rooted) { ## exclude the root partition and add the terminal trivial ## partitions og <- sample.int(n + Nnode - 1L, 1L) if (og > n) { pp <- prop.part(phy)[-1L] og <- pp[[og - n]] } phy <- root.phylo(phy, og, resolve.root = TRUE) } nbr <- Nedge.phylo(phy) if (!is.null(br)) { phy$edge.length <- if (is.function(br)) br(nbr, ...) else rep(br, length.out = nbr) } phy } rmtopology <- function(N, n, rooted = FALSE, tip.label = NULL, br = runif, ...) { a <- replicate(N, rtopology(n, rooted = rooted, tip.label = tip.label, br = br, ...), simplify = FALSE) class(a) <- "multiPhylo" a } ape/R/plot.phyloExtra.R0000644000176200001440000000371513327065734014477 0ustar liggesusers## plot.phyloExtra.R (2018-07-28) ## Extra Functions for Plotting and Annotating ## Copyright 2016-2018 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. plotBreakLongEdges <- function(phy, n = 1, ...) { o <- order(phy$edge.length, decreasing = TRUE) i <- o[seq_len(n)] phy$edge.length[i] <- max(phy$edge.length[-i]) plot.phylo(phy, ...) edgelabels(edge = i, pch = 19, col = "white") edgelabels("//", i, frame = "n") } drawSupportOnEdges <- function(value, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) n <- lastPP$Ntip m <- lastPP$Nnode if (length(value) == m) value <- value[-1] else if (length(value) != m - 1) stop("incorrect number of support values") nodes <- 2:m + n i <- match(nodes, lastPP$edge[, 2]) edgelabels(value, i, ...) } plotTreeTime <- function(phy, tip.dates, show.tip.label = FALSE, y.lim = NULL, color = TRUE, ...) { n <- Ntip(phy) if (length(tip.dates) != n) stop("number of dates does not match number of tips of the tree") if (is.null(y.lim)) y.lim <- c(-n/4, n) plot(phy, show.tip.label = show.tip.label, y.lim = y.lim, ...) psr <- par("usr") if (anyNA(tip.dates)) { s <- which(!is.na(tip.dates)) tip.dates <- tip.dates[s] } else s <- 1:n range.dates <- range(as.numeric(tip.dates)) diff.range <- range.dates[2] - range.dates[1] footrans <- function(x) psr[2] * (as.numeric(x) - range.dates[1]) / diff.range x1 <- footrans(tip.dates) y1 <- psr[3] lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) x2 <- lastPP$xx[s] y2 <- lastPP$yy[s] x1.scaled <- x1 / max(x1) col <- if (color) rgb(x1.scaled, 0, 1 - x1.scaled, alpha = .5) else grey(x1.scaled, alpha = 0.5) segments(x1, y1, x2, y2, col = col) at <- pretty(tip.dates) axis(1, at = footrans(at), labels = at) } ape/R/parafit.R0000644000176200001440000001211314533612157012776 0ustar liggesusers'parafit' <- function(host.D, para.D, HP, nperm=999, test.links=FALSE, seed=NULL, correction="none", silent=FALSE) # # Test of host-parasite coevolution # host.D = host distance or patristic matrix (class dist or matrix) # para.D = parasite distance or patristic matrix (class dist or matrix) # HP = host-parasite link matrix (n.host, n.para) # # Pierre Legendre, May 2009 { epsilon <- sqrt(.Machine$double.eps) if(is.null(seed)) { runif(1) seed <- .Random.seed[trunc(runif(1,1,626))] } HP <- as.matrix(HP) host.D <- as.matrix(host.D) host.pc <- pcoa(host.D, correction=correction) if(host.pc$correction[2] == 1) { if(min(host.pc$values[,2]) < -epsilon) stop('Host D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"') sum.host.values.sq <- sum(host.pc$values[,1]^2) host.vectors <- host.pc$vectors } else { sum.host.values.sq <- sum(host.pc$values[,2]^2) host.vectors <- host.pc$vectors.cor } n.host <- nrow(host.D) para.D <- as.matrix(para.D) para.pc <- pcoa(para.D, correction=correction) if(para.pc$correction[2] == 1) { if(min(para.pc$values[,2]) < -epsilon) stop('Parasite D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"') sum.para.values.sq <- sum(para.pc$values[,1]^2) para.vectors <- para.pc$vectors } else { sum.para.values.sq <- sum(para.pc$values[,2]^2) para.vectors <- para.pc$vectors.cor } n.para <- nrow(para.D) if(!silent) cat("n.hosts =", n.host, ", n.parasites =", n.para,'\n') a <- system.time({ tracemax <- max(sum.host.values.sq, sum.para.values.sq) if(n.host == n.para) { if(!silent) cat("The function cannot check if matrix HP has been entered in the right way.",'\n') if(!silent) cat("It will assume that the rows of HP are the hosts.",'\n') } else { temp <- dim(HP) if(temp[1] == n.host) { if(temp[2] != n.para) stop("Matrices host.D, para.D and HP not comformable") } else if(temp[2] == n.host) { if(temp[1] != n.para) stop("Matrices host.D, para.D and HP not comformable") HP <- t(HP) if(!silent) cat("Matrix HP has been transposed for comformity with host.D and para.D.",'\n') } else { stop("Matrices host.D, para.D and HP not comformable") } } p.per.h <- apply(HP, 1, sum) h.per.p <- apply(HP, 2, sum) # # Compute and test the global statistics mat.4 <- t(host.vectors) %*% HP %*% para.vectors global <- sum(mat.4^2) if(nperm > 0) { set.seed(seed) nGT <- 1 global.perm <- NA for(i in 1:nperm) { HP.perm <- apply(HP, 2, sample) mat.4.perm <- t(host.vectors) %*% HP.perm %*% para.vectors global.perm <- c(global.perm, sum(mat.4.perm^2)) if(global.perm[i+1] >= global) nGT <- nGT+1 } global.perm <- global.perm[-1] p.global <- nGT/(nperm+1) } else { p.global <- NA } # # Test individual H-P links if(test.links) { # 1. Create the list of H-P pairs list.hp <- which( t(cbind(HP,rep(0,n.host))) > 0) HP.list <- cbind((list.hp %/% (n.para+1))+1, list.hp %% (n.para+1)) colnames(HP.list) <- c("Host","Parasite") n.links <- length(list.hp) stat1 <- NA stat2 <- NA p.stat1 <- NA p.stat2 <- NA for(k in 1:n.links) { # # 2. Compute reference values of link statistics HP.k <- HP HP.k[HP.list[k,1], HP.list[k,2]] <- 0 mat.4.k <- t(host.vectors) %*% HP.k %*% para.vectors trace.k <- sum(mat.4.k^2) stat1 <- c(stat1, (global-trace.k)) den <- tracemax-global if(den > epsilon) { stat2 <- c(stat2, stat1[k+1]/den) } else { stat2 <- c(stat2, NA) } # # 3. Test link statistics by permutations if(nperm > 0) { set.seed(seed) nGT1 <- 1 nGT2 <- 1 nperm2 <- nperm # for(i in 1:nperm) { HP.k.perm <- apply(HP.k, 2, sample) mat.4.k.perm <- t(host.vectors) %*% HP.k.perm %*% para.vectors trace.k.perm <- sum(mat.4.k.perm^2) stat1.perm <- global.perm[i]-trace.k.perm if(stat1.perm >= stat1[k+1]) nGT1 <- nGT1+1 # if(!is.na(stat2[k+1])) { den <- tracemax-global.perm[i] if(den > epsilon) { stat2.perm <- stat1.perm/den if(stat2.perm >= stat2[k+1]) nGT2 <- nGT2+1 } else { nperm2 <- nperm2-1 # if(!silent) cat("In permutation #",i,"den < epsilon",'\n') } } } p.stat1 <- c(p.stat1, nGT1/(nperm+1)) if(!is.na(stat2[k+1])) { p.stat2 <- c(p.stat2, nGT2/(nperm2+1)) } else { p.stat2 <- c(p.stat2, NA) ### Error in previous version, corrected here } } else { p.stat1 <- c(p.stat1, NA) ### Error in previous version, corrected here p.stat2 <- c(p.stat2, NA) ### Error in previous version, corrected here } } # link.table <- cbind(HP.list, stat1[-1], p.stat1[-1], stat2[-1], p.stat2[-1]) colnames(link.table) = c("Host","Parasite","F1.stat","p.F1","F2.stat","p.F2") out <-list(ParaFitGlobal=global, p.global=p.global, link.table=link.table, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm) } else { if(!silent) cat("Rerun the program with option 'test.links=TRUE' to test the individual H-P links",'\n') out <-list(ParaFitGlobal=global, p.global=p.global, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm) } # }) a[3] <- sprintf("%2f",a[3]) if(!silent) cat("Computation time =",a[3]," sec",'\n') # class(out) <- "parafit" out } ape/R/yule.R0000644000176200001440000000561513002744275012334 0ustar liggesusers## yule.R (2011-11-03) ## Fits Yule Model to a Phylogenetic Tree ## yule: standard Yule model (constant birth rate) ## yule.cov: Yule model with covariates ## Copyright 2003-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. yule <- function(phy, use.root.edge = FALSE) { if (!is.binary.phylo(phy)) stop("tree must be dichotomous to fit the Yule model.") X <- sum(phy$edge.length) nb.node <- phy$Nnode if (!is.null(phy$root.edge) && use.root.edge) X <- X + phy$root.edge else nb.node <- nb.node - 1 lambda <- nb.node/X se <- lambda/sqrt(nb.node) loglik <- -lambda * X + lfactorial(phy$Nnode) + nb.node * log(lambda) obj <- list(lambda = lambda, se = se, loglik = loglik) class(obj) <- "yule" obj } yule.cov <- function(phy, formula, data = NULL) { if (is.null(data)) data <- parent.frame() n <- length(phy$tip.label) nb.node <- phy$Nnode if (!is.null(phy$node.label)) phy$node.label <- NULL bt <- sort(branching.times(phy)) # branching times (from present to past) bt <- rev(bt) # branching times from past to present ni <- cumsum(rev(table(bt))) + 1 X <- model.matrix(formula, data) Xi <- X[phy$edge[, 1], , drop = FALSE] Xj <- X[phy$edge[, 2], , drop = FALSE] dev <- function(b) { 2 * sum(((1/(1 + exp(-(Xi %*% b)))) + (1/(1 + exp(-(Xj %*% b))))) * phy$edge.length/2) - 2 * (sum(log(ni[-length(ni)])) + sum(log((1/(1 + exp(-(X[-(1:(n + 1)), , drop = FALSE] %*% b))))))) } out <- nlm(function(p) dev(p), p = c(rep(0, ncol(X) - 1), -1), hessian = TRUE) Dev <- out$minimum para <- matrix(NA, ncol(X), 2) para[, 1] <- out$estimate if (any(out$gradient == 0)) warning("The likelihood gradient seems flat in at least one dimension (null gradient):\ncannot compute the standard-errors of the parameters.\n") else para[, 2] <- sqrt(diag(solve(out$hessian))) rownames(para) <- colnames(X) colnames(para) <- c("Estimate", "StdErr") ## fit the intercept-only model: X <- model.matrix(~ 1, data = data.frame(X)) Xi <- X[phy$edge[, 1], , drop = FALSE] Xj <- X[phy$edge[, 2], , drop = FALSE] Dev.null <- nlm(function(p) dev(p), p = -1)$minimum cat("\n---- Yule Model with Covariates ----\n\n") cat(" Phylogenetic tree:", deparse(substitute(phy)), "\n") cat(" Number of tips:", n, "\n") cat(" Number of nodes:", nb.node, "\n") cat(" Deviance:", Dev, "\n") cat(" Log-likelihood:", -Dev/2, "\n\n") cat(" Parameter estimates:\n") print(para) cat("\n") cat("Null Deviance:", Dev.null, "\n") cat(" Test of the fitted model: ") chi <- Dev.null - Dev df <- nrow(para) - 1 cat("chi^2 =", round(chi, 3), " df =", df, " P =", round(1 - pchisq(chi, df), 3), "\n") } ape/R/balance.R0000644000176200001440000000155414231460343012735 0ustar liggesusers## balance.R (2022-04-25) ## Balance of a Dichotomous Phylogenetic Tree ## Copyright 2002-2015 Emmanuel Paradis, 2022 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. balance <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') phy <- reorder(phy, "postorder") N <- length(phy$tip.label) nb.node <- phy$Nnode if (nb.node != N - 1) stop('"phy" is not rooted and fully dichotomous') ans <- matrix(NA, nb.node, 2) nd <- node.depth(phy) i <- 1L while (i < nrow(phy$edge)) { node <- phy$edge[i, 1] - N ans[node, 1] <- nd[phy$edge[i,2]] ans[node, 2] <- nd[phy$edge[i+1,2]] i <- i + 2L } rownames(ans) <- if (is.null(phy$node.label)) N + 1:nb.node else phy$node.label ans } ape/R/write.tree.R0000644000176200001440000000772214714422345013451 0ustar liggesusers## write.tree.R (2024-11-11) ## Write Tree File in Parenthetic Format ## Copyright 2002-2024 Emmanuel Paradis, Daniel Lawson, and Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. checkLabel <- function(x) { ## delete all leading and trailing spaces and tabs, and ## the leading left and trailing right parentheses: ## (the syntax will work with any mix of these characters, ## e.g., " ( ( (( " will correctly be deleted) x <- gsub("^[[:space:]\\(]+", "", x) x <- gsub("[[:space:]\\)]+$", "", x) ## replace all spaces and tabs by underscores: x <- gsub("[[:space:]]", "_", x) ## replace commas, colons, and semicolons with dashes: x <- gsub("[,:;]", "-", x) ## replace left and right parentheses with dashes: x <- gsub("[\\(\\)]", "-", x) x } write.tree <- function(phy, file = "", append = FALSE, digits = 10, tree.names = FALSE) { if (!(inherits(phy, c("phylo", "multiPhylo"))) && !all(vapply(phy, inherits, logical(1), 'phylo'))) stop("object \"phy\" must contain trees") if (inherits(phy, "phylo")) phy <- c(phy) N <- length(phy) res <- character(N) if (is.logical(tree.names)) { if (tree.names) { tree.names <- if (is.null(names(phy))) character(N) else names(phy) } else tree.names <- character(N) } ## added by KS (2019-03-01): check_tips <- TRUE if (inherits(phy, "multiPhylo")) { if (!is.null(attr(phy, "TipLabel"))) { attr(phy, "TipLabel") <- checkLabel(attr(phy, "TipLabel")) check_tips <- FALSE } } ## added by EP (2019-01-23): phy <- .uncompressTipLabel(phy) class(phy) <- NULL for (i in 1:N) res[i] <- .write.tree2(phy[[i]], digits = digits, tree.prefix = tree.names[i], check_tips) if (file == "") return(res) else cat(res, file = file, append = append, sep = "\n") } .write.tree2 <- function(phy, digits = 10, tree.prefix = "", check_tips) { brl <- (!is.null(phy$edge.length) && digits > 0) nodelab <- !is.null(phy$node.label) if (check_tips) phy$tip.label <- checkLabel(phy$tip.label) if (nodelab) { ## fix by Martin Smith (2024-04-07) if (length(phy[["node.label"]]) != phy[["Nnode"]]) { warning("Length of node.label does not match number of nodes.") } phy$node.label <- checkLabel(phy$node.label) } f.d <- paste0(":%.", digits, "g") n <- length(phy$tip.label) ## terminal branches: terms <- match(seq_len(n), phy$edge[, 2]) TERMS <- phy$tip.label if (brl) TERMS <- paste0(TERMS, sprintf(f.d, phy$edge.length[terms])) ## internal branches, including root edge: INTS <- rep(")", phy$Nnode) if (nodelab) INTS <- paste0(INTS, phy$node.label) if (brl) { tmp <- phy$edge.length[-terms][order(phy$edge[-terms, 2])] tmp <- c("", sprintf(f.d, tmp)) if (!is.null(phy$root.edge)) tmp[1L] <- sprintf(f.d, phy$root.edge) INTS <- paste0(INTS, tmp) } ## find the root node: tmp.nodes <- unique.default(phy$edge[, 1L]) tmp.m <- match(tmp.nodes, phy$edge[, 2L]) root <- tmp.nodes[is.na(tmp.m)] if (length(root) > 1) stop("seems there is more than one root node") storage.mode(root) <- "integer" o <- reorderRcpp(phy$edge, n, root, 2L) ANC <- phy$edge[o, 1L] DESC <- phy$edge[o, 2L] NEWICK <- character(n + phy$Nnode) NEWICK[1:n] <- TERMS from <- to <- 1L repeat { thenode <- ANC[from] if (thenode == root) { to <- length(ANC) } else { while (ANC[to + 1L] == thenode) to <- to + 1L } tmp <- paste(NEWICK[DESC[from:to]], collapse = ",") tmp <- paste0("(", tmp, INTS[thenode - n]) NEWICK[thenode] <- tmp if (thenode == root) break from <- to + 1L } paste0(tree.prefix, NEWICK[root], ";") } ape/R/gammaStat.R0000644000176200001440000000115012465112403013254 0ustar liggesusers## gammaStat.R (2009-05-10) ## Gamma-Statistic of Pybus and Harvey ## Copyright 2002-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. gammaStat <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') N <- length(phy$tip.label) bt <- sort(branching.times(phy)) g <- rev(c(bt[1], diff(bt))) # internode intervals are from past to present ST <- sum((2:N) * g) stat <- sum(cumsum((2:(N - 1)) * g[-(N - 1)]))/(N - 2) m <- ST/2 s <- ST * sqrt(1/(12 * (N - 2))) (stat - m)/s } ape/R/collapsed.intervals.R0000644000176200001440000000243614356737443015344 0ustar liggesusers## collapsed.intervals.R (2002-09-12) ## Collapsed coalescent intervals (e.g. for the skyline plot) ## Copyright 2002 Korbinian Strimmer ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. # construct collapsed intervals from coalescent intervals collapsed.intervals <- function(ci, epsilon=0.0) { if (!inherits(ci, "coalescentIntervals")) stop("object \"ci\" is not of class \"coalescentIntervals\"") sz <- ci$interval.length lsz <- length(sz) idx <- c <- 1:lsz p <- 1 w <- 0 # starting from tips collapes intervals # until total size is >= epsilon for (i in 1:lsz) { idx[[i]] <- p w <- w + sz[[i]] if (w >= epsilon) { p <- p+1 w <- 0 } } # if last interval is smaller than epsilon merge # with second last interval lastInterval <- idx==p if ( sum(sz[lastInterval]) < epsilon ) { p <- p-1 idx[lastInterval] <- p } obj <- list( lineages=ci$lineages, interval.length=ci$interval.length, collapsed.interval=idx, # collapsed intervals (via reference) interval.count=ci$interval.count, collapsed.interval.count = idx[[ci$interval.count]], total.depth =ci$total.depth, epsilon = epsilon ) class(obj) <- "collapsedIntervals" return(obj) } ape/R/zoom.R0000644000176200001440000000230212465112403012322 0ustar liggesusers## zoom.R (2009-07-27) ## Zoom on a Portion of a Phylogeny ## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. zoom <- function(phy, focus, subtree = FALSE, col = rainbow, ...) { if (!is.list(focus)) focus <- list(focus) n <- length(focus) for (i in 1:n) if (is.character(focus[[i]])) focus[[i]] <- which(phy$tip.label %in% focus[[i]]) # fix by Yan Wong if (is.function(col)) { col <- if (deparse(substitute(col)) == "grey") grey(1:n/n) else col(n) } ext <- vector("list", n) for (i in 1:n) ext[[i]] <- drop.tip(phy, phy$tip.label[-focus[[i]]], subtree = subtree, rooted = TRUE) nc <- round(sqrt(n)) + 1 nr <- ceiling(sqrt(n)) M <- matrix(0, nr, nc) x <- c(rep(1, nr), 2:(n + 1)) M[1:length(x)] <- x layout(M, c(1, rep(3/(nc - 1), nc - 1))) phy$tip.label <- rep("", length(phy$tip.label)) colo <- rep("black", dim(phy$edge)[1]) for (i in 1:n) colo[which.edge(phy, focus[[i]])] <- col[i] plot.phylo(phy, edge.color = colo, ...) for (i in 1:n) plot.phylo(ext[[i]], edge.color = col[i], ...) } ape/R/vcv2phylo.R0000644000176200001440000001205312465112403013276 0ustar liggesusers## vcv2phylo.R (2014-11-27) ## Variance-Covariance Matrix to Tree ## Copyright 2014 Simon Blomberg ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. vcv2phylo <- function (mat, tolerance = 1e-7) { ######################################################### ## Program to reconstruct a phylogenetic tree ## ## from a phylogenetic variance-covariance matrix. ## ## Input: mat (is tested for positive-definiteness) ## ## Output: phylo (in phylo format as in package "ape") ## ## If numerical issues occur, adjust the tolerance. ## ## Author: S. P. Blomberg ## ## Date: 12th November 2010 ## ######################################################### make.node <- function (left, right, value, lbrlen, rbrlen) { # function to make a node, using lists the.node <- list(left=left, right=right, value=value, lbrlen=lbrlen, rbrlen=rbrlen) class(the.node) <- c("node", "list") return(the.node) } divide.matrix <- function (mat) { # function to decompose a block-diagonal matrix into # upper and lower blocks dims <- dim(mat)[1] end.of.block <- which(mat[1,] < tolerance)[1]-1 if (is.na(end.of.block)) stop("Matrix is not block-diagonal") matlist <- list(upper=mat[1:end.of.block, 1:end.of.block], lower=mat[(end.of.block+1):dims,(end.of.block+1):dims]) if (length(matlist$upper)==1) names(matlist$upper) <- rownames(mat)[1] if (length(matlist$lower)==1) names(matlist$lower) <- rownames(mat)[dims] return(matlist) } make.tree.rec <- function (mat) { # Recursive function to create a tree made of nodes # from a phylogenetic matrix matlist <- divide.matrix(mat) if (is.vector(matlist$upper) && is.vector(matlist$lower)) { left <- as.numeric(names(matlist$upper)) right <- as.numeric(names(matlist$lower)) value <- i lbrlen <- matlist$upper rbrlen <- matlist$lower } if (is.vector(matlist$upper) && is.matrix(matlist$lower)) { min.lower <- min(matlist$lower) left <- as.numeric(names(matlist$upper)) value <- i i <<- i+1 right <- Recall(matlist$lower-min.lower) lbrlen <- matlist$upper rbrlen <- min.lower } if (is.matrix(matlist$upper) && is.vector(matlist$lower)) { min.upper <- min(matlist$upper) value <- i i <<- i+1 left <- Recall(matlist$upper-min.upper) right <- as.numeric(names(matlist$lower)) lbrlen <- min.upper rbrlen <- matlist$lower } if (is.matrix(matlist$upper) && is.matrix(matlist$lower)) { min.upper <- min(matlist$upper) min.lower <- min(matlist$lower) value <- i i <<- i+1 left <- Recall(matlist$upper-min.upper) i <<- i+1 right <- Recall(matlist$lower-min.lower) lbrlen <- min.upper rbrlen <- min.lower } return(make.node(left, right, value, lbrlen, rbrlen)) } make.phylo.rec <- function (the.list) { # Recursive function to construct the edge matrix and collect the # branch length information from the tree brlens <<- c(brlens, the.list$lbrlen, the.list$rbrlen) if (is.numeric(the.list$left) && is.numeric(the.list$right)) { the.matrix <<- rbind(the.matrix, c(the.list$value, the.list$left), c(the.list$value, the.list$right)) } if (is.numeric(the.list$left) && inherits(the.list$right, "node")) { the.matrix <<- rbind(the.matrix, c(the.list$value, the.list$left), c(the.list$value, the.list$right$value)) Recall(the.list$right) } if (inherits(the.list$left, "node") && is.numeric(the.list$right)) { the.matrix <<- rbind(the.matrix, c(the.list$value, the.list$left$value), c(the.list$value, the.list$right)) Recall(the.list$left) } if (inherits(the.list$left, "node") && inherits(the.list$right, "node")) { the.matrix <<- rbind(the.matrix, c(the.list$value, the.list$left$value), c(the.list$value, the.list$right$value)) Recall(the.list$left) Recall(the.list$right) } } # main body #require(matrixcalc) #if (!is.positive.definite(mat)) stop("Matrix is not positive-definite") if (!isSymmetric(mat)) stop("Matrix is not symmetric") if (any(eigen(mat, only.values = TRUE)$values < -tolerance)) stop("Matrix is not positive-definite") sp.names <- rownames(mat) dims <- dim(mat)[1] rownames(mat) <- colnames(mat) <- 1:dims i <- dims+1 the.list <- make.tree.rec(mat) # side effect: calculate i the.matrix <- matrix(NA, 0, ncol=2) # initialise the edge matrix brlens <- vector(mode="numeric", length=0) #initialise branch length vector make.phylo.rec(the.list) # side effects: calculate the.matrix and brlens names(brlens) <- NULL phylo <- list(edge=the.matrix, tip.label=sp.names, edge.length=brlens, Nnode=i-dims) storage.mode(phylo$edge) <- "integer" storage.mode(phylo$Nnode) <- "integer" class(phylo) <- "phylo" return(phylo) } ape/R/MPR.R0000644000176200001440000000404013002744256012002 0ustar liggesusers## MPR.R (2010-08-10) ## Most Parsimonious Reconstruction ## Copyright 2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. MPR <- function(x, phy, outgroup) { if (is.rooted(phy)) stop("the tree must be unrooted") if (!is.binary.phylo(phy)) stop("the tree must be fully dichotomous") if (length(outgroup) > 1L) stop("outgroup must be a single tip") if (is.character(outgroup)) outgroup <- which(phy$tip.label == outgroup) if (!is.null(names(x))) { if (all(names(x) %in% phy$tip.label)) x <- x[phy$tip.label] else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.") } n <- length(phy$tip.label) if (is.null(phy$node.label)) phy$node.label <- n + 1:(phy$Nnode) phy <- drop.tip(root(phy, outgroup), outgroup) n <- n - 1L m <- phy$Nnode phy <- reorder(phy, "postorder") root.state <- x[outgroup] I <- as.integer(x[-outgroup]) I[n + 1:m] <- NA I <- cbind(I, I) # interval map med <- function(x) { i <- length(x)/2 sort(x)[c(i, i + 1L)] } ## 1st pass s <- seq(from = 1, by = 2, length.out = m) anc <- phy$edge[s, 1] des <- matrix(phy$edge[, 2], ncol = 2, byrow = TRUE) for (i in 1:m) I[anc[i], ] <- med(I[des[i, ], ]) ## 2nd pass out <- matrix(NA, m, 2) colnames(out) <- c("lower", "upper") ## do the most basal node before looping Iw <- as.vector(I[des[m, ], ]) # interval maps of the descendants out[anc[m] - n, ] <- range(med(c(root.state, root.state, Iw))) for (i in (m - 1):1) { j <- anc[i] Iw <- as.vector(I[des[i, ], ]) # interval maps of the descendants k <- which(phy$edge[, 2] == j) # find the ancestor tmp <- out[phy$edge[k, 1] - n, ] out[j - n, 1] <- min(med(c(tmp[1], tmp[1], Iw))) out[j - n, 2] <- max(med(c(tmp[2], tmp[2], Iw))) } rownames(out) <- phy$node.label out } ape/R/read.gff.R0000644000176200001440000000154713300740442013023 0ustar liggesusers## read.gff.R (2018-05-22) ## Read GFF Files ## Copyright 2016-2018 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. read.gff <- function(file, na.strings = c(".", "?"), GFF3 = TRUE) { w <- list("", "", "", 0L, 0L, 0, "", "", "") x <- scan(file, w, sep = "\t", quote = "", quiet = TRUE, na.strings = na.strings, comment.char = "#") for (i in c(1, 2, 3, 7, 8)) x[[i]] <- factor(x[[i]]) names(x) <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes") if (!GFF3) { names(x) <- c("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attributes") } n <- length(x[[1]]) attr(x, "row.names") <- as.character(seq_len(n)) class(x) <- "data.frame" x } ape/R/all.equal.phylo.R0000644000176200001440000000644512465112403014362 0ustar liggesusers## all.equal.phylo.R (2009-07-05) ## ## Global Comparison of two Phylogenies ## Copyright 2006 Benoit Durand ## modified by EP for the new coding of "phylo" (2006-10-04) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ## Recherche de la correspondance entre deux arbres ## Parcours en profondeur et en parallele des deux arbres (current et target) ## current, target: les deux arbres a comparer ## use.edge.length: faut-il comparer les longueurs de branches ? ## use.tip.label: faut-il comparer les etiquettes de feuilles ou seulement la ## topologie des deux arbres ? ## index.return: si TRUE, retourner la matrice de correspondance entre noeuds ## et feuilles, une matrice a deux colonnes (current et target) avec pour ## chaque ligne des paires d'identifiants de noeuds/feuilles, tels qu'ils ## apparaissent dans l'attribut 'edge' des objets phylo ## tolerance, scale: parametres de comparaison des longueurs de branches ## (voir 'all.equal') all.equal.phylo <- function(target, current, use.edge.length = TRUE, use.tip.label = TRUE, index.return = FALSE, tolerance = .Machine$double.eps ^ 0.5, scale = NULL, ...) { same.node <- function(i, j) { # Comparaison de un noeud et une feuille if (xor(i > Ntip1, j > Ntip2)) return(NULL) # Comparaison de deux feuilles if (i <= Ntip1) { if (!use.tip.label) return(c(i, j)) if (current$tip.label[i] == target$tip.label[j]) return(c(i, j)) return(NULL) } # Comparaison de deux noeuds i.children <- which(current$edge[, 1] == i) j.children <- which(target$edge[, 1] == j) if (length(i.children) != length(j.children)) return(NULL) correspondance <- NULL for (i.child in i.children) { corresp <- NULL for (j.child in j.children) { if (!use.edge.length || isTRUE(all.equal(current$edge.length[i.child], target$edge.length[j.child], tolerance = tolerance, scale = scale))) corresp <- same.node(current$edge[i.child, 2], target$edge[j.child, 2]) if (!is.null(corresp)) break } if (is.null(corresp)) return(NULL) correspondance <- c(correspondance, i, j, corresp) j.children <- j.children[j.children != j.child] } return(correspondance) } Ntip1 <- length(target$tip.label) Ntip2 <- length(current$tip.label) root1 <- Ntip1 + 1 root2 <- Ntip2 + 1 if (root1 != root2) return(FALSE) ## Fix by EP so that unrooted trees are correctly compared: if (!is.rooted(target) && !is.rooted(current)) { outg <- target$tip.label[1] if (! outg %in% current$tip.label) return(FALSE) target <- root(target, outg) current <- root(current, outg) } ## End result <- same.node(root1, root2) if (!isTRUE(index.return)) return(!is.null(result)) if (is.null(result)) return(result) result <- t(matrix(result, nrow = 2)) colnames(result) = c('current', 'target') return(result) } ape/R/read.tree.R0000644000176200001440000001116714601173237013226 0ustar liggesusers## read.tree.R (2024-03-28) ## Read Tree Files in Parenthetic Format ## Copyright 2002-2024 Emmanuel Paradis, Daniel Lawson and Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. read.tree <- function(file = "", text = NULL, tree.names = NULL, skip = 0, comment.char = "", keep.multi = FALSE, ...) { if (!is.null(text)) { if (!is.character(text)) stop("argument 'text' must be of mode character") tree <- text } else { tree <- scan(file = file, what = "", sep = "\n", quiet = TRUE, skip = skip, comment.char = comment.char, ...) } ## Suggestion from Eric Durand and Nicolas Bortolussi (added 2005-08-17): if (identical(tree, character(0))) { warning("empty character string.") return(NULL) } ## make a single string if (length(tree) > 1) tree <- paste(tree, collapse = "") single_quotes <- function(x, z) { x <- charToRaw(x) z <- which(x == as.raw(39)) if (length(z) %% 2) stop("wrong number of single quotes around labels") l <- length(z) / 2 opening <- z[c(TRUE, FALSE)] closing <- z[c(FALSE, TRUE)] from <- c(1, closing + 1L) to <- c(opening - 1L, length(x)) i <- mapply(":", from = from, to = to, SIMPLIFY = FALSE, USE.NAMES = FALSE) keep <- lapply(i, function(i) x[i]) tmp_label <- paste0("IMPROBABLEPREFIX", 1:l, "IMPROBABLESUFFIX") tmpLabsRaw <- lapply(tmp_label, charToRaw) n <- 2 * l + 1L res <- vector("list", n) res[seq(1, n, 2)] <- keep res[seq(2, n - 1, 2)] <- tmpLabsRaw tree <<- rawToChar(unlist(res)) i <- mapply(":", from = opening, to = closing, SIMPLIFY = FALSE, USE.NAMES = FALSE) orig_label <- lapply(i, function(i) x[i]) sapply(orig_label, rawToChar) } ## replace labels with single quotes (if needed) SINGLE.QUOTES.FOUND <- grepl("'", tree) if (SINGLE.QUOTES.FOUND) tmp_label <- single_quotes(tree) y <- unlist(gregexpr(";", tree)) if (all(y == -1)) { warning("no semicolon(s) [end(s) of tree] found") return(NULL) } ## if one tree per line much faster if (identical(y, nchar(tree))) { # check if always one tree per line Ntree <- length(y) STRING <- character(Ntree) for (i in 1:Ntree) { STRING[i] <- gsub("\\[[^]]*\\]", "", tree[i]) # delete comments (fix 2015-01-12) } } else { tree <- unlist(strsplit(tree, NULL)) y <- which(tree == ";") Ntree <- length(y) x <- c(1, y[-Ntree] + 1) ## Suggestion from Olivier Francois (added 2006-07-15): if (is.na(y[1])) return(NULL) STRING <- character(Ntree) for (i in 1:Ntree) { tmp <- paste0(tree[x[i]:y[i]], collapse = "") STRING[i] <- gsub("\\[[^]]*\\]", "", tmp) # delete comments (fix 2015-01-12) } } ## remove possible leading and trailing underscores STRING <- gsub("^_+|_+$", "", STRING) STRING <- gsub("[ \t]", "", STRING) # spaces and TABs within quoted labels are not deleted getTreeName <- function(x) { res <- rep("", length(x)) i <- regexpr("\\(", x) s <- i > 1 if (any(s)) res[s] <- substr(x[s], 1, i[s] - 1) res } tmpnames <- getTreeName(STRING) if (is.null(tree.names) && any(nzchar(tmpnames))) tree.names <- tmpnames colon <- grep(":", STRING) if (!length(colon)) { obj <- lapply(STRING, .cladoBuild) } else if (length(colon) == Ntree) { obj <- lapply(STRING, .treeBuild) } else { obj <- vector("list", Ntree) obj[colon] <- lapply(STRING[colon], .treeBuild) nocolon <- (1:Ntree)[!1:Ntree %in% colon] obj[nocolon] <- lapply(STRING[nocolon], .cladoBuild) } if (SINGLE.QUOTES.FOUND) { FOO <- function(x) { i <- gsub("^IMPROBABLEPREFIX|IMPROBABLESUFFIX$", "", x) tmp_label[as.integer(i)] } for (i in 1:Ntree) { lab <- obj[[i]]$tip.label k <- grep("IMPROBABLEPREFIX", lab) if (length(k)) { lab[k] <- FOO(lab[k]) obj[[i]]$tip.label <- lab } lab <- obj[[i]]$node.label k <- grep("IMPROBABLEPREFIX", lab) if (length(k)) { lab[k] <- FOO(lab[k]) obj[[i]]$node.label <- lab } } } if (Ntree == 1 && !keep.multi) obj <- obj[[1]] else { if (!is.null(tree.names)) names(obj) <- tree.names class(obj) <- "multiPhylo" } obj } ape/R/scales.R0000644000176200001440000001254214714424055012627 0ustar liggesusers## scales.R (2024-11-11) ## Add a Scale Bar or Axis to a Phylogeny Plot ## add.scale.bar: add a scale bar to a phylogeny plot ## axisPhylo: add a scale axis on the side of a phylogeny plot ## Copyright 2002-2024 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. add.scale.bar <- function(x, y, length = NULL, ask = FALSE, lwd = 1, lcol = "black", ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) direc <- lastPP$direction if (is.null(length)) { nb.digit <- if (direc %in% c("rightwards", "leftwards")) diff(range(lastPP$xx)) else diff(range(lastPP$yy)) length <- pretty(c(0, nb.digit) / 6, 1)[2] # by Klaus } if (ask) { cat("\nClick where you want to draw the bar\n") x <- unlist(locator(1)) y <- x[2] x <- x[1] } else if (missing(x) || missing(y)) { if (lastPP$type %in% c("phylogram", "cladogram")) { switch(direc, "rightwards" = { x <- 0 y <- 1 }, "leftwards" = { x <- max(lastPP$xx) y <- 1 }, "upwards" = { x <- max(lastPP$xx) y <- 0 }, "downwards" = { x <- 1 y <- max(lastPP$yy) }) } else { direc <- "rightwards" # just to be sure for below x <- lastPP$x.lim[1] y <- lastPP$y.lim[1] } } switch(direc, "rightwards" = { segments(x, y, x + length, y, col = lcol, lwd = lwd) text(x + length * 1.1, y, as.character(length), adj = c(0, 0.5), ...) }, "leftwards" = { segments(x - length, y, x, y, col = lcol, lwd = lwd) text(x - length * 1.1, y, as.character(length), adj = c(1, 0.5), ...) }, "upwards" = { segments(x, y, x, y + length, col = lcol, lwd = lwd) text(x, y + length * 1.1, as.character(length), adj = c(0, 0.5), srt = 90, ...) }, "downwards" = { segments(x, y - length, x, y, col = lcol, lwd = lwd) text(x, y - length * 1.1, as.character(length), adj = c(0, 0.5), srt = 270, ...) }) } axisPhylo <- function(side = NULL, root.time = NULL, backward = TRUE, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) type <- lastPP$type if (type == "unrooted") stop("axisPhylo() not available for unrooted plots; try add.scale.bar()") if (type == "radial") stop("axisPhylo() not meaningful for this type of plot") if (is.null(root.time)) root.time <- lastPP$root.time if (type %in% c("phylogram", "cladogram")) { if(is.null(side)){ if (lastPP$direction %in% c("rightwards", "leftwards")) side <- 1 else side <- 2 } if (lastPP$direction %in% c("rightwards", "leftwards")){ tmp_range <- range(lastPP$xx) tmp_lim <- lastPP$x.lim } else { tmp_range <- range(lastPP$yy) tmp_lim <- lastPP$y.lim } if (lastPP$direction == "rightwards"){ xscale <- c(min(tmp_lim[1],tmp_range[1]), range(lastPP$xx)[2]) } if (lastPP$direction == "leftwards"){ xscale <- c(tmp_range[1], max(tmp_range[2], tmp_lim[2])) } if (lastPP$direction == "downwards"){ xscale <- c(tmp_range[1], max(tmp_range[2], tmp_lim[2])) } if (lastPP$direction == "upwards"){ xscale <- c(min(tmp_range[1], tmp_lim[1]), tmp_range[2]) } tscale <- xscale tmp <- lastPP$direction %in% c("leftwards", "downwards") if (xor(backward, tmp)) tscale <- tmp_range[2] - tscale if (!is.null(root.time)) { if(! backward) tscale <- tscale + root.time else tscale <- tscale + root.time - diff(tmp_range) } ## the linear transformation between the x-scale and the time-scale: beta <- diff(xscale) / diff(tscale) alpha <- xscale[1] - beta * tscale[1] lab <- pretty(tscale) x <- beta * lab + alpha axis(side = side, at = x, labels = lab, ...) } else { # type == "fan" n <- lastPP$Ntip xx <- lastPP$xx[1:n]; yy <- lastPP$yy[1:n] r0 <- max(sqrt(xx^2 + yy^2)) ## find the widest angle between tips: alpha <- sort(setNames(rect2polar(xx, yy)$angle, 1:n)) # from -pi to +pi angles <- c(diff(alpha), 2*pi - alpha[n] + alpha[1L]) j <- which.max(angles) i <- if (j == 1L) n else j - 1L # this is a circle... firstandlast <- as.integer(names(angles[c(i, j)])) # c(1, n) theta0 <- mean(atan2(yy[firstandlast], xx[firstandlast])) x0 <- r0 * cos(theta0); y0 <- r0 * sin(theta0) inc <- diff(pretty(c(0, r0))[1:2]) srt <- 360*theta0/(2*pi) coef <- -1 if (abs(srt) > 90) { srt <- srt + 180 coef <- 1 } len <- 0.025 * r0 # the length of tick marks r <- r0 while (r > 1e-8) { x <- r * cos(theta0); y <- r * sin(theta0) if (len/r < 1) { ra <- sqrt(len^2 + r^2); thetaa <- theta0 + coef * asin(len/r) xa <- ra * cos(thetaa); ya <- ra * sin(thetaa) segments(xa, ya, x, y) text(xa, ya, r0 - r, srt = srt, adj = c(0.5, 1.1), ...) } r <- r - inc } segments(x, y, x0, y0) } } ape/R/mst.R0000644000176200001440000000512013677265011012154 0ustar liggesusers## mst.R (2020-07-02) ## Minimum Spanning Tree ## Copyright 2002-2006 Yvonnick Noel, Julien Claude, and Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. mst <- function(X) { if (inherits(X, "dist")) X <- as.matrix(X) n <- dim(X)[1] N <- matrix(0, n, n) tree <- NULL large.value <- max(X) + 1 diag(X) <- large.value index.i <- 1 for (i in 1:(n - 1)) { tree <- c(tree, index.i) m <- apply(as.matrix(X[, tree]), 2, min) #calcul les minimum par colonne a <- sortIndex(X[, tree])[1, ] b <- sortIndex(m)[1] index.j <- tree[b] index.i <- a[b] N[index.i, index.j] <- 1 N[index.j, index.i] <- 1 for (j in tree) { X[index.i, j] <- large.value X[j, index.i] <- large.value } } dimnames(N) <- dimnames(X) class(N) <- "mst" return(N) } ### Function returning an index matrix for an increasing sort sortIndex <- function(X) { if(length(X) == 1) return(1) # sorting a scalar? if(!is.matrix(X)) X <- as.matrix(X) # force vector into matrix ## n <- nrow(X) apply(X, 2, function(v) order(rank(v))) # find the permutation } plot.mst <- function(x, graph = "circle", x1 = NULL, x2 = NULL, ...) { n <- nrow(x) if (is.null(x1) || is.null(x2)) { if (graph == "circle") { ang <- seq(0, 2 * pi, length = n + 1) x1 <- cos(ang) x2 <- sin(ang) plot(x1, x2, type = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", ...) } if (graph == "nsca") { XY <- nsca(x) x1 <- XY[, 1] x2 <- XY[, 2] plot(XY, type = "n", xlab = "\"nsca\" -- axis 1", ylab = "\"nsca\" -- axis 2", ...) } } else plot(x1, x2, type = "n", xlab = deparse(substitute(x1)), ylab = deparse(substitute(x2)), ...) for (i in 1:n) { w1 <- which(x[i, ] == 1) segments(x1[i], x2[i], x1[w1], x2[w1]) } points(x1, x2, pch = 21, col = "black", bg = "white", cex = 3) text(x1, x2, 1:n, cex = 0.8) } nsca <- function(A) { Dr <- apply(A, 1, sum) Dc <- apply(A, 2, sum) eig.res <- eigen(diag(1 / sqrt(Dr)) %*% A %*% diag(1 / sqrt(Dc))) r <- diag(1 / Dr) %*% (eig.res$vectors)[, 2:4] ## The next line has been changed by EP (20-02-2003), since ## it does not work if 'r' has no dimnames already defined ## dimnames(r)[[1]] <- dimnames(A)[[1]] rownames(r) <- rownames(A) r } ape/R/drop.tip.R0000644000176200001440000002327514533611457013124 0ustar liggesusers## drop.tip.R (2023-01-09) ## Remove Tips in a Phylogenetic Tree ## Copyright 2003-2023 Emmanuel Paradis, 2017-2023 Klaus Schliep, 2018 Joseph Brown ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. keep.tip <- function(phy, tip, ...) UseMethod("keep.tip") keep.tip.phylo <- function(phy, tip, ...) { Ntip <- length(phy$tip.label) ## convert to indices if strings passed in if (is.character(tip)) { idx <- match(tip, phy$tip.label) ## stop on bad tip names ## alternative is to warn but proceed. not sure what stance is if (anyNA(idx)) { um <- c("unmatched tip labels:\n", paste(tip[is.na(idx)], collapse = " ")) stop(um) } tip <- idx } else { # check that passed in indices are all valid out.of.range <- tip > Ntip if (any(out.of.range)) { warning("some tip numbers were larger than the number of tips: they were ignored") tip <- tip[!out.of.range] } } ## get complement tip indices to drop toDrop <- setdiff(1:Ntip, tip) drop.tip(phy, toDrop, ...) } extract.clade <- function(phy, node, root.edge = 0, collapse.singles = TRUE, interactive = FALSE) { n <- length(phy$tip.label) if (interactive) { cat("Click close to the node...\n") node <- identify(phy)$nodes } else { if (length(node) > 1) { node <- node[1] warning("only the first value of 'node' has been considered") } if (is.character(node)) { if (is.null(phy$node.label)) stop("the tree has no node labels") node <- match(node, phy$node.label) + n if (is.na(node)) stop("'node' not among the node labels.") } if (node <= n) stop("node number must be greater than the number of tips") } if (node == n + 1L) return(phy) keep <- prop.part(phy)[[node - n]] drop.tip(phy, (1:n)[-keep], root.edge = root.edge, rooted = TRUE, collapse.singles = collapse.singles) } drop.tip <- function(phy, tip, ...) UseMethod("drop.tip") drop.tip.phylo <- function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0, rooted = is.rooted(phy), collapse.singles = TRUE, interactive = FALSE, ...) { Ntip <- length(phy$tip.label) ## find the tips to drop: if (interactive) { cat("Left-click close to the tips you want to drop; right-click when finished...\n") xy <- locator() nToDrop <- length(xy$x) tip <- integer(nToDrop) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) for (i in 1:nToDrop) { d <- sqrt((xy$x[i] - lastPP$xx)^2 + (xy$y[i] - lastPP$yy)^2) tip[i] <- which.min(d) } } else { if (is.character(tip)) tip <- which(phy$tip.label %in% tip) } out.of.range <- tip > Ntip if (any(out.of.range)) { warning("some tip numbers were larger than the number of tips: they were ignored") tip <- tip[!out.of.range] } if (!length(tip)) return(phy) if (length(tip) == Ntip) { if (Nnode(phy) < 3 || trim.internal) { # by Klaus (2018-06-21) warning("drop all tips of the tree: returning NULL") return(NULL) } } wbl <- !is.null(phy$edge.length) if (length(tip) == Ntip - 1 && trim.internal) { i <- which(phy$edge[, 2] == (1:Ntip)[-tip]) res <- list(edge = matrix(2:1, 1, 2), tip.label = phy$tip.label[phy$edge[i, 2]], Nnode = 1L) class(res) <- "phylo" if (wbl) res$edge.length <- phy$edge.length[i] if (!is.null(phy$node.label)) res$node.label <- phy$node.label[phy$edge[i, 1] - Ntip] return(res) } if (!rooted) { phy$root.edge <- NULL # moved from below (2021-09-29) if (subtree) { phy <- root(phy, (1:Ntip)[-tip][1]) root.edge <- 0 } } phy <- reorder(phy) NEWROOT <- ROOT <- Ntip + 1 Nnode <- phy$Nnode Nedge <- dim(phy$edge)[1] if (subtree) { trim.internal <- TRUE tr <- reorder(phy, "postorder") N <- .C(node_depth, as.integer(Ntip), as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), 1L)[[5]] } edge1 <- phy$edge[, 1] # local copies edge2 <- phy$edge[, 2] # keep <- !logical(Nedge) ## delete the terminal edges given by `tip': keep[match(tip, edge2)] <- FALSE if (trim.internal) { ints <- edge2 > Ntip ## delete the internal edges that do not have anymore ## descendants (ie, they are in the 2nd col of `edge' but ## not in the 1st one) repeat { sel <- !(edge2 %in% edge1[keep]) & ints & keep if (!sum(sel)) break keep[sel] <- FALSE } if (subtree) { ## keep the subtending edge(s): subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep] keep[subt] <- TRUE } if (root.edge && wbl) { degree <- tabulate(edge1[keep]) if (degree[ROOT] == 1) { j <- integer(0) # will store the indices of the edges below the new root repeat { i <- which(edge1 == NEWROOT & keep) j <- c(i, j) NEWROOT <- edge2[i] ## degree <- tabulate(edge1[keep]) # utile ? if (degree[NEWROOT] > 1) break } keep[j] <- FALSE ## if (length(j) > root.edge) j <- 1:root.edge j <- j[1:root.edge] NewRootEdge <- sum(phy$edge.length[j]) if (length(j) < root.edge && !is.null(phy$root.edge)) NewRootEdge <- NewRootEdge + phy$root.edge phy$root.edge <- NewRootEdge } } } ##if (!root.edge) phy$root.edge <- NULL # moved above (2021-09-29) ## drop the edges phy$edge <- phy$edge[keep, ] if (wbl) phy$edge.length <- phy$edge.length[keep] ## find the new terminal edges (works whatever 'subtree' and 'trim.internal'): TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]) ## get the old No. of the nodes and tips that become tips: oldNo.ofNewTips <- phy$edge[TERMS, 2] ## in case some tips are dropped but kept because of 'subtree = TRUE': if (subtree) { i <- which(tip %in% oldNo.ofNewTips) if (length(i)) { phy$tip.label[tip[i]] <- "[1_tip]" tip <- tip[-i] } } n <- length(oldNo.ofNewTips) # the new number of tips in the tree ## the tips may not be sorted in increasing order in the ## 2nd col of edge, so no need to reorder $tip.label phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]) ## fix by Thomas Sibley (2017-10-28): if (length(tip)) phy$tip.label <- phy$tip.label[-tip] ## make new tip labels if necessary: if (subtree || !trim.internal) { ## get the numbers of the nodes that become tips: node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip] ## fix by Thomas Sibley (2017-10-28): new.tip.label <- if (!length(node2tip)) { character(0) } else if (subtree && is.null(phy$node.label)) { paste("[", N[node2tip], "_tips]", sep = "") } else { if (is.null(phy$node.label)) rep("NA", length(node2tip)) else phy$node.label[node2tip - Ntip] } # if (!is.null(phy$node.label)) # phy$node.label <- phy$node.label[-(node2tip - Ntip)] phy$tip.label <- c(phy$tip.label, new.tip.label) } phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode ## The block below renumbers the nodes so that they conform ## to the "phylo" format newNb <- integer(Ntip + Nnode) newNb[NEWROOT] <- n + 1L sndcol <- phy$edge[, 2] > n newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode) phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] phy$edge[, 1] <- newNb[phy$edge[, 1]] storage.mode(phy$edge) <- "integer" if (!is.null(phy$node.label)) # update node.label if needed phy$node.label <- phy$node.label[which(newNb > 0) - Ntip] if (collapse.singles) phy <- collapse.singles(phy) phy } ## "multiPhylo" methods by Klaus: keep.tip.multiPhylo <- function(phy, tip, ...) { if (is.null(attr(phy, "TipLabel"))) { tmp <- try(.compressTipLabel(phy), TRUE) if (!inherits(tmp, "try-error")) phy <- tmp } if (!is.null(attr(phy, "TipLabel"))) { phy <- lapply(phy, keep.tip, tip, ...) class(phy) <- "multiPhylo" phy <- .compressTipLabel(phy) } else { if (!inherits(tip, "character")) stop("Trees have different labels, tip needs to be of class character!") phy <- lapply(phy, keep.tip, tip, ...) class(phy) <- "multiPhylo" } phy } drop.tip.multiPhylo <- function(phy, tip, ...) { interactive <- if (hasArg(interactive)) list(...)$interactive else FALSE if (interactive) stop("interactive = TRUE does not work for drop.tip.multiPhylo().") if (is.null(attr(phy, "TipLabel"))) { tmp <- try(.compressTipLabel(phy), TRUE) if (!inherits(tmp, "try-error")) phy <- tmp } if (!is.null(attr(phy, "TipLabel"))) { phy <- lapply(phy, drop.tip, tip, ...) class(phy) <- "multiPhylo" phy <- .compressTipLabel(phy) } else { if (!inherits(tip, "character")) stop("Trees have different labels, tip needs to be of class character!") phy <- lapply(phy, drop.tip, tip, ...) class(phy) <- "multiPhylo" } phy } ape/R/write.dna.R0000644000176200001440000001400013754010120013222 0ustar liggesusers## write.dna.R (2020-11-08) ## Write DNA Sequences in a File ## Copyright 2003-2020 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. write.dna <- function(x, file, format = "interleaved", append = FALSE, nbcol = 6, colsep = " ", colw = 10, indent = NULL, blocksep = 1) { format <- match.arg(format, c("interleaved", "sequential", "fasta")) phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE if (inherits(x, "DNAbin")) x <- as.character(x) aligned <- TRUE if (is.matrix(x)) { N <- dim(x) S <- N[2] N <- N[1] xx <- vector("list", N) for (i in 1:N) xx[[i]] <- x[i, ] names(xx) <- rownames(x) x <- xx rm(xx) } else { N <- length(x) S <- unique(lengths(x, use.names = FALSE)) if (length(S) > 1) aligned <- FALSE } if (is.null(names(x))) names(x) <- as.character(1:N) if (is.null(indent)) indent <- if (phylip) 10 else 0 if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "") if (format == "interleaved") { blocksep <- paste(rep("\n", blocksep), collapse = "") if (nbcol < 0) format <- "sequential" } zz <- if (append) file(file, "a") else file(file, "w") on.exit(close(zz)) if (phylip) { if (!aligned) stop("sequences must have the same length for interleaved or sequential format.") cat(N, " ", S, "\n", sep = "", file = zz) if (nbcol < 0) { nb.block <- 1 nbcol <- totalcol <- ceiling(S/colw) } else { nb.block <- ceiling(S/(colw * nbcol)) totalcol <- ceiling(S/colw) } ## Prepare the sequences in a matrix whose elements are ## strings with `colw' characters. SEQ <- matrix("", N, totalcol) for (i in 1:N) { X <- paste(x[[i]], collapse = "") for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw) } ## Prepare the names so that they all have the same nb of chars max.nc <- max(nchar(names(x))) ## always put a space between the sequences and the taxa names fmt <- paste("%-", max.nc + 1, "s", sep = "") names(x) <- sprintf(fmt, names(x)) } switch(format, "interleaved" = { ## Write the first block with the taxon names colsel <- if (nb.block == 1) 1:totalcol else 1:nbcol for (i in 1:N) { cat(names(x)[i], file = zz) cat(SEQ[i, colsel], sep = colsep, file = zz) cat("\n", file = zz) } ## Write eventually the other blocks if (nb.block > 1) { for (k in 2:nb.block) { cat(blocksep, file = zz) endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol for (i in 1:N) { cat(indent, file = zz) cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz) cat("\n", file = zz) } } } }, "sequential" = { if (nb.block == 1) { for (i in 1:N) { cat(names(x)[i], file = zz) cat(SEQ[i, ], sep = colsep, file = zz) cat("\n", file = zz) } } else { for (i in 1:N) { cat(names(x)[i], file = zz) cat(SEQ[i, 1:nbcol], sep = colsep, file = zz) cat("\n", file = zz) for (k in 2:nb.block) { endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol cat(indent, file = zz) cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz) cat("\n", file = zz) } } } }, "fasta" = { for (i in 1:N) { cat(">", names(x)[i], file = zz, sep = "") cat("\n", file = zz) X <- paste(x[[i]], collapse = "") S <- length(x[[i]]) totalcol <- ceiling(S/colw) if (nbcol < 0) nbcol <- totalcol nb.lines <- ceiling(totalcol/nbcol) SEQ <- character(totalcol) for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1) * colw, colw + (j - 1) * colw) for (k in 1:nb.lines) { endsel <- if (k == nb.lines) length(SEQ) else nbcol + (k - 1)*nbcol cat(indent, file = zz) cat(SEQ[(1 + (k - 1)*nbcol):endsel], sep = colsep, file = zz) cat("\n", file = zz) } } }) } write.FASTA <- function(x, file, header = NULL, append = FALSE) { dna <- inherits(x, "DNAbin") if (!dna && !inherits(x, "AAbin")) stop("data are apparently neither DNA nor AA sequences") if (!is.null(header)) { header <- as.character(header) if (!length(header) || !sum(nchar(header)) || is.na(header)) { warning("header cannot be coerced as character; was ignored") header <- NULL } } labs <- labels(x) if (is.matrix(x)) { s <- ncol(x) # always integer n <- nrow(x) # } else { s <- -1L n <- length(x) } if (is.null(labs)) labs <- as.character(1:n) labs <- lapply(labs, charToRaw) if (!is.null(header)) { cat(header, sep = "\n", file = file, append = append) } else { fileExists <- file.exists(file) if (append) { if (!fileExists) stop(paste("cannot access FASTA file", file)) } else { if (fileExists) file.remove(file) o <- file.create(file) if (!o) stop(paste("cannot create FASTA file", file)) } } ## 'file' should always exist now file <- normalizePath(file) if (dna) .Call(writeDNAbinToFASTA, x, file, n, s, labs) else .Call(writeAAbinToFASTA, x, file, n, s, labs) invisible(NULL) } ape/R/nodelabels.R0000644000176200001440000003036214430407332013457 0ustar liggesusers## nodelabels.R (2023-05-15) ## Labelling Trees ## Copyright 2004-2023 Emmanuel Paradis, 2006 Ben Bolker, and 2006 Jim Lemon ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ## from JL: ## floating.pie() from plotrix with two changes: ## (1) aspect ratio fixed, so pies will appear circular ## (`radius' is the radius in user coordinates along the x axis); ## (2) zero values allowed (but not negative). floating.pie.asp <- function(xpos, ypos, x, edges = 200, radius = 1, col = NULL, startpos = 0, ...) { u <- par("usr") user.asp <- diff(u[3:4])/diff(u[1:2]) p <- par("pin") inches.asp <- p[2]/p[1] asp <- user.asp/inches.asp if (!is.numeric(x) || any(is.na(x) | x < 0)) stop("floating.pie: x values must be non-negative") x <- c(0, cumsum(x)/sum(x)) dx <- diff(x) nx <- length(dx) col <- if (is.null(col)) rainbow(nx) else rep_len(col, nx) ## next a fix from Klaus to avoid a "3-o'clock" segment on pies with ## only one proportion equal to 1: if (length(i <- which(dx == 1))) { symbols(xpos, ypos, circles = radius, inches = FALSE, add = TRUE, fg = par("fg"), bg = col[i]) # suggested by Liam } else { bc <- 2 * pi * (x[1:nx] + dx/2) + startpos for (i in seq_len(nx)) { n <- max(2, floor(edges * dx[i])) t2p <- 2 * pi * seq(x[i], x[i + 1], length = n) + startpos xc <- c(cos(t2p) * radius + xpos, xpos) yc <- c(sin(t2p) * radius*asp + ypos, ypos) polygon(xc, yc, col = col[i], ...) } } } BOTHlabels <- function(text, sel, XX, YY, adj, frame, pch, thermo, pie, piecol, col, bg, horiz, width, height, ...) { if (missing(text)) text <- NULL if (length(adj) == 1) adj <- c(adj, 0.5) if (is.null(text) && is.null(pch) && is.null(thermo) && is.null(pie)) text <- as.character(sel) frame <- match.arg(frame, c("rect", "circle", "none")) args <- list(...) CEX <- if ("cex" %in% names(args)) args$cex else par("cex") if (frame != "none" && !is.null(text)) { width <- strwidth(text, units = "inches", cex = CEX) height <- strheight(text, units = "inches", cex = CEX) if (frame == "rect") { if ("srt" %in% names(args)) { args$srt <- args$srt %% 360 # just in case srt >= 360 if (args$srt == 90 || args$srt == 270) { tmp <- width width <- height height <- tmp } else if (args$srt != 0) warning("only right angle rotation of frame is supported;\n try `frame = \"n\"' instead.\n") } width <- xinch(width) height <- yinch(height) xl <- XX - width * adj[1] - xinch(0.03) xr <- xl + width + xinch(0.03) yb <- YY - height * adj[2] - yinch(0.02) yt <- yb + height + yinch(0.05) rect(xl, yb, xr, yt, col = bg) } if (frame == "circle") { radii <- 0.8 * apply(cbind(height, width), 1, max) offsetX <- offsetY <- 0 if (any(adj != 0.5)) { width <- xinch(width) height <- yinch(height) offsetX <- width * (adj[1] - 0.5) offsetY <- height * (adj[2] - 0.5) #browser() } symbols(XX - offsetX, YY - offsetY, circles = radii, inches = max(radii), add = TRUE, bg = bg) } } if (!is.null(thermo)) { parusr <- par("usr") if (is.null(width)) { width <- CEX * (parusr[2] - parusr[1]) width <- if (horiz) width/15 else width/40 } if (is.null(height)) { height <- CEX * (parusr[4] - parusr[3]) height <- if (horiz) height/40 else height/15 } if (is.vector(thermo) || ncol(thermo) == 1) thermo <- cbind(thermo, 1 - thermo) thermo <- if (horiz) width * thermo else height * thermo if (is.null(piecol)) piecol <- rainbow(ncol(thermo)) xl <- XX - width/2 + adj[1] - 0.5 # added 'adj' from Janet Young (2009-09-30) xr <- xl + width yb <- YY - height/2 + adj[2] - 0.5 yt <- yb + height if (horiz) { ## draw the first rectangle: rect(xl, yb, xl + thermo[, 1], yt, border = NA, col = piecol[1]) for (i in 2:ncol(thermo)) rect(xl + rowSums(thermo[, 1:(i - 1), drop = FALSE]), yb, xl + rowSums(thermo[, 1:i]), yt, border = NA, col = piecol[i]) } else { ## draw the first rectangle: rect(xl, yb, xr, yb + thermo[, 1], border = NA, col = piecol[1]) for (i in 2:ncol(thermo)) rect(xl, yb + rowSums(thermo[, 1:(i - 1), drop = FALSE]), xr, yb + rowSums(thermo[, 1:i]), border = NA, col = piecol[i]) } ## check for NA's before drawing the borders s <- apply(thermo, 1, function(xx) any(is.na(xx))) xl[s] <- xr[s] <- NA rect(xl, yb, xr, yt, border = "black") if (!horiz) { segments(xl, YY, xl - width/5, YY) segments(xr, YY, xr + width/5, YY) } } ## from BB: if (!is.null(pie)) { if (is.data.frame(pie)) pie <- as.matrix(pie) if (is.vector(pie) || ncol(pie) == 1) pie <- cbind(pie, 1 - pie) xrad <- CEX * diff(par("usr")[1:2]) / 50 xrad <- rep(xrad, length(sel)) XX <- XX + adj[1] - 0.5 YY <- YY + adj[2] - 0.5 for (i in seq_along(sel)) { if (any(is.na(pie[i, ]))) next floating.pie.asp(XX[i], YY[i], pie[i, ], radius = xrad[i], col = piecol) } } if (!is.null(text)) text(XX, YY, text, adj = adj, col = col, ...) if (!is.null(pch)) points(XX + adj[1] - 0.5, YY + adj[2] - 0.5, pch = pch, col = col, bg = bg, ...) } nodelabels <- function(text, node, adj = c(0.5, 0.5), frame = "rect", pch = NULL, thermo = NULL, pie = NULL, piecol = NULL, col = "black", bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) if (missing(node)) node <- (lastPP$Ntip + 1):length(lastPP$xx) XX <- lastPP$xx[node] YY <- lastPP$yy[node] BOTHlabels(text, node, XX, YY, adj, frame, pch, thermo, pie, piecol, col, bg, horiz, width, height, ...) } tiplabels <- function(text, tip, adj = c(0.5, 0.5), frame = "rect", pch = NULL, thermo = NULL, pie = NULL, piecol = NULL, col = "black", bg = "yellow", horiz = FALSE, width = NULL, height = NULL, offset = 0, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) if (missing(tip)) tip <- 1:lastPP$Ntip XX <- lastPP$xx[tip] YY <- lastPP$yy[tip] if (offset != 0) { if (lastPP$type %in% c("phylogram", "cladogram")) { switch(lastPP$direction, "rightwards" = {XX <- XX + offset}, "leftwards" = {XX <- XX - offset}, "upwards" = {YY <- YY + offset}, "downwards" = {YY <- YY - offset}) } else { if (lastPP$type %in% c("fan", "radial")) { tmp <- rect2polar(XX, YY) if (lastPP$align.tip.label) tmp$r[] <- max(tmp$r) tmp <- polar2rect(tmp$r + offset, tmp$angle) XX <- tmp$x YY <- tmp$y } else { if (lastPP$type == "unrooted") warning("argument 'offset' ignored with unrooted trees") } } } BOTHlabels(text, tip, XX, YY, adj, frame, pch, thermo, pie, piecol, col, bg, horiz, width, height, ...) } edgelabels <- function(text, edge, adj = c(0.5, 0.5), frame = "rect", pch = NULL, thermo = NULL, pie = NULL, piecol = NULL, col = "black", bg = "lightgreen", horiz = FALSE, width = NULL, height = NULL, date = NULL, ...) { lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) if (missing(edge)) { sel <- 1:dim(lastPP$edge)[1] subedge <- lastPP$edge } else { sel <- edge subedge <- lastPP$edge[sel, , drop = FALSE] } xx <- lastPP$xx yy <- lastPP$yy if (lastPP$type == "phylogram") { if (lastPP$direction %in% c("rightwards", "leftwards")) { XX <- (xx[subedge[, 1]] + xx[subedge[, 2]]) / 2 YY <- yy[subedge[, 2]] } else { XX <- xx[subedge[, 2]] YY <- (yy[subedge[, 1]] + yy[subedge[, 2]]) / 2 } } else { if (lastPP$type == "fan") { # fix by Klaus Schliep (2015-07-31) r <- sqrt(xx^2 + yy^2) tmp <- (r[subedge[, 2]] + r[subedge[, 1]]) / (r[subedge[, 2]] * 2) XX <- xx[subedge[, 2]] * tmp YY <- yy[subedge[, 2]] * tmp } else { XX <- (xx[subedge[, 1]] + xx[subedge[, 2]]) / 2 YY <- (yy[subedge[, 1]] + yy[subedge[, 2]]) / 2 } } ## suggestion by Rob Lanfear: if (!is.null(date)) XX[] <- max(lastPP$xx) - date BOTHlabels(text, sel, XX, YY, adj, frame, pch, thermo, pie, piecol, col, bg, horiz, width, height, ...) } edges <- function(nodes0, nodes1, arrows = 0, type = "classical", ...) { type <- match.arg(type, c("classical", "triangle", "harpoon")) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) ## we do the recycling if necessary: if (length(nodes0) != length(nodes1)) { tmp <- cbind(nodes0, nodes1) nodes0 <- tmp[, 1] nodes1 <- tmp[, 2] } x0 <- lastPP$xx[nodes0] y0 <- lastPP$yy[nodes0] x1 <- lastPP$xx[nodes1] y1 <- lastPP$yy[nodes1] if (arrows) if (type == "classical") arrows(x0, y0, x1, y1, code = arrows, ...) else fancyarrows(x0, y0, x1, y1, code = arrows, type = type, ...) else segments(x0, y0, x1, y1, ...) } fancyarrows <- function(x0, y0, x1, y1, length = 0.25, angle = 30, code = 2, col = par("fg"), lty = par("lty"), lwd = par("lwd"), type = "triangle", ...) { foo <- function(x0, y0, x1, y1) { ## important to correct with these parameters cause ## the coordinate system will likely not be Cartesian pin <- par("pin") usr <- par("usr") A1 <- pin[1]/diff(usr[1:2]) A2 <- pin[2]/diff(usr[3:4]) x0 <- x0 * A1 y0 <- y0 * A2 x1 <- x1 * A1 y1 <- y1 * A2 atan2(y1 - y0, x1 - x0) } arrow.triangle <- function(x, y) { beta <- alpha - angle/2 xa <- xinch(length * cos(beta)) + x ya <- yinch(length * sin(beta)) + y beta <- beta + angle xb <- xinch(length * cos(beta)) + x yb <- yinch(length * sin(beta)) + y n <- length(x) col <- rep(col, length.out = n) for (i in 1:n) polygon(c(x[i], xa[i], xb[i]), c(y[i], ya[i], yb[i]), col = col[i], border = col[i]) list((xa + xb)/2, (ya + yb)/2) } arrow.harpoon <- function(x, y) { beta <- alpha - angle/2 xa <- xinch(length * cos(beta)) + x ya <- yinch(length * sin(beta)) + y beta <- alpha + angle/2 xb <- xinch(length * cos(beta)) + x yb <- yinch(length * sin(beta)) + y xc <- x/2 + (xa + xb)/4 yc <- y/2 + (ya + yb)/4 n <- length(x) col <- rep(col, length.out = n) for (i in 1:n) polygon(c(x[i], xa[i], xc[i], xb[i]), c(y[i], ya[i], yc[i], yb[i]), col = col[i], border = col[i]) list(xc, yc) } type <- match.arg(type, c("triangle", "harpoon")) angle <- pi*angle/180 # degree -> radian alpha <- foo(x0, y0, x1, y1) # angle of segment with x-axis ## alpha is in [-pi, pi] FUN <- if (type == "triangle") arrow.triangle else arrow.harpoon XY0 <- if (code == 1 || code == 3) FUN(x0, y0) else list(x0, y0) if (code >= 2) { alpha <- (alpha + pi) %% (2 * pi) XY1 <- FUN(x1, y1) } else XY1 <- list(x1, y1) segments(XY0[[1]], XY0[[2]], XY1[[1]], XY1[[2]], col = col, lty = lty, lwd = lwd, ...) } ape/R/rTrait.R0000644000176200001440000001263614156261520012622 0ustar liggesusers## rTrait.R (2021-12-15) ## Trait Evolution ## Copyright 2010-2021 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. rTraitDisc <- function(phy, model = "ER", k = if (is.matrix(model)) ncol(model) else 2, rate = 0.1, states = LETTERS[1:k], freq = rep(1/k, k), ancestor = FALSE, root.value = 1, ...) { if (is.null(phy$edge.length)) stop("tree has no branch length") if (any(phy$edge.length < 0)) stop("at least one branch length negative") if (is.character(model)) { switch(toupper(model), "ER" = { if (length(rate) != 1) stop("`rate' must have one element") Q <- matrix(rate, k, k) }, "ARD" = { if (length(rate) != k*(k - 1)) stop("`rate' must have k(k - 1) elements") Q <- matrix(0, k, k) Q[col(Q) != row(Q)] <- rate }, "SYM" = { if (length(rate) != k*(k - 1)/2) stop("`rate' must have k(k - 1)/2 elements") Q <- matrix(0, k, k) sel <- col(Q) < row(Q) Q[sel] <- rate Q <- t(Q) Q[sel] <- rate }) } if (is.matrix(model)) { Q <- model if (ncol(Q) != nrow(Q)) stop("the matrix given as `model' must be square") } phy <- reorder(phy, "postorder") n <- length(phy$tip.label) N <- dim(phy$edge)[1] ROOT <- n + 1L x <- integer(n + phy$Nnode) x[ROOT] <- as.integer(root.value) anc <- phy$edge[, 1] des <- phy$edge[, 2] el <- phy$edge.length if (!is.function(model)) { if (requireNamespace("expm", quietly = TRUE)) { FOO <- expm::expm } else { warning("package expm not available, using ape::matexpo instead") FOO <- matexpo } } if (is.function(model)) { environment(model) <- environment() # to find 'k' for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i], ...) } else { freq <- rep(freq, each = k) Q <- Q * freq diag(Q) <- 0 diag(Q) <- -rowSums(Q) for (i in N:1) { p <- FOO(Q * el[i])[x[anc[i]], ] x[des[i]] <- sample.int(k, size = 1, FALSE, prob = p) } } if (ancestor) { if (is.null(phy$node.label)) phy <- makeNodeLabel(phy) names(x) <- c(phy$tip.label, phy$node.label) } else { x <- x[1:n] names(x) <- phy$tip.label } class(x) <- "factor" levels(x) <- states x } rTraitCont <- function(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0, ancestor = FALSE, root.value = 0, ...) { if (is.null(phy$edge.length)) stop("tree has no branch length") if (any(phy$edge.length < 0)) stop("at least one branch length negative") phy <- reorder(phy, "postorder") n <- length(phy$tip.label) N <- dim(phy$edge)[1] ROOT <- n + 1L x <- numeric(n + phy$Nnode) x[ROOT] <- root.value anc <- phy$edge[, 1] des <- phy$edge[, 2] el <- phy$edge.length if (is.function(model)) { environment(model) <- environment() for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i], ...) } else { model <- pmatch(toupper(model), c("BM", "OU")) if (length(sigma) == 1) sigma <- rep(sigma, N) else if (length(sigma) != N) stop("'sigma' must have one or Nedge(phy) elements") if (model == 2) { # "OU" if (length(alpha) == 1) alpha <- rep(alpha, N) else if (length(alpha) != N) stop("'alpha' must have one or Nedge(phy) elements") if (length(theta) == 1) theta <- rep(theta, N) else if (length(theta) != N) stop("'theta' must have one or Nedge(phy) elements") } x <- .C(C_rTraitCont, as.integer(model), as.integer(N), as.integer(anc - 1L), as.integer(des - 1L), el, as.double(sigma), as.double(alpha), as.double(theta), x = x, NAOK = TRUE)$x } if (ancestor) { if (is.null(phy$node.label)) phy <- makeNodeLabel(phy) names(x) <- c(phy$tip.label, phy$node.label) } else { x <- x[1:n] names(x) <- phy$tip.label } x } rTraitMult <- function(phy, model, p = 1, root.value = rep(0, p), ancestor = FALSE, asFactor = NULL, trait.labels = paste("x", 1:p, sep = ""), ...) { phy <- reorder(phy, "postorder") n <- length(phy$tip.label) m <- phy$Nnode N <- dim(phy$edge)[1] ROOT <- n + 1L x <- matrix(0, n + m, p) x[ROOT, ] <- root.value anc <- phy$edge[, 1] des <- phy$edge[, 2] el <- phy$edge.length if (is.null(el)) el <- numeric(N) environment(model) <- environment() # to find 'p' for (i in N:1) x[des[i], ] <- model(x[anc[i], ], el[i], ...) if (ancestor) { if (is.null(phy$node.label)) phy <- makeNodeLabel(phy) rownames(x) <- c(phy$tip.label, phy$node.label) } else { x <- x[1:n, , drop = FALSE] rownames(x) <- phy$tip.label } x <- as.data.frame(x) names(x) <- trait.labels if (!is.null(asFactor)) { for (i in asFactor) { y <- x[, i] x[, i] <- factor(y, labels = LETTERS[1:length(unique(y))]) } } x } ape/R/cophenetic.phylo.R0000644000176200001440000000150614726074762014637 0ustar liggesusers## cophenetic.phylo.R (2024-12-10) ## Pairwise Distances from a Phylogenetic Tree ## Copyright 2006-2024 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. dist.nodes <- function(x, fail.if.no.length = FALSE) { if (is.null(x$edge.length)) { if (fail.if.no.length) stop("the tree has no branch length") warning("the tree has no branch length: fixing them to one.") x <- compute.brlen(x, 1) } x <- reorder(x) # required for the C code n <- Ntip(x) m <- x$Nnode d <- .Call(dist_nodes, n, m, x$edge, x$edge.length) nm <- n + m dimnames(d) <- list(1:nm, 1:nm) d } cophenetic.phylo <- function(x) { n <- length(x$tip.label) ans <- dist.nodes(x)[1:n, 1:n] dimnames(ans)[1:2] <- list(x$tip.label) ans } ape/R/ewLasso.R0000644000176200001440000000127712465112403012765 0ustar liggesusers## ewLasso.R (2013-04-02) ## Lasso Tree ## Copyright 2013 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ewLasso <- function(X, phy) { if (is.matrix(X)) X <- as.dist(X) X[is.na(X)] <- -1 X[X < 0] <- -1 X[is.nan(X)] <- -1 if (is.rooted(phy)) { phy <- unroot(phy) warning("'phy' is rooted: it was unrooted for this operation") } N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) ans <- .C(C_ewLasso, as.double(X), as.integer(N), as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]), NAOK = TRUE) } ape/R/chronos.R0000644000176200001440000005231214122770364013027 0ustar liggesusers## chronos.R (2021-09-23) ## Molecular Dating With Penalized and Maximum Likelihood ## Copyright 2013-2021 Emmanuel Paradis, 2018-2020 Santiago Claramunt, 2020 Guillaume Louvel ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .chronos.ctrl <- list(tol = 1e-8, iter.max = 1e4, eval.max = 1e4, nb.rate.cat = 10, dual.iter.max = 20, epsilon = 1e-6) makeChronosCalib <- function(phy, node = "root", age.min = 1, age.max = age.min, interactive = FALSE, soft.bounds = FALSE) { n <- Ntip(phy) if (interactive) { plot(phy) cat("Click close to a node and enter the ages (right-click to exit)\n\n") node <- integer() age.min <- age.max <- numeric() repeat { ans <- identify(phy, quiet = TRUE) if (is.null(ans)) break NODE <- ans$nodes nodelabels(node = NODE, col = "white", bg = "blue") cat("constraints for node ", NODE, sep = "") cat("\n youngest age: ") AGE.MIN <- as.numeric(readLines(n = 1)) cat(" oldest age (ENTER if not applicable): ") AGE.MAX <- as.numeric(readLines(n = 1)) node <- c(node, NODE) age.min <- c(age.min, AGE.MIN) age.max <- c(age.max, AGE.MAX) } s <- is.na(age.max) if (any(s)) age.max[s] <- age.min[s] } else { if (identical(node, "root")) node <- n + 1L } if (any(node <= n)) stop("node numbers should be greater than the number of tips") diff.age <- which(age.max < age.min) if (length(diff.age)) { msg <- "'old age' less than 'young age' for node" if (length(diff.age) > 1) msg <- paste(msg, "s", sep = "") stop(paste(msg, paste(node[diff.age], collapse = ", "))) } data.frame(node, age.min, age.max, soft.bounds = soft.bounds) } next.calib <- function(y, ini.time) # added by GL (2020-01-29) { times <- ini.time[y] runs.na <- rle(is.na(times)) next.calib.i <- cumsum(runs.na$lengths)[runs.na$values] + 1 ini.time[y[next.calib.i]] ##return(ncal) #if(length(ncal)){ncal}else{-1}) } chronos.control <- function(...) { dots <- list(...) x <- .chronos.ctrl if (length(dots)) { chk.nms <- names(dots) %in% names(x) if (any(!chk.nms)) { warning("some control parameter names do not match: they were ignored") dots <- dots[chk.nms] } x[names(dots)] <- dots } x } chronos <- function(phy, lambda = 1, model = "correlated", quiet = FALSE, calibration = makeChronosCalib(phy), control = chronos.control()) { model <- match.arg(tolower(model), c("correlated", "relaxed", "discrete", "clock")) if (model == "clock") { model <- "discrete" control$nb.rate.cat <- 1 } n <- Ntip(phy) ROOT <- n + 1L m <- phy$Nnode el <- phy$edge.length if (is.null(el)) stop("the tree has no branch lengths") if (any(el < 0)) stop("some branch lengths are negative") e1 <- phy$edge[, 1L] e2 <- phy$edge[, 2L] N <- length(e1) TIPS <- 1:n EDGES <- 1:N tol <- control$tol node <- calibration$node age.min <- calibration$age.min age.max <- calibration$age.max ## Starting points of node ages to *estimate*. Calibrated nodes can be NA. age.start <- # added by GL (2020-01-29) if (is.null(calibration$age.start)) rep(NA_real_, length(node)) else calibration$age.start if (model == "correlated") { ### `basal' contains the indices of the basal edges ### (ie, linked to the root): basal <- which(e1 == ROOT) Nbasal <- length(basal) ### 'ind1' contains the index of all nonbasal edges, and 'ind2' the ### index of the edges where these edges come from (ie, they contain ### pairs of contiguous edges), eg: ### ___b___ ind1 ind2 ### | | || | ### ___a___| | b || a | ### | | c || a | ### |___c___ | || | ind1 <- EDGES[-basal] ind2 <- match(e1[EDGES[-basal]], e2) } age <- numeric(n + m) lfactorial.el <- lfactorial(el) # Calculate the factorials here once (SC) ### This bit sets 'ini.time' and should result in no negative branch lengths if (!quiet) cat("\nSetting initial dates...\n") seq.nod <- .Call(seq_root2tip, phy$edge, n, phy$Nnode) ## 'fact.root' is used to approximate the age of the root if it is not given; ## it is multiplied by 1.5 every 100 tries of the initiation loop (see below) ## (added 2017-11-21) fact.root <- 3 ii <- 1L repeat { ini.time <- age ini.time[ROOT:(n + m)] <- NA ##ini.time[node] <- ## if (is.null(age.max)) age.min ## else runif(length(node), age.min, age.max) # (age.min + age.max) / 2 ## added by GL (2020-01-29): ini.time[node] <- ifelse(is.na(age.start), if (is.null(age.max)) age.min else runif(length(node), age.min, age.max), age.start) ## if no age given for the root, find one approximately: if (is.na(ini.time[ROOT])) ini.time[ROOT] <- fact.root * max(if (is.null(age.max)) age.min else age.max) ##ISnotNA.ALL <- unlist(lapply(seq.nod, function(x) sum(!is.na(ini.time[x])))) ##o <- order(ISnotNA.ALL, decreasing = TRUE) ## added by GL (2020-01-29): ## For each path to the leaves, return the calibrations following the last NA. calibs.after.NA <- lapply(seq.nod, next.calib, ini.time) ## This recycles shorter elements, but doesn't matter with the order() function L <- max(sapply(calibs.after.NA, length)) calibs.df <- as.data.frame(do.call(rbind, lapply(calibs.after.NA, function(r) c(r, rep(-1, L - length(r)))))) o <- do.call(order, c(calibs.df, decreasing = TRUE)) for (y in seq.nod[o]) { ISNA <- is.na(ini.time[y]) if (any(ISNA)) { i <- 2L # we know the 1st value is not NA, so we start at the 2nd one while (i <= length(y)) { if (ISNA[i]) { # we stop at the next NA j <- i + 1L while (ISNA[j]) j <- j + 1L # look for the next non-NA nb.val <- j - i by <- (ini.time[y[i - 1L]] - ini.time[y[j]]) / (nb.val + 1) ini.time[y[i:(j - 1L)]] <- ini.time[y[i - 1L]] - by * seq_len(nb.val) i <- j + 1L } else i <- i + 1L } } } if (all(ini.time[e1] - ini.time[e2] >= 0)) break ii <- ii + 1L if (ii > 1000) stop("cannot find reasonable starting dates after 1000 tries: maybe you need to adjust the calibration dates") if (!(ii %% 100)) fact.root <- fact.root * 1.5 } ### 'ini.time' set #ini.time[ROOT:(n+m)] <- branching.times(chr.dis) ## ini.time[ROOT:(n+m)] <- ini.time[ROOT:(n+m)] + rnorm(m, 0, 5) #print(ini.time) ### Setting 'ini.rate' ini.rate <- el/(ini.time[e1] - ini.time[e2]) if (model == "discrete") { Nb.rates <- control$nb.rate.cat if (Nb.rates > N) { Nb.rates <- N warning("'nb.rate.cat' > number of branches: used nb.rate.cat = # of branches instead", call. = FALSE) } minmax <- range(ini.rate) if (Nb.rates == 1) { ini.rate <- sum(minmax)/2 } else { ##inc <- diff(minmax)/Nb.rates ##ini.rate <- seq(minmax[1] + inc/2, minmax[2] - inc/2, inc) ini.rate <- quantile(ini.rate, seq(1/(2 * Nb.rates), by = 1/Nb.rates, length.out = Nb.rates)) names(ini.rate) <- NULL ini.freq <- rep(1/Nb.rates, Nb.rates - 1) lower.freq <- rep(0, Nb.rates - 1) upper.freq <- rep(1, Nb.rates - 1) } } else Nb.rates <- N ## 'ini.rate' set ### Setting bounds for the node ages ## `unknown.ages' will contain the index of the nodes of unknown age: unknown.ages <- 1:m + n ## initialize vectors for all nodes: lower.age <- rep(tol, m) upper.age <- rep(1/tol, m) lower.age[node - n] <- age.min upper.age[node - n] <- age.max ## find nodes known within an interval: ii <- which(is.na(age.min) | (age.min != age.max)) ## drop them from 'node' since they will be estimated: if (length(ii)) { node <- node[-ii] if (length(node)) age[node] <- age.min[-ii] # update 'age' } else age[node] <- age.min ## finally adjust the 3 vectors: if (length(node)) { unknown.ages <- unknown.ages[n - node] # 'n - node' is simplification for '-(node - n)' lower.age <- lower.age[n - node] upper.age <- upper.age[n - node] } ### Bounds for the node ages set ## 'known.ages' contains the index of all nodes ## (internal and terminal) of known age: known.ages <- c(TIPS, node) ## the bounds for the rates: lower.rate <- rep(tol, Nb.rates) upper.rate <- rep(1e5 - tol, Nb.rates) ### Gradient degree_node <- tabulate(phy$edge) eta_i <- degree_node[e1] eta_i[e2 <= n] <- 1L ## eta_i[i] is the number of contiguous branches for branch 'i' ## use of a list of indices is slightly faster than an incidence matrix ## and takes much less memory (60 Kb vs. 8 Mb for n = 500) X <- vector("list", N) for (i in EDGES) { j <- integer() if (e1[i] != ROOT) j <- c(j, which(e2 == e1[i])) if (e2[i] >= n) j <- c(j, which(e1 == e2[i])) X[[i]] <- j } ## X is a list whose i-th element gives the indices of the branches ## that are contiguous to branch 'i' ## D_ki and A_ki are defined in the SI of the paper D_ki <- match(unknown.ages, e2) A_ki <- lapply(unknown.ages, function(x) which(x == e1)) gradient.poisson <- function(rate, node.time) { age[unknown.ages] <- node.time real.edge.length <- age[e1] - age[e2] ## gradient for the rates: gr <- el/rate - real.edge.length ## gradient for the dates: tmp <- el/real.edge.length - rate tmp2 <- tmp[D_ki] tmp2[is.na(tmp2)] <- 0 gr.dates <- sapply(A_ki, function(x) sum(tmp[x])) - tmp2 c(gr, gr.dates) } ## gradient of the penalized lik (must be multiplied by -1 before calling nlminb) gradient <- switch(model, "correlated" = function(rate, node.time) { gr <- gradient.poisson(rate, node.time) #if (all(gr == 0)) return(gr) ## contribution of the penalty for the rates: gr[RATE] <- gr[RATE] - lambda * 2 * (eta_i * rate - sapply(X, function(x) sum(rate[x]))) ## the contribution of the root variance term: if (Nbasal == 1) { return(gr) } if (Nbasal == 2) { # the simpler formulae if there's a basal dichotomy i <- basal[1] j <- basal[2] gr[i] <- gr[i] - lambda * (rate[i] - rate[j]) gr[j] <- gr[j] - lambda * (rate[j] - rate[i]) return(gr) } ## Nbasal > 2 -- the general case for (i in 1:Nbasal) { j <- basal[i] gr[j] <- gr[j] - lambda*2*(rate[j]*(1 - 1/Nbasal) - sum(rate[basal[-i]])/Nbasal)/(Nbasal - 1) } gr }, "relaxed" = function(rate, node.time) { gr <- gradient.poisson(rate, node.time) #if (all(gr == 0)) return(gr) ## contribution of the penalty for the rates: mean.rate <- mean(rate) ## rank(rate)/Nb.rates is the same than ecdf(rate)(rate) but faster gr[RATE] <- gr[RATE] + lambda*2*dgamma(rate, mean.rate)*(rank(rate)/Nb.rates - pgamma(rate, mean.rate)) gr }, "discrete" = NULL) log.lik.poisson <- function(rate, node.time) { age[unknown.ages] <- node.time real.edge.length <- age[e1] - age[e2] if (isTRUE(any(real.edge.length < 0))) return(-1e100) B <- rate * real.edge.length sum(el * log(B) - B - lfactorial.el) } ## New function for incorporating multiple rate categories (by SC). ## This one calculates the conditional probability for each branch ## and rate regime, and then computes a weighted average (using the ## frequencies as weights) before summing logs across branches. log.lik.poisson.discrete <- function(rate, node.time, freq) { Freqs <- c(freq, 1 - sum(freq)) age[unknown.ages] <- node.time real.edge.length <- age[e1] - age[e2] if (any(real.edge.length < 0)) return(-1e+100) ## generate a matrix of branch length rates under each rate regime: B <- real.edge.length %*% t(rate) ## generate a matrix of likelihood values PPs <- exp(el * log(B) - B - lfactorial.el) ## matrix multiplication to obtain the weigthed sums for each ## branch (the average likelihoods), then sum the ## log-likelihoods to obtain the tree likelihood: sum(log(PPs %*% Freqs)) } ### penalized log-likelihood penal.loglik <- switch(model, "correlated" = function(rate, node.time) { loglik <- log.lik.poisson(rate, node.time) if (!is.finite(loglik)) return(-1e100) res <- loglik - lambda * sum((rate[ind1] - rate[ind2])^2) if (Nbasal > 1) res <- res + lambda * var(rate[basal]) res }, "relaxed" = function(rate, node.time) { loglik <- log.lik.poisson(rate, node.time) if (!is.finite(loglik)) return(-1e100) mu <- mean(rate) ## loglik - lambda * sum((1:N/N - pbeta(sort(rate), mu/(1 + mu), 1))^2) # avec loi beta ## loglik - lambda * sum((1:N/N - pcauchy(sort(rate)))^2) # avec loi Cauchy loglik - lambda * sum((1:N/N - pgamma(sort(rate), mean(rate)))^2) # avec loi Gamma }, "discrete" = if (Nb.rates == 1) function(rate, node.time) log.lik.poisson(rate, node.time) else function(rate, node.time, freq) { if (sum(freq) > 1) return(-1e100) ## rate.freq <- sum(c(freq, 1 - sum(freq)) * rate) ## log.lik.poisson(rate.freq, node.time) log.lik.poisson.discrete(rate, node.time, freq) # by SC }) opt.ctrl <- list(eval.max = control$eval.max, iter.max = control$iter.max) ## the following capitalized vectors give the indices of ## the parameters once they are concatenated in 'p' RATE <- 1:Nb.rates AGE <- Nb.rates + 1:length(unknown.ages) if (model == "discrete") { if (Nb.rates == 1) { start.para <- c(ini.rate, ini.time[unknown.ages]) f <- function(p) -penal.loglik(p[RATE], p[AGE]) g <- NULL LOW <- c(lower.rate, lower.age) UP <- c(upper.rate, upper.age) } else { FREQ <- length(RATE) + length(AGE) + 1:(Nb.rates - 1) start.para <- c(ini.rate, ini.time[unknown.ages], ini.freq) f <- function(p) -penal.loglik(p[RATE], p[AGE], p[FREQ]) g <- NULL LOW <- c(lower.rate, lower.age, lower.freq) UP <- c(upper.rate, upper.age, upper.freq) } } else { start.para <- c(ini.rate, ini.time[unknown.ages]) f <- function(p) -penal.loglik(p[RATE], p[AGE]) g <- function(p) -gradient(p[RATE], p[AGE]) LOW <- c(lower.rate, lower.age) UP <- c(upper.rate, upper.age) } k <- length(LOW) # number of free parameters if (!quiet) cat("Fitting in progress... get a first set of estimates\n") out <- nlminb(start.para, f, g, control = opt.ctrl, lower = LOW, upper = UP) if (model == "discrete") { if (Nb.rates == 1) { f.rates <- function(p) -penal.loglik(p, current.ages) f.ages <- function(p) -penal.loglik(current.rates, p) } else { f.rates <- function(p) -penal.loglik(p, current.ages, current.freqs) f.ages <- function(p) -penal.loglik(current.rates, p, current.freqs) f.freqs <- function(p) -penal.loglik(current.rates, current.ages, p) g.freqs <- NULL } g.rates <- NULL g.ages <- NULL } else { f.rates <- function(p) -penal.loglik(p, current.ages) g.rates <- function(p) -gradient(p, current.ages)[RATE] f.ages <- function(p) -penal.loglik(current.rates, p) g.ages <- function(p) -gradient(current.rates, p)[AGE] } current.ploglik <- -out$objective current.rates <- out$par[RATE] current.ages <- out$par[AGE] if (model == "discrete" && Nb.rates > 1) current.freqs <- out$par[FREQ] dual.iter.max <- control$dual.iter.max epsilon <- control$epsilon i <- 1L # was 0L (2020-05-08) if (!quiet) cat(" (Penalised) log-lik =", current.ploglik, "\n") repeat { if (dual.iter.max < 1) break if (i > dual.iter.max) { # added this break here (with a warning) instead of after optimizations (SC) warning("Maximum number of dual iterations reached.", call. = FALSE) break } if (!quiet) cat("Optimising rates...") out.rates <- nlminb(current.rates, f.rates, g.rates,# h.rates, control = list(eval.max = 1000, iter.max = 1000, step.min = 1e-8, step.max = .1), lower = lower.rate, upper = upper.rate) new.rates <- out.rates$par if (-out.rates$objective > current.ploglik) current.rates <- new.rates if (model == "discrete" && Nb.rates > 1) { if (!quiet) cat(" frequencies...") out.freqs <- nlminb(current.freqs, f.freqs, control = list(eval.max = 1000, iter.max = 1000, step.min = .001, step.max = .5), lower = lower.freq, upper = upper.freq) new.freqs <- out.freqs$par } if (!quiet) cat(" dates...") out.ages <- nlminb(current.ages, f.ages, g.ages,# h.ages, control = list(eval.max = 1000, iter.max = 1000, step.min = .001, step.max = 100), lower = lower.age, upper = upper.age) new.ploglik <- -out.ages$objective if (!quiet) cat("", current.ploglik, "\n") delta.ploglik <- new.ploglik - current.ploglik if (is.na(delta.ploglik)) break # fix by Daniel Lang if (delta.ploglik > epsilon) { current.ploglik <- new.ploglik current.rates <- new.rates current.ages <- out.ages$par if (model == "discrete" && Nb.rates > 1) current.freqs <- new.freqs out <- out.ages i <- i + 1L } else break } ## if (!quiet) cat("\nDone.\n") if (model == "discrete") { ## rate.freq <- logLik <- if (Nb.rates == 1) log.lik.poisson(current.rates, current.ages) else log.lik.poisson.discrete(current.rates, current.ages, current.freqs) ## else mean(c(current.freqs, 1 - sum(current.freqs)) * current.rates) ## logLik <- log.lik.poisson(rate.freq, current.ages) PHIIC <- list(logLik = logLik, k = k, PHIIC = -2 * logLik + 2 * k) } else { logLik <- log.lik.poisson(current.rates, current.ages) PHI <- switch(model, "correlated" = (current.rates[ind1] - current.rates[ind2])^2 + ifelse(Nbasal == 1, 0, var(current.rates[basal])), "relaxed" = (1:N/N - pgamma(sort(current.rates), mean(current.rates)))^2) # avec loi Gamma PHIIC <- list(logLik = logLik, k = k, lambda = lambda, PHIIC = -2 * logLik + 2 * k + lambda * svd(PHI)$d) } attr(phy, "call") <- match.call() attr(phy, "ploglik") <- -out$objective attr(phy, "rates") <- current.rates #out$par[EDGES] if (model == "discrete" && Nb.rates > 1) attr(phy, "frequencies") <- c(current.freqs, 1 - sum(current.freqs)) attr(phy, "convergence") <- if (out$convergence == 0) TRUE else FALSE attr(phy, "message") <- out$message attr(phy, "PHIIC") <- PHIIC attr(phy, "niter") <- i age[unknown.ages] <- current.ages #out$par[-EDGES] phy$edge.length <- age[e1] - age[e2] if(!attr(phy, "convergence")) warning(attr(phy, "message"), call. = FALSE) if (!quiet) cat("\nlog-Lik =", logLik, "\nPHIIC =", round(PHIIC$PHIIC, 2),"\n") class(phy) <- c("chronos", class(phy)) phy } print.chronos <- function(x, ...) { cat("\n Chronogram\n\n") cat("Call: ") print(attr(x, "call")) cat("\n") NextMethod("print") } ape/NEWS0000644000176200001440000040477614726075041011546 0ustar liggesusers CHANGES IN APE VERSION 5.8-1 NEW FEATURES o write.nexus() has a new option, digitis = 10, similar to the one with the same name in write.tree(). The help page of the latter has been clarified (see PR #130 on GitHub). o kronoviz() has two new options, 'direction' and 'side', to add more flexibbility to the plot. BUG FIXES o plot.phylo() shifted the coordinates when the tree was plotted left- or downwards (see PR #119 on GitHub). o Newick strings with a single tip and multiple nodes were not correctly parsed. Also, if a root edge was present it was not correctly decoded (see issue #124 on GitHub). o dist.nodes() returned a (meaningless) result for trees with no branch lengths (sometimes a segfault occurred). This function has now the option 'fail.if.no.length = FALSE' so that branch lengths, if absent in the tree, are fixed to one (with a warning). An error is returned if this option is set to FALSE. OTHER CHANGES o Plotting circular trees has been improved (see PR #129 on GitHub). o axisPhylo(): the new default is axisPhylo(side = NULL) instead of axisPhylo(side = 1). CHANGES IN APE VERSION 5.8 NEW FEATURES o dist.topo() has a new option 'mc.cores'. With the default PH85 distance, the gain of using several cores is visible with 200+ trees. The internal code for the score distance has been improved and is twice faster with a single core, and 4-5 times faster with mc.cores = 2. o keep.tip() gains a '...' argument which is passed to drop.tip(). The behaviour of the latter is changed so that node labels, if available, are used to create new tip labels when subtree = TRUE (see PR #90 on GitHub). o nj() can now handle distances stored in long vectors. There was previously an implicit limit to 46,341 observations (see PR #97 on GitHub). o del.gaps(), del.colgapsonly(), and del.rowgapsonly() now accepts objects of class "AAbin". Besides, the class of the input object is always respected (it used to be "DNAbin" only even when it had several elements). o makeNodeLabel() is now generic with methods for the "phylo" and "multiPhylo" classes. o unroot() gains two options, 'collapse.singles' and 'keep.root.edge', to handle some complicated situations (see issue #103 on GitHub). o read.GenBank() has a new argument 'type' to download AA sequences. The default is type = "DNA", and the other choice is type = "AA". BUG FIXES o rtt() was fixed when dates include NAs (issue #73 on GitHub). o njs(), bionj(), bionjs(), fastme.bal(), and fastme.ols() now check that there are at least three observations in the distance matrix (issue #84 on GitHub). o nodelabels(, frame = "circle") used to ignore the argument 'adj'. This is fixed (issue #85 on GitHub). o write.tree() ignored tree names. This bug was introduced in ape 5.7 (see issue #101 on GitHub). o read.tree() now reads correctly trees with many nodes but a single tip such as "(());" (see issue #104 on GitHub). o bitsplits() failed with large trees. There are now checks on memory requirements; some explanations have been added in the help page. o comparePhylo() failed with two large trees. This is fixed. OTHER CHANGES o .uncompressTipLabel() now returns an object of the same class than the input object (previously the returned object was always of class "multiPhylo"). o The help page of is.binary() has been clarified that branch lengths are ignored (issue #83 on GitHub). o dist.nodes() has an improved internal C code with unlimited tree size. The code is three times faster with small trees; twice faster with ~1000 tips (the gain is less with bigger trees). o ace() now uses node labels, if available, in the output. o comparePhylo() now restores the graphical window in its previous state (i.e., after calling layout() if plot = TRUE). o read.tree() now gives a warning (still returning NULL) if there is (are) no semicolon(s) [end of tree(s)] in the Newick string. o write.tree() now gives a warning if the length of the node labels does not match the number of nodes given by '$Nnode'. CHANGES IN APE VERSION 5.7-1 BUG FIXES o Some very unbalanced trees resulted in a segfault (see issue #70 on GitHub). OTHER CHANGES o ape 5.7 introduced an implicit dependence on R >= 4.1. This has been corrected and this version should work with R >= 3.4 (and certainly with R >= 3.2). o All examples should now write files in the TMP directory (some still used to do this in the user's working directory). CHANGES IN APE VERSION 5.7 NEW FEATURES o The new function muscle5 calls MUSCLE5. There are two other new functions that depend on this program, efastats and letterconf, to compute summaries for a set of alternative alignments and display the confidence in each site in HTML, respectively. o print.LargeNumber() gains the options 'latex' (FALSE by default) and 'digits' (depends on 'latex'). o drop.tip() and keep.tip() are now generic functions with methods for the classes "phylo" and "multiPhylo". BUG FIXES o A bug was fixed in multi2di() and di2multi() for compressed "multiPhylo" objects causing an error if the object had to be reordered internally (see PR #51 on GitHub). o unroot() now accepts different "order" of the "phylo" object (see PR #52 on GitHub). o write.tree(), ladderize(), and balance() failed with very unbalanced trees (issues #53 and #54 on GitHub). o PGLS models used a wrong ordering of data if 'species' was a factor in corBrownian(, form = ~species) or other correlation structure functions. o A bug was fixed in the calculations of the variance of the TN93 distance (dist.dna). o consensus() did not use postprocess.prop.part() and might scramble nodelabels (see PR #65 on GitHub). o is.binary.phylo() used to return TRUE with trees such as ((a:1):1,b:2,c:2):1;. This test now conforms with what is written in the documentation. OTHER CHANGES o reconstruct() gains the options 'low_alpha = 0.0001' and 'low_alpha = 1' to allow estimation of values of alpha greater than 1. o src/me.c and src/me.h are now OK with -Wstrict-prototypes flag. o makeNodeLabel(method = "md5sum") now uses the function digest from the package of the same name. The package tools has been removed from the Imports list. CHANGES IN APE VERSION 5.6-2 NEW FEATURES o plot.phylo() can now plot "tidy" trees with the option type = "t[idy]" (see: van der Ploeg, 2014, Drawing non-layered tidy trees in linear time. J. Software: Practice and Experience 44:1467). BUG FIXES o is.monophyletic() didn't work correctly with duplicated labels (issue #50 on GH). o birthdeath()'s 'if' conditions should be now all of length 1. CHANGES IN APE VERSION 5.6-1 BUG FIXES o axisPhylo() misplaced the scale with some trees drawn with plot(type = "fan"). o The internal code of bitsplits() has been improved and can now handle all types of unrooted trees. CHANGES IN APE VERSION 5.6 NEW FEATURES o The new generic function degree returns the degrees of all nodes in a tree or network. There are methods for "phylo" and "evonet" objects. o The new function solveAmbiguousBases replaces R, Y, W, ... by A, G, C, or T using columnwise base frequencies. o There is a new method as.phylo.default() for any object inheriting the class "phylo". o read.dna() and read.FASTA() can now read gzipped files. o where() now works also with objects of class "AAbin". 'NULL' is now returned, instead of an error, if the sequence is shorter than the searched pattern (useful with lists with some short sequences; issue #28 on GH). o comparePhylo() has a new '...' argument for options passed to plot.phylo(), and two new options: 'commons' to specify whether to show the splits in common, and 'location' for the legend (PRs #41 and #42 on GH). o The code of dnds() has been rewritten which fixes some minor bugs, is more robust, and gains the options 'details' to print the numbers of TS and TV for each degeneracy category, and 'return.category' to return the degeneracy categories of the original data (see ?dnds for details). BUG FIXES o A bug was introduced in read.tree() in ape 5.5 when reading trees with quoted labels (issue #14 on GH). o nj() did not duplicate the distances if originally stored in a "dist" object (issue #12 on GH). o The double-centered matrix computed by pcoa() is now more strictly symmetric, thus avoiding complex eigenvalues in some cases (issue #17 on GH). o chronos() failed with discrete models when branch lengths were extremely heterogeneous leading to badly initial rate values. o multi2di() sometimes resulted in an ultrametric tree to become non-ultrametric (see issue #23 on GH). o read.nexus() failed to read correctly files with labels containing spaces in the TRANSLATION bloc (PR #29 on GH). o ace(, type = "d") now woks with n = 2. o drop.tip() used to remove the root edge of a tree if the option 'root.edge = 0' was set. Since the latter is the default, the side-effect was to possibly turn a rooted tree into an unrooted one (see issue #32 on GH). The root edge is now removed only if the option 'rooted = TRUE' is used. The help page has been clarified on this point. o as.hclust.phylo() now returns the correct order of leaves so that plot() does not cross lines (PR #33 on GH). o unique.multiPhylo() failed with a single tree in the list or with an empty list (PR #38 on GH). o trans() failed with a matrix with a single codon. A similar bug affected as.character.AAbin(). o rTraitDisc() now uses expm::expm unless this package is not installed in which case ape::matexpo is used. OTHER CHANGES o multi2di() is faster as it avoids calling which() repeatedly. o dist.nodes() now gives an error with an explicit error message if the tree is too large (i.e., more than 46,340 tips + nodes). o The default values of the options 'edge.color', 'edge.width', and 'edge.lty' of plot.phylo() are now taken from the graphical parameters: par("fg"), par("lwd"), and par("lty"), respectively. o The visual aspect of circular trees (type = "f" in plot.phylo) has been improved. o ace() now accepts trees with some branch lengths equal to zero. o write.tree() now accepts simple lists of trees. o chronos(, model = "clock") is a short-cut to specifying model = "discrete" and a single category of rates in the settings. CHANGES IN APE VERSION 5.5 NEW FEATURES o The new functions rtopology and rmtopology generate random trees with equal frequencies of *labelled* topologies. rtree(, equiprob = TRUE) now gives equal frequencies of *unlabelled* topologies. These features are described in the new vignette "RandomTopologies". o dist.dna()'s internal code has been improved: it now accepts up to 2.1 billion sequences each up to 2.1 Gb (see CHANGES in APE VERSION 4.0 below). o nj()'s internal code has been improved and is slightly more efficient: combined with the improved dist.dna(), boot.phylo() with an NJ tree is now about 10% faster. o howmanytrees() can now compute (approximately) the number of binary, (un)rooted, labelled topologies beyond the standard resolution of numeric values (i.e., > 10^308) thanks to the new utility function LargeNumber. BUG FIXES o bind.tree() sometimes returned badly conformed trees. o write.FASTA() made R crash if the output file was in a non-existing directory (thanks to Richel Bilderbeek for the report). o ltt.plot.coords() now handles correctly trees with singleton nodes. This also fixes functions calling this one (e.g., ltt.plot). o estimate.dates() now works with non-bifurcating trees. o root(phy, outgroup, resolve.root = TRUE) failed to place correctly the new root if 'outgroup' was of longer two or more and paraphyletic (although monophyletic once 'phy' was unrooted). o pcoa() failed when a single axis was returned. OTHER CHANGES o node- and tiplabels(pie = ) now accept data frames (or a single-column matrix/data frame). o boot.phylo() now returns integer(0) with a warning if there are less than 4 rows in the data (argument x). In case of unrooted trees, the first support value is now NA (it used to be B, the number of bootstrap replications). o prop.part() and is now faster thanks to some internal improvements. This also affects dist.topo() which is twice faster for small trees, and up to 30 times faster for trees with 1000 tips. o bitsplits() now gives an error with rooted trees CHANGES IN APE VERSION 5.4-1 NEW FEATURES o rtree() gains several improvements: * The new option 'equiprob = TRUE' generates all topologies in equal proportions; 'equiprob = FALSE' generates unbalanced topologies in higher proportions (as in previous versions of ape). The latter is the default because several packages use rtree() with set.seed() in their examples. * Rooted trees with n = 1 tip can now be simulated. * Vector of tip labels supplied by the user are now checked and a warning is issued if its length does not match 'n'. * The internals of the code have been rewritten and are now more efficient (particularly if 'equiprob = TRUE'). o multi2di() gains an option 'equiprob' similar to rtree(). o The new function nexus2DNAbin is a helper to convert outputs from read.nexus.data(). BUG FIXES o is.monophyletic() might return the wrong answer if the tips were given as labels not alphabetically sorted. o boot.phylo() returned bootstrap values all equal to 0. CHANGES IN APE VERSION 5.4 NEW FEATURES o The new function getAnnotationsGenBank reads annotations from GenBank and returns them in data frame(s). o read.GenBank() gains two options: 'chunk.size' to set the number of sequences downloaded together, and 'quiet' to display the download progress (will also display the name of the temporary file where the sequences were saved). The options 'gene.names' has been removed. o The new convenience function latag2n (leading and traling alignment gaps to N) substitutes leading and trailing gaps in DNA alignments into N's. The gaps in the middle of the sequences are left unchanged. o There are new cbind() and rbind() methods for the class "AAbin". o seg.sites() gains two options: 'strict' and 'trailingGapsAsN' to give more flexibility in the presence of ambiguous bases and/or alignment gaps. o read.nexus.data() now handles polymorphisms in discrete characters (however with no distinction between ambiguous or true polymorphic characters). o trans() now supports genetic codes 1 to 6 (dnds() still supports only genetic codes 1 and 2). o chronos() has been improved with contributions by Santiago Claramunt: * Some errors were fixed in the calculation of the log- likelihood of the "discrete" models with more than one category of rates. * Calculations are now faster with "discrete" models. * A general convergence diagnostic has been added in the output. Some other contributions by Guillaume Louvel: * Improved algorithm to find initial dates. * There is now a parameter 'epsilon' to check convergence. * The number of iterations is output in the final results. o Phylogenetic correlation structures (functions with class "corPhyl") can now specify grouping factors via the arguments 'form' and 'covariates' (which are no more ignored; see examples in ?ape::corClasses). This can be used, for instance, to specify species names to match the tree with the covariates. BUG FIXES o A bug was fixed in pcoa() (thanks to Jesse Connell). o A bug was fixed in countBipartitions(). o unroot() did not handle node labels correctly. o label2table() now keeps all levels in labels (not only the first three ones. o A bug was fixed in drop.tip(, root.edge = ). o rphylo() failed with time-dependent parameters. o is.monophyletic() now returns an error if some labels are not found in the tree (thanks to David Winter for the fix). o read.tree() and read.nexus() crashed R if the numbers of left and right parentheses in a Newick string were not equal: this is fixed and an error is now returned. OTHER CHANGES o The option 'tbr' in fastme.bal() has been disabled. CHANGES IN APE VERSION 5.3 NEW FEATURES o The new function dnds calculates pairwise dN/dS ratios. o alview() has a new option 'showpos' to display the positions of the sites (can be a subset of these). Previously, only the position of the last site was printed. BUG FIXES o mantel.test() returned NA if the second matrix had its diagonal with at least one NA. o Several bugs were fixed in plot.popsize(). It gains the options 'xlab', 'ylab', and 'log' for more flexible plotting. OTHER CHANGES o NAMESPACE has now explicit exports. The present version has 325 exported objects. Most internal functions are not exported, as well as several methods. o checkLabel() is now documented. o The running times of write.tree() now scales with N (number of trees) instead of N^2: writing 2 million trees with 9 tips now takes 8 mins instead of 28 hrs with ape 5.2. CHANGES IN APE VERSION 5.2 NEW FEATURES o The new function keep.tip does the opposite operation than drop.tip(). o The new function rDNAbin generates random DNA sequences. o The new generic function updateLabel changes some (or all) labels in an object giving two vectors 'old' and 'new' of labels. There are methods for the following classes: DNAbin, AAbin, phylo, evonet, data.frame, and matrix. o read.gff() has a new option 'GFF3' (TRUE by default) to set correctly the column names of GFF2 (aka GTF) files. o Xplor() has the new option from = "HOME" which is passed to Xplorefiles(). o ace() can take state uncertainty for discrete characters into account: this should be coded with R's NA. o plotTreeTime() has a new option, color (TRUE by default). o as.phylo.formula() has a new option, collapse, to add a single node if a given taxonomic level has only one representative (contributed by Eric Marcon). BUG FIXES o read.FASTA(, type = "AA") failed. o image.DNAbin() failed with a single sequence. o write.nexus.data() wrote the binary representation of DNA or AA sequences. o read.tree() failed to read correctly single-quoted node labels unless some tip labels were also single-quoted. o node.height(, clado.style = TRUE) returned the wrong vector (it used to return the same result than node.depth(). o tiplabels() did not align labels properly if the tree was plotted with plot(, type = "fan", align.tip.label = TRUE). OTHER CHANGES o print.DNAbin() now prints nicely the total number of bases. o drop.tip(tr, 1:Ntip(tr), trim.internal = FALSE) now returns a tree with all the terminal branches of tr removed, unless it has less than three nodes. CHANGES IN APE VERSION 5.1 NEW FEATURES o The new function write.FASTA is much more efficient than write.dna(, format = "fasta") for very big data set. It can also write "AAbin" objects into FASTA files. o The new function comparePhylo compares two trees with optional plot showing the differences. o There are three new methods as.AAbin() to convert from BioConductor for the classes "AAString", "AAStringSet", and "AAMultipleAlignment". o read.FASTA() can now read amino acid sequences; it has a new option type = "DNA" by default. o write.nexus.data() can now handle continuous and standard data thanks to a contribution by Thomas Guillerme. o muscle(), clustal(), and clustalomega() have new arguments to do tree-guided or progressive alignment, and a new option 'file' to save the alignment in a file (see ?details). These functions now handle "AAbin" sequences. o mantel.test() can now analyse asymmetric matrices following a suggestion by Andrzej Galecki. o as.prop.part.bitsplits() gains an option 'include.trivial = FALSE', and the generic as.prop.part() gains a '...' argument. BUG FIXES o as.hclust.phylo() failed with two-tip trees. o drop.tip(, subtree = TRUE) failed when dropping a single tip. Thanks to Thomas Sibley for the fix. o nj() now checks that there are at least three observations. o chronos() better finds initial dates in difficult situations, particularly when the root age is unknown. o chronos() now exits with the current estimates if the plogLik is NA/NaN. Thanks to Daniel Lang for the fix. o plot.phylo(phy, type = "p", edge.color = x) failed if 'phy' had singleton nodes and 'x' was a vector of length > 1. o bind.tree() failed if the trees had unordered edges. Thanks to Veronika Boskova for the fix. o cbind.DNAbin() now checks that the rownames of each matrix are not duplicated (unless check.names = FALSE). OTHER CHANGES o branching.times() now returns a zero-length vector and a warning if the tree has no branch length (it used to return an error). o igraph is now a suggested package. o The as.DNAbin() methods to convert from BioConductor are more efficient. CHANGES IN APE VERSION 5.0 NEW FEATURES o ape now supports all types of phylogenetic trees and nerworks (see details below). o The new functions read.evonet and write.evonet read and write files in Newick extended format (Cardona et al. 2008. BMC Bioinformatics 9:532). o Four new functions contributed by Franz Krah help to manipulate AA sequences: methods c(), as.list(), and as.matrix() for the class "AAbin", and as.AAbin() for lists. o The new function has.singles tests if a tree has single nodes: such trees can now be plotted with plot.phylo(). o There are two new convenience functions cladewise() and postorder() which are short-cuts to reorder(, index.only = TRUE). o The new function Xplor shows all data files on the local machine in a Web browser with clickable links to the directories and files; it is based on the Xplorefiles() function. o image.DNAbin() and image.AAbin() gain two options, grid and show.base (or show.aa), to display a grid and the symbols of the bases (or amino acids), and three options to control the aspect of the latter: base.cex, base.font, and base.col (or aa.cex, aa.font, and aa.col). o image.AAbin() now shows the polymorphic positions. o Several internal improvements were made to pcoa(). biplot.pcoa() gains an optional argument 'main' to add a title. o tiplabels() has a new option 'offset' to position the labels with respect to the tips of the tree. This can be used in combination with 'adj' and works for all types of trees (except unrooted ones). o phydataplot(type = "mosaic", continuous = FALSE) allows to define the colours more flexibly thanks to a suggestion by Elizabeth Purdom; this can be done if the function passed with 'funcol' returns a named vector. Also, the values are now sorted alphabetically in the legend. o phydataplot(type = "mosaic", legend = "none") does not display the legend. o read.tree() and read.nexus() can now read trees with singleton nodes (i.e., nodes of degree 2) thanks to Klaus. o read.tree() can now read labels with special characters within single quotes. o There are reorder() and Nedge() methods for objects of class "evonet", and a new generic as.evonet() with a method for the class "phylo". o LLT() gains a '...' argument (e.g., to use log = "y"). o drop.tip() and extract.clade() gain an option 'collapse.singles' (TRUE by default). BUG FIXES o CADM.*(... make.sym = FALSE) failed: this is now fixed. o phydataplot(x, , type = "mosaic") now works if x is a vector or a one-column matrix. o as.igraph.phylo() has been improved to work with igraph >= 1.0. o reorder.multiPhylo() failed with compressed lists. o arecompatible() returned FALSE when comparing two identical splits. o as.hclust.phylo() failed if internal branches near the root have length zero, so sorting the branching times failed due to rounding errors. o plot.phylo() failed when branch lengths were very long compared to the tip labels: use of the plotting space has been improved. OTHER CHANGES o getMRCA() is now much faster (thanks to Joseph Brown and Klaus). o read.tree() and read.nexus() are now based on C code (thanks to Klaus) and should be several times faster. o The default read.tree(, comment.char = "#") has been changed to comment.char = "" so that extended Newick files can be read. o The internal code of prop.part() and of reorder.phylo() have been rewritten in C++ and should be several times faster. collapse.singles(), drop.tip(), and extract.clade() are also much faster. o dist.topo() is much faster with the default distance. o write.tree() and write.nexus() used to replace multiple underscores or dashes in labels by single ones: they are now left unchanged. The help page ?write.tree has been clarified on how tip and node labels are checked before being printed. CHANGES IN APE VERSION 4.1 NEW FEATURES o The new function read.fastq reads FASTQ files returning a "DNAbin" object with an attribute "QUAL". o The new functions Xplorefiles, bydir, and edit.file.extensions help to find files on the local disk. o read.FASTA() can now read connections thanks to a contribution by RJ Ewing: the help page shows how to read *.zip, *.gz, or *.bz2 files. This function can now read remote files through secure connections (with HTTPS or FTPS). These also work for read.dna(format = "fasta"). o read.nexus() has a new option, force.multi (FALSE by default), to always return an object of class "multiPhylo" even if the file contains a single tree (to keep the tree name which is lost in the object of class "phylo"). BUG FIXES o read.FASTA() crashed when reading a file with no sequence or a compressed file not identified as such (see above about the support for connections). o read.GenBank() failed when at least one accession number was wrong: it now returns the sequences eventually read and prints a warning with the accession numbers not read. o extract.clade() failed with trees previously rooted with root(resolve.root = TRUE). o A bug was fixed in estimate.dates(). o A bug was fixed in multi2di.multiPhylo(). o drop.tip() failed when dropping all tips but one. o nj() now checks for infinite distances (it used to return a badly conformed tree). OTHER CHANGES o chronos() now checks the presence of branch lengths in the tree. o print.DNAbin() now prints base frequencies until 10 million bases. The display of labels has been improved. CHANGES IN APE VERSION 4.0 NEW FEATURES o Two new functions, estimate.mu and estimate.dates, contributed by Bradley Jones, estimate the mutation rate and the dates for a rooted phylogenetic tree with dated tips. o The function plotTreeTime plots a non-ultrametric tree together with the dates of the tips. o The new function read.gff reads GFF files. o The new method all.equal.DNAbin compares two sets of DNA sequences mainly to compare alignments with a graphical display of the differences. o boot.phylo() has two new options: 'jumble' (TRUE by default) to randomize the order of the rows of the data matrix (and avoid artificially too large bootstrap values associated with very short branches), and 'mc.cores' to perform parallel bootstraps. o reconstruct() now computes ancestral states under directional models (Brownian with trend and OU; see: Royer-Carenzi, M. and Didier, G. 2016. A comparison of ancestral state reconstruction methods for quantitative characters. J Theor Biol, 404:126-142). o root() has the new option 'edgelabel' to treat node labels as edge labels so they are associated with the correct edges when using drawSupportOnEdges (edgelabel = FALSE by default). o read.GenBank() is faster and more flexible: it can read records such as contigs or scaffolds (based on a code available on GitHub for a while). o is.ultrametric() has a new argument, option, to change the criterion used to test ultrametricity. o .compressTipLabel() has a new option (ref = NULL) to constrain the order of the tips. BUG FIXES o plot.phylo() used to fail with NAs in branch lengths: they are now ignored with a warning. o plot.phylo(, type = "unrooted", label.offset = 1) used to have no effect (unless lab4ut = "a" was used). o checkValidPhylo() failed with trees with a single node. o unroot() returned badly conformed trees in some situations. o richness.yule.test() failed when the speciation rate(s) is high: the code has been improved with better starting values for the optimization. o dist.topo() now checks that the trees have the same labels. The code is ~100 times faster for comparing 100 trees with 100 labels. o as.character.AAbin() striped matrix rownames (thanks to Shaun Wilkinson for the report). OTHER CHANGES o clustal(), clustalomega(), muscle(), and tcoffee() now give a more explicit error message when they fail to find the executable. The help page has been completed and gives some recommendations. o "DNAbin" objects larger than 2.1 billion bases (exactly 2^31-1) can now be handled by most functions in ape. The limitations are the same than in base R: 2^52 (~4.5*10^15) elements, and the numbers of rows and columns of matrices cannot exceed 2^31-1. read.dna() can now read FASTA files larger than 2.1 gigabytes. Two functions are still limited to 2.1 Gb: dist.dna and DNAbin2indel (the limit applies to the product of the numbers of sequences by the sequence length since they work with matrices). o As a side-effect of the above, several functions are slightly faster as several calls to .C have been replaced by .Call. o The following functions are now generic with methods for the classes "phylo" and "multiPhylo": di2multi, multi2di, Ntip, Nnode, Nedge, reorder, is.ultrametric, root, unroot, is.rooted, and is.binary (new function name, see below). o c.phylo() and c.multiPhylo() have now recursive = TRUE by default and try to return a list of single trees unless some objects are not trees or lists of trees. o image.DNAbin() and image.AAbin() now order rows from top to bottom. o read.GenBank() now uses HTTPS in place of HTTP. o ape now requires R 3.2.0 (or higher). o boot.phylo() now checks for duplicated labels. o checkAlignment() now prints the gap lengths that are on the borders of the alignment. DEPRECATED & DEFUNCT o is.binary.tree() is now obsolete and will be removed soon. It currently calls is.binary(). o The data sets landplants.newick and opsin were removed. CHANGES IN APE VERSION 3.5 NEW FEATURES o Eleven new functions are provided to work with the new class "AAbin": - trans() translates DNA to AA sequences; - complement() returns the reverse complement of DNA sequences; - Six methods to manipulate and display objects of class "AAbin": print, [, labels, as.character, as.phyDat, and image (alview() works also on AA sequences); - New generic as.AAbin() with a method for the class "character"; - dist.aa() computes pairwise Hamming distances; - AAsubst() returns the indices of polymporphic sites (similar to seg.sites() for "DNAbin"). o The new function checkAlignment performs diagnostics on a DNA alignment. o The new functions plotBreakLongEdges and drawSupportOnEdges help to plot and annotate trees. o del.colgapsonly() gains two options: threshold (1 by default) to delete columns with less than 100% of gaps, and freq.only (FALSE by default) to return only the number of gaps for each column. o The new function del.rowgapsonly does the same operation than del.colgapsonly but on the rows (with the same new options). BUG FIXES o root() did not calculate the basal branch length correctly (thanks to Liam Revell for the report). o clustal() failed with long labels. o as.DNAbin.character() now works with uppercase letters as well. o prop.clades() now returns a sensible results whatever the value of its option 'rooted' (fixed by Klaus). The code is also faster. o root(resolve.root = TRUE) failed in some situations. OTHER CHANGES o boot.phylo(phy, ....) has now by default rooted = is.rooted(phy). CHANGES IN APE VERSION 3.4 NEW FEATURES o The new function checkValidPhylo checks the internal structure of a "phylo" object. o The new function clustalomega calls Clustal-Omega from R. o The new function DNAbin2indel recodes DNA sequences with gaps as numeric values. o The new function alview prints an alignment in a user-friendly way in the console or a file. o Three new functions, label2table, stripLabel and abbreviateGenus, help to manage taxa labels. o phydataplot() now accepts style="boxplot" and style="dotchart", and has a new option 'scaling' when the data are on a very different scale than the branch lengths of the tree. o phydataplot() now accepts style="mosaic" to plot discrete or continuous variables in matrix form, and gains three options (width, continuous, funcol) that work with this option. o plot.phylo() has a new option (align.tip.label) to align the labels of the tips even when the tree is not ultrametric. BUG FIXES o parafit() did not work with data frames or "dist" objects: this is now fixed. o plot.phylo() did not manage space correctly when show.tip.label = FALSE and some tip labels were long (thanks to David Bapst for the report and to Liam Revell for the fix). o collapse.singles() failed in some situations, returning trees that could make R crash. The function was completely re-written. o In clustal(), the default path and executable names were not correctly set under Windows. o balance() failed after reordering a tree (bug reported by G. Valiente in January 2008). o seg.sites() failed to find some segregating sites in some specific situations: gaps and ambiguous nucleotides are now handled correctly. The code is slightly faster. o rtt() didn't find the optimal root if located on a long branch. o plot.phylo(, type = "f") sometimes failed when colouring the edges of a tree with multichotomies (thanks to François Michonneau for the fix). o rphylo() now returns better conformed trees. o drop.tip() sometimes shuffled node labels. o edgelabels() did not put the labels in the correct place with "fan" trees (fixed by Klaus). o reorder.phylo(, index.only = TRUE) sometimes returned the tree if it has a single node. o as.hclust.phylo() returned node heights divided by two. o ladderize() did not work with trees not in "cladewise" postorder. o dist.dna(, model = "indelblock") has been fixed. o as.hclust.phylo failed when trees had ties in their branching times. o root() failed with some trees returned by phangorn. OTHER CHANGES o Updated NAMESPACE file for r-devel. o root() gives a more explicit error message when the outgroup is not among the labels of the tree. CHANGES IN APE VERSION 3.3 NEW FEATURES o Two new functions contributed by Anthony Ives, binaryPGLMM and binaryPGLMM.sim, fit phylogenetic generalized linear mixed models to binary data and simulate such data. o The new function corphylo, contributed by Anthony Ives, calculates Pearson correlation coefficients for multiple continuous traits that may have phylogenetic signal. o The new function rphylo simulates trees under any time-dependent model of diversification using the method from Stadler (2011, Syst Biol 60: 676) which conditions the simulation on a fixed number of living species at present time. o The new function del.colgapsonly removes the columns of a DNA alignment that contain only gaps (useful when a small matrix is extracted from a large alignment). o There is a sort() method for objects of class "bitsplits". o plot.phylo(type = "fan", root.edge = TRUE) now draws the root edge. Besides, the root edge is now coloured like the other edges if they are all of the same colour (for all tree types). o dist.topo() now accepts lists of trees. BUG FIXES o prop.clades() failed with compressed lists of trees. o read.tree() did not ignore comments inside straight brackets (contrary to what is written in ape's FAQ) potentially resulting in wrongly conformed "phylo" objects. o dbdTime(x = 0, ...., conditional = TRUE) did not return 0. o parafit() required a fix when F2.stat could not be calculated. o write.nexus.data() did not write "DATATYPE=" correctly so that the output NEXUS file could not be read by some other programs (thanks to Robin Cristofari for the fix). o seg.sites() made R crash when all sequences ended with N's. o root(phy, node = Ntip(phy) + 1, resolve.root = TRUE) now returns an explicit error. OTHER CHANGES o plot.prop.part() has been improved: it now uses image() internally, and gains a new option (col). It is now much more efficient to display very large numbers of bipartitions. o as.character.DNAbin() and as.DNAbin.character() are much faster (contributed by Klaus Schliep). DEPRECATED & DEFUNCT o node.height.clado() has been removed. CHANGES IN APE VERSION 3.2 NEW FEATURES o The new function reconstruct, contributed by Manuela Royer-Carenzi and Gilles Didier, does ancestral character state reconstruction using new algorithms based on matrix computations. o The new function vcv2phylo, contributed by Simon Blomberg, transforms a variance-covariance matrix into a phylogenetic tree (ultrametric or not). o The new function def helps to (re)define attributes for plotting and annotating trees using taxon names, labels, or other vectors of character strings. o There are four new as.DNAbin functions (S3 methods) to convert objects storing DNA sequences in BioConductor. The supported (S4) classes are: DNAString, DNAStringSet, DNAMultipleAlignment, and PairwiseAlignmentsSingleSubject (the examples show how to convert from the class DNAStringSetList). o The new function nodepath finds paths of nodes in a tree. o axisPhylo() gains two options: root.time and backward (similar to ltt.plot). o write.nexus.data() now accepts matrices (it accepted only lists in previous versions). BUG FIXES o dist.dna() did not compute distances correctly if gamma = TRUE. o read.nexus() sometimes failed on files with no TRANSLATION block. o read.nexus() failed to read correctly tree names (thanks to Graham Gower for the fix). o multi2di() used to return trees with elements as doubles instead of integers (this was not a problem for most applications). o In the output of phymltest(), the model F84 and HKY were swapped (thanks to Luiz Max Fagundes de Carvalho for the fix). OTHER CHANGES o plot.phylo() now saves the element 'root.time' of the plotted tree (if present) in the environment .PlotPhyloEnv. o write.nexus.data() now writes correctly all "DNAbin" objects (matrices and lists). The help page has been clarified. CHANGES IN APE VERSION 3.1-4 BUG FIXES o A bug was fixed in which.edge(). OTHER CHANGES o This file is no more distributed with the sources of ape and can be found at: http://ape-package.ird.fr/NEWS. CHANGES IN APE VERSION 3.1-3 NEW FEATURES o The new function rtt contributed by Rosemary McCloskey roots a tree with dated tips in the location most compatible with the assumption of a strict molecular clock. BUG FIXES o root(, resolve.root = TRUE) did not manage node labels correctly in the case where 'outgroup' was already an outgroup of the tree. o read.dna(file, format = "fasta") works (again) with URIs (i.e., file = "http://..." or "ftp://..."). o rotate() made R crash if the tree was not in cladewise order. o which.edge() did not work correctly for trees not in cladewise order. o read.GenBank failed to read older GenBank records. CHANGES IN APE VERSION 3.1-2 NEW FEATURES o plot.phylo: the option 'open.angle' now works also when type = "radial". The option 'lab4ut' now works also for type = "radial" or "fan"; the default is now NULL since it will behave differently depending on 'type'. o ace(, type = "discrete") gains an option 'marginal' (FALSE by default). The default is now to do a joint reconstruction of ancestral states. BUG FIXES o chronos: the gradients were not correctly computed when the age of the root was unknown. o prop.part() did not work with objects of class composite like c("chronos", "phylo"). (Thanks to Steve Walker for the fix.) o bionjs: a bug was fixed in the C code. o read.GenBank() failed if a line starts with "ORIGIN" in the description of the sequence (thanks to Sofia Sal Bregua for the fix). o cophyloplot() works better with length.line = 0. o The internal function clado.build used to return trees with the 'edge' matrix stored as double instead of integer. OTHER CHANGES o base.freq() now checks the class of the data. CHANGES IN APE VERSION 3.1-1 BUG FIXES o rTraitCont: a bug was introduced in the previous release. OTHER CHANGES o ape now requires R 3.0.0 or higher. CHANGES IN APE VERSION 3.1 NEW FEATURES o Two new functions, bitsplits and countBipartitions, handle bipartitions (aka splits) more efficiently. o The new generic function as.prop.part helps to convert among classes of bipartitions. o Two new functions, phydataplot and ring, helps to graphically annotate trees. The help page has many examples. o The new function LLT draws the theoretical LTT-plot under specified values of speciation and extinction rates together with a prediction interval. The function has several options for flexible plotting. o predict.compar.gee() gains a new option, newdata, to predict values from new observations of the predictors. This works like most predict() methods. BUG FIXES o root: the fix introduced in the previous version was not correct. o compar.gee: using a user-defined link failed. o plot.phylo: the tip labels were often outside the plotting area with direction = "downward". o unique.multiPhylo() did not work correctly with compressed lists. This function now returns a vector of integers giving the correspondance among similar trees. OTHER CHANGES o add.scale.bar() now draws a longer bar by default. o In boot.phylo(), the data 'x' must be a matrix-like object (lists are no more accepted). o boot.phylo() uses by default (ie, if rooted = FALSE) the new function countBipartitions, and so should be much faster even with small sample sizes and especially if the number of bootstrap replicates is large. o plot.phylo(, type = "fan") now colours the arcs in the same way than the default type = "p". o All instances of DUP=FALSE in calls of .C have been removed. CHANGES IN APE VERSION 3.0-11 BUG FIXES o branching.times() now reorders the tree if needed. o root(, resolved.root = TRUE) sometimes misplaced the root when 'outgroup' was of length one. o nodelabels() didn't work with versions of R older than 3.0.0. OTHER CHANGES o The code of branching.times() is now ca. 3 times faster. CHANGES IN APE VERSION 3.0-10 NEW FEATURES o The new function rotateConstr rotates internal branches given a constraint on the order of the tips. o plot.phylo() has a new option, node.depth, to specify the depths of nodes when the tree is plotted without branch lengths. node.depth() has a new option, method, with the same effect. o ace() has a new option, use.eigen, used when type = "d" to avoid computing matrix exponentials. With other coding improvements, the function is now about five times faster. BUG FIXES o node.height() and node.height.clado() returned wrong values. o pie charts drawn by nodelabels(), and other functions, used to draw a "3-o'clock" segment when only one proportion was equal to one (fixed by Klaus). o plot(phy, main = "title...") now works again. OTHER CHANGES o Improved DESCRIPTION and NAMESPACE files. o The C routines are now registered. o The generic function as.igraph has been removed from ape as it is now defined in the package igraph. o node.height() has now an option 'clado.style' (FALSE by default). node.height.clado() will be removed soon. o The code of several functions has been improved. DEPRECATED & DEFUNCT o mst() has been moved to pegas. CHANGES IN APE VERSION 3.0-9 NEW FEATURES o getMRCA() is now documented. BUG FIXES o ace(, type = "d") now checks for zero or negative branch lengths. o ace(, type = "d") now catches the error if the SEs of the rates cannot be calculated (usually due to a poor model fit). o reorder.phylo() made R crash with badly conformed "phylo" trees. o drop.tip(phy, interactive = TRUE) did not work. o drop.tip() now returns NULL (with a warning) if all tips are dropped. o mixedFontLabel() failed when passed only one vector of labels. It also failed when labels included - or '. In some cases, spaces in labels were not treated correctly. o root(, resolve.root = TRUE) wrongly placed the new root node when the outgroup has more than one tip. This is fixed, and now the root node is placed at the MRCA of the outgroup (and not of the ingroup). o where() failed with matrices. CHANGES IN APE VERSION 3.0-8 NEW FEATURES o The new function ewLasso tests whether an incomplete set of distances uniquely determines the edge weights of a given unrooted topology using the 'Lasso' method by Dress et al. (2012, J. Math. Biol. 65:77). o ace() gains a new option 'use.expm' to use expm() from the package of the same name in place of matexpo(). BUG FIXES o read.dna(, "fasta") may add '\r' in labels: this is fixed. o prop.clades() returned wrong numbers when the tip labels of 'phy' are not in the same order than the list of trees (thanks to Rupert Collins for the report). o CADM.post() displayed "1" on the diagonal of the matrix of Mantel p-values. It now displays "NA" on the diagonal, indicating that no test of significance is computed between a distance matrix and itself. o rtree(n, rooted = FALSE) returned trees with an 'edge' matrix stored as doubles instead of integers for n > 4. OTHER CHANGES o The files CDAM.global.R and CDAM.post.R have been renamed CADM.global.R and CADM.post.R. o ace() has a new default for its option 'method': this is "REML" for continuous characters and "ML" for discrete ones. o ape does not import gee anymore so the latter doesn't need to be installed. CHANGES IN APE VERSION 3.0-7 NEW FEATURES o The new function chronos estimates chronograms by penalised likelihood and maximum likelihood with a completely reworked code and interface. There is a new function makeChronosCalib to set the calibration points easily. chronos() will eventually replace chronopl(). o The new function 'where' searches patterns in DNA sequences. o pic() gains an option 'rescaled.tree = FALSE' to return the tree with its branch lengths rescaled for the PIC calculation. o clustal(), muscle(), and tcoffee() gain an option 'original.ordering = TRUE' to ease the comparisons of alignments. o plot.phylo() has a new option, open.angle, used when plotting circular trees. o The new function read.FASTA reads FASTA files much faster and more efficiently. It is called internally by read.dna(, "fasta") or can be called directly. BUG FIXES o drop.tip() shuffled node labels on some trees. o axisPhylo() now works correctly with circular trees, and gives a sensible error message when type = "r" or "u". OTHER CHANGES o .compressTipLabel() is 10 times faster thanks to Joseph Brown. o base.freq() is now faster with lists. o as.matrix.DNAbin() should be faster and more efficient with lists; it now accepts vectors. CHANGES IN APE VERSION 3.0-6 NEW FEATURES o reorder.phylo() has a new order, "postorder", and a new option index.only = TRUE to return only the vector of indices (the tree is unmodified, see ?reorder.phylo for details). o The three new functions node.depth.edgelength, node.height, and node.height.clado make some internal code available from R. See ?node.depth (which was already documented) for details. BUG FIXES o reorder(, "pruningwise") made R crash if the rows of the edge matrix are in random order: this is now fixed. o drop.tip() sometimes shuffled node labels (thanks to Rebecca Best for the report). o drop.tip(phy, "") returned a tree with zero-length tip labels: it now returns the tree unchanged (thanks to Brian Anacker for the report). o plot.phylo() made R crash if the tree has zero-length tip labels: it now returns NULL (thanks again to Brian Anacker). OTHER CHANGES o dist.nodes() is now 6 to 10 times faster. o reorder(, "cladewise") is now faster. The change is not very visible for small trees (n < 1000) but this can be more than 1000 faster for big trees (n >= 1e4). o The attribute "order" of the objects of class "phylo" is now strongly recommended, though not mandatory. Most functions in ape should return a tree with this attribute correctly set. o dbd() is now vectorized on both arguments 'x' (number of species in clade) and 't' (clade age) to make likelihood calculations easier and faster. CHANGES IN APE VERSION 3.0-5 BUG FIXES o ace() should better catch errors when SEs cannot be computed. OTHER CHANGES o write.dna(format = "fasta") now conforms more closely to the FASTA standard thanks to François Michonneau. o print.DNAbin() does not print base compositions if there are more than one million nucleotides. CHANGES IN APE VERSION 3.0-4 BUG FIXES o read.dna() failed to read Phylip files if the first line used tabulations instead of white spaces. o read.dna() failed to read Phylip or Clustal files with less than 10 nucleotides. (See other changes in this function below.) OTHER CHANGES o read.dna() now requires at least one space (or tab) between the taxa names and the sequences (whatever the length of taxa names). write.dna() now follows the same rule. o The option 'seq.names' of read.dna has been removed. o The files ape-defunct.R and ape-defunct.Rd, which have not been modified for almost two years, have been removed. o The C code of bionj() has been reworked: it is more stable (by avoiding passing character strings), slightly faster (by about 20%), and numerically more accurate. o The C code of fastme.*() has been slightly modified and should be more stable by avoiding passing character strings (the results are identical to the previous versions). o The file src/newick.c has been removed. CHANGES IN APE VERSION 3.0-3 BUG FIXES o birthdeath() now catches errors and warnings much better so that a result is returned in most cases. OTHER CHANGES o Because of problems with character string manipulation in C, the examples in ?bionj and in ?fastme have been disallowed. In the meantime, these functions might be unstable. This will be solved for the next release. CHANGES IN APE VERSION 3.0-2 NEW FEATURES o The new function alex (alignment explorator) zooms in a DNA alignment and opens the result in a new window. BUG FIXES o compute.brtime() did not completely randomized the order of the branching times. o write.nexus() did not work correctly with rooted trees (thanks to Matt Johnson for the fix). o mltt.plot(, backward = FALSE) did not set the x-axis correctly. o A bug was introduced in prop.clades() with ape 3.0. The help page has been clarified relative to the use of the option 'rooted'. o mantel.test() printed a useless warning message. o plot.phylo(, direction = "downward") ignored 'y.lim'. o is.monophyletic() did not work correctly if 'tips' was not stored as integers. o prop.part() could make R crash if the first tree had many multichotomies. o njs(), bionjs(), and mvrs() now return an error if 'fs < 1'. o SDM() did not work correctly. The code has also been generally improved. OTHER CHANGES o The DESCRIPTION file has been updated. o The option 'original.data' of write.nexus() has been removed. o The files bionjs.c, mvr.c, mvrs.c, njs.c, triangMtd.c, and triangMtds.c have been improved which should fix some bugs in the corresponding functions. o dist.gene() now coerces input data frame as matrix resulting in much faster calculations (thanks to a suggestion by Markus Schlegel). CHANGES IN APE VERSION 3.0-1 NEW FEATURES o dist.dna() has two new models: "indel" and "indelblock". o bind.tree() now accepts 'position' > 0 when the trees have no banch length permitting to create a node in 'x' when grafting 'y' (see ?bind.tree for details). BUG FIXES o cophyloplot( , rotate = TRUE) made R hanged after a few clicks. Also the tree is no more plotted twice. o read.GenBank() has been updated to work with EFetch 2.0. o unroot() on trees in "pruningwise" order did not respect this attribute. CHANGES IN APE VERSION 3.0 NEW FEATURES o The three functions dyule, dbd, and dbdTime calculate the density probability (i.e., the distribution of the number of species) for the Yule, the constant and the time-dependent birth-beath models, respectively. These probabilities can be conditional on no extinction and/or on a log-scale. o plot.phylo() has a new option 'rotate.tree' to rotate unrooted, fan, or radial trees around the center of the plot. o boot.phylo() and prop.clades() have a new option rooted = FALSE. Note that the behaviour of prop.part() is unchanged. o edgelabels() has a new option 'date' to place labels on edges of chronograms using the time scale (suggestion by Rob Lanfear). BUG FIXES o In chronopl(), the code setting the initial dates failed in complicated settings (several dates known within intervals). This has been generally improved and should result in faster and more efficient convergence even in simple settings. o mantel.test() sometimes returned P-values > 1 with the default two-tailed test. o extract.clade() sometimes shuffled some tip labels (thanks to Ludovic Mallet and Mahendra Mariadassou for the fix). o clustal() should now find by default the executable under Windows. OTHER CHANGES o The code of yule() has been simplified and is now much faster for big trees. o The code of mantel.test() has been adjusted to be consistent with common permutation tests. o The C code of base.freq() has been improved and is now nearly 8 times faster. o The option 'original.data' of write.nexus() is now deprecated and will be removed in a future release. o The code of is.ultrametric() has been improved and is now 3 times faster. o The code of vcv.phylo() has been improved and is now 10 or 30 times faster for 100 or 1000 tips, respectively. Consequently, fitting models with PGLS will be faster overall. CHANGES IN APE VERSION 2.8 NEW FEATURES o Twelve new functions have been written by Andrei-Alin Popescu: additive, ultrametric, is.compatible, arecompatible, mvr, mvrs, njs, bionjs, SDM, treePop, triangMtd, triangMtd*. o A new class "bitsplits" has been created by Andrei-Alin Popescu to code splits (aka, bipartition). o There is a new generic function as.bitsplits with a method to convert from the class "prop.part" to the class "bitsplits". o The new function ltt.coplot plots on the same scales a tree and the derived LTT plot. o ltt.plot() has two new options: backward and tol. It can now handle non-ultrametic trees and its internal coding has been improved. The coordinates of the plot can now be computed with the new function ltt.plot.coords. BUG FIXES o prop.part() crashed if some trees had some multichotomies. CHANGES IN APE VERSION 2.7-3 NEW FEATURES o The new function compute.brtime computes and sets branching times. o mantel.test() has a new argument 'alternative' which is "two-sided" by default. Previously, this test was one-tailed with no possibility to change. o ace() can now do REML estimation with continuous characters, giving better estimates of the variance of the Brownian motion process. BUG FIXES o Branch lengths were wrongly updated with bind.tree(, where = , position = 0). (Thanks to Liam Revell for digging this bug out.) o Simulation of OU process with rTraitCont() did not work correctly. This now uses formula from Gillespie (1996) reduced to a BM process when alpha = 0 to avoid division by zero. The option 'linear' has been removed. o Cross-validation in chronopl() did not work when 'age.max' was used. o consensus(, p = 0.5) could return an incorrect tree if some incompatible splits occur in 50% of the trees (especially with small number of trees). o c() with "multiPhylo" did not work correctly (thanks to Klaus Schliep for the fix). o root() failed in some cases with an outgroup made of several tips. The help page has been clarified a bit. CHANGES IN APE VERSION 2.7-2 NEW FEATURES o There is a new class "evonet" to code evolutionary networks, with a constructor function evonet(), a print() and a plot() methods, and four conversion methods to the classes "phylo", "networx", "network", and "igraph". o The new function rTraitMult does multivariate traits simulation with user-defined models. o plot.phylo() has a new option 'plot = TRUE'. If FALSE, the tree is not plotted but the graphical device is set and the coordinates are saved as usual. o diversity.contrast.test() gains a fourth version of the test with method = "logratio"; the literature citations have been clarified. o add.scale.bar() has two new options, 'lwd' and 'lcol', to modify the aspect of the bar. o boot.phylo() now displays a progress bar by default (can be off if 'quiet = TRUE'). o There is a new predict() method for compar.gee(). BUG FIXES o bionj() made R crash if distances were too large. It now returns an error if at least one distance is greater than 100. o drop.tip() returned a wrong tree if 'tip' was of zero length. o read.nexus.data() failed with URLs. o boot.phylo() returned overestimated support values in the presence of identical or nearly identical sequences. OTHER CHANGES o The data bird.families, bird.orders, cynipids, and woodmouse are now provided as .rda files. CHANGES IN APE VERSION 2.7-1 NEW FEATURES o The new function trex does tree exploration with multiple graphical devices. o The new function kronoviz plots several rooted (dated) trees on the scale scale. o identify.phylo() has a new option 'quiet' (FALSE by default). BUG FIXES o A bug was introduced in read.nexus() in ape 2.7. o image.DNAbin() did not colour correctly the bases if there were some '-' and no 'N'. o .compressTipLabel() failed with a list with a single tree. o identify.phylo() returned a wrong answer when the x- and y-scales are very different. o write.nexus() failed with lists of trees with compressed labels. OTHER CHANGES o identify.phylo() now returns NULL if the user right- (instead of left-) clicks (an error was returned previously). o read.nexus() should be robust to commands inserted in the TREES block. CHANGES IN APE VERSION 2.7 NEW FEATURES o There is a new image() method for "DNAbin" objects: it plots DNA alignments in a flexible and efficient way. o Two new functions as.network.phylo and as.igraph.phylo convert trees of class "phylo" into these respective network classes defined in the packages of the same names. o The three new functions clustal, muscle, and tcoffee perform nucleotide sequence alignment by calling the external programs of the same names. o Four new functions, diversity.contrast.test, mcconwaysims.test, richness.yule.test, and slowinskiguyer.test, implement various tests of diversification shifts using sister-clade comparisons. o base.freq() gains an option 'all' to count all the possible bases including the ambiguous ones (defaults to FALSE). o write.nexus() now writes tree names in the NEXUS file if given a list of trees with names. BUG FIXES o prop.part() failed in some situations with unrooted trees. o read.nexus() shuffled node labels when a TRANSLATE block was present. o varCompPhylip() did not work if 'exec' was specified. o bind.tree() shuffled node labels when position > 0 and 'where' was not the root. OTHER CHANGES o BaseProportion in src/dist_dna.c has been modified. o A number of functions in src/tree_build.c have been modified. o The matching representation has now only two columns as the third column was redundant. CHANGES IN APE VERSION 2.6-3 NEW FEATURES o rTraitCont() and rTraitDisc() gains a '...' argument used with user-defined models (suggestion by Gene Hunt). BUG FIXES o as.hclust.phylo() now returns an error with unrooted trees. o as.hclust.phylo() failed with trees with node labels (thanks to Jinlong Zhang for pointing this bug out). o read.dna(, "fasta") failed if sequences were not all of the same length. o plot.phylo() did not recycle values of 'font', 'cex' and 'tip.color' correctly when type = "fan" or "radial". o plot.phylo() ignored 'label.offset' when type = "radial", "fan", or "unrooted" with lab4ut = "axial" (the placement of tip labels still needs to be improved with lab4ut = "horizontal"). OTHER CHANGES o In drop.fossil() the default tol = 0 has been raised to 1e-8. o The help command ?phylo now points to the man page of read.tree() where this class is described. Similarly, ?matching points to the man page of as.matching(). CHANGES IN APE VERSION 2.6-2 NEW FEATURES o Two new functions, pic.ortho and varCompPhylip, implements the orthonormal contrasts of Felsenstein (2008, Am Nat, 171:713). The second function requires Phylip to be installed on the computer. o bd.ext() has a new option conditional = TRUE to use probabilities conditioned on no extinction for the taxonomic data. BUG FIXES o write.tree() failed to output correctly tree names. o dist.nodes() returned duplicated column(s) with unrooted and/or multichotomous trees. o mcmc.popsize() terminated unexpectedly if the progress bar was turned off. o prop.part(x) made R frozen if 'x' is of class "multiPhylo". o Compilation under Mandriva failed (thanks to Jos Käfer for the fix). o drop.tip() shuffled tip labels with subtree = TRUE or trim.internal = FALSE. o Objects returned by as.hclust.phylo() failed when analysed with cutree() or rect.hclust(). o write.tree() did not output correctly node labels (thanks to Naim Matasci and Jeremy Beaulieu for the fix). o ace(type = "discrete") has been improved thanks to Naim Marasci and Jeremy Beaulieu. CHANGES IN APE VERSION 2.6-1 NEW FEATURES o The new function speciesTree calculates the species tree from a set of gene trees. Several methods are available including maximum tree and shallowest divergence tree. BUG FIXES o A bug introduced in write.tree() with ape 2.6 has been fixed. o as.list.DNAbin() did not work correctly with vectors. o as.hclust.phylo() failed with trees with node labels (thanks to Filipe Vieira for the fix). CHANGES IN APE VERSION 2.6 NEW FEATURES o The new functions rlineage and rbdtree simulate phylogenies under any user-defined time-dependent speciation-extinction model. They use continuous time algorithms. o The new function drop.fossil removes the extinct species from a phylogeny. o The new function bd.time fits a user-defined time-dependent birth-death model. It is a generalization of yule.time() taking extinction into account. o The new function MPR does most parsimonious reconstruction of discrete characters. o The new function Ftab computes the contingency table of base frequencies from a pair of sequences. o There is now an 'as.list' method for the class "DNAbin". o dist.dna() can compute the number of transitions or transversions with the option model = "Ts" or model = "Tv", respectively. o [node|tip|edge]labels() gain three options with default values to control the aspect of thermometers: horiz = TRUE, width = NULL, and height = NULL. o compar.gee() has been improved with the new option 'corStruct' as an alternative to 'phy' to specify the correlation structure, and calculation of the QIC (Pan 2001, Biometrics). The display of the results has also been improved. o read.GenBank() has a new option 'gene.names' to return the name of the gene (FALSE by default). BUG FIXES o extract.clade() sometimes shuffled the tip labels. o plot.phylo(type = "unrooted") did not force asp = 1 (thanks to Klaus Schliep for the fix) o dist.dna(model = "logdet") used to divide distances by 4. The documentation has been clarified on the formulae used. OTHER CHANGES o rTraitCont(model = "OU") has an option 'linear = TRUE' to possibly change the parameterisation (see ?rTraitCont for details). o pic() now returns a vector with the node labels of the tree (if available) as names. o write.tree() and read.tree() have been substantially improved thanks to contributions by Klaus Schliep. CHANGES IN APE VERSION 2.5-3 NEW FEATURES o The new function mixedFontLabel helps to make labels with bits of text to be plotted in different fonts. o There are now replacement operators for [, [[, and $ for the class "multiPhylo" (i.e., TREES[11:20] <- rmtree(10, 100)). They possibly check that the tip labels are the same in all trees. o Objects of class "multiPhylo" can be built with c(): there are methods for the classes "phylo" and "multiPhylo". o The internal functions .compressTipLabel and .uncompressTipLabel are now documented. BUG FIXES o bind.tree(x, y, where, position = 0) did not work correctly if 'y' was a single-edge tree and 'where' was a tip. o rTraitCont() did not use the square-root of branch lengths when simulating a Brownian motion model. CHANGES IN APE VERSION 2.5-2 NEW FEATURES o There is now a print method for results from ace(). o There is a labels() method for objects of class "DNAbin". o read.dna() has a new option 'as.matrix' to possibly force sequences in a FASTA file to be stored in a matrix (see ?read.dna for details). BUG FIXES o as.phylo.hclust() used to multiply edge lengths by 2. o A minor bug was fixed in rTraitDisc(). o ace() sometimes failed (parameter value was NaN and the optimisation failed). DEPRECATED & DEFUNCT o evolve.phylo() and plot.ancestral() have been removed. o chronogram(), ratogram(), and NPRS.criterion() have been removed. OTHER CHANGES o nj() has been improved and is now about 30% faster. o The default option 'drop' of [.DNAbin has been changed to FALSE to avoid dropping rownames when selecting a single sequence. o print.DNAbin() has been changed to summary.DNAbin() which has been removed. CHANGES IN APE VERSION 2.5-1 NEW FEATURES o The new function stree generates trees with regular shapes. o It is now possible to bind two trees with x + y (see ?bind.tree for details). o drop.tip(), extract.clade(), root(), and bind.tree() now have an 'interactive' option to make the operation on a plotted tree. o cophyloplot() gains two new arguments 'lwd' and 'lty' for the association links; they are recycled like 'col' (which wasn't before). BUG FIXES o rTraitDisc() did not use its 'freq' argument correctly (it was multiplied with the rate matrix column-wise instead of row-wise). o [node|tip|edge]labels(thermo = ) used to draw empty thermometers with NA values. Nothing is drawn now like with 'text' or 'pch'. The same bug occurred with the 'pie' option. o A bug was fixed in compar.ou() and the help page was clarified. o bind.tree() has been rewritten fixing several bugs and making it more efficient. o plot.phylo(type = "p") sometimes failed to colour correctly the vertical lines representing the nodes. o plot.phylo(direction = "l", x.lim = 30) failed to plot the branches in the correct direction though the tip labels were displayed correctly. OTHER CHANGES o The c, cbind, and rbind methods for "DNAbin" objetcs now check that the sequences are correctly stored (in a list for c, in a matrix for the two other functions). CHANGES IN APE VERSION 2.5 NEW FEATURES o The new function parafit by Pierre Legendre tests for the coevolution between hosts and parasites. It has a companion function, pcoa, that does principal coordinate decomposition. The latter has a biplot method. o The new function lmorigin by Pierre Legendre performs multiple regression through the origin with testing by permutation. o The new functions rTraitCont and rTraitDisc simulate continuous and discrete traits under a wide range of evolutionary models. o The new function delta.plot does a delta plot following Holland et al. (2002, Mol. Biol. Evol. 12:2051). o The new function edges draws additional branches between any nodes and/or tips on a plotted tree. o The new function fancyarrows enhances arrows from graphics with triangle and harpoon heads; it can be called from edges(). o add.scale.bar() has a new option 'ask' to draw interactively. o The branch length score replaces the geodesic distance in dist.topo. o Three new data sets are included: the gopher-lice data (gopher.D), SO2 air pollution in 41 US cities (lmorigin.ex1, from Sokal & Rohlf 1995), and some host-parasite specificity data (lmorigin.ex2, from Legendre & Desdevises 2009). BUG FIXES o add.scale.bar() drew the bar outside the plotting region with the default options with unrooted or radial trees. o dist.topo() made R stuck when the trees had different sizes (thanks to Otto Cordero for the fix). OTHER CHANGES o The geodesic distance has been replaced by the branch length score in dist.topo(). CHANGES IN APE VERSION 2.4-1 NEW FEATURES o rtree() and rcoal() now accept a numeric vector for the 'br' argument. o vcv() is a new generic function with methods for the classes "phylo" and "corPhyl" so that it is possible to calculate the var-cov matrix for "transformation models". vcv.phylo() can still be used for trees of class "phylo"; its argument 'cor' has been renamed 'corr'. BUG FIXES o bind.tree() failed when 'y' had no root edge. o read.nexus() shuffled tip labels when the trees have no branch lengths and there is a TRANSLATE block. o read.nexus() does not try to translate node labels if there is a translation table in the NEXUS file. See ?read.nexus for a clarification on this behaviour. o plot.multiPhylo() crashed R when plotting a list of trees with compressed tip labels. o write.nexus() did not translate the taxa names when asked for. o plot.phylo(type = "fan") did not rotate the tip labels correctly when the tree has branch lengths. o ace(type = "continuous", method = "ML") now avoids sigma² being negative (which resulted in an error). o nj() crashed with NA/NaN in the distance matrix: an error in now returned. CHANGES IN APE VERSION 2.4 NEW FEATURES o base.freq() has a new option 'freq' to return the counts; the default is still to return the proportions. BUG FIXES o seg.sites() did not handle ambiguous nucleotides correctly: they are now ignored. o plot(phy, root.edge = TRUE) failed if there was no $root.edge in the tree: the argument is now ignored. o add.scale.bar() failed when 'x' and 'y' were given (thanks to Janet Young for the fix). OTHER CHANGES o Trying to plot a tree with a single tip now returns NULL with a warning (it returned an error previously). o The way lines representing nodes are coloured in phylograms has been modified (as well as their widths and types) following some users' request; this is only for dichotomous nodes. o The argument 'adj' in [node][tip][edge]labels() now works when using 'pie' or 'thermo'. o A more informative message error is now returned by dist.dna() when 'model' is badly specified (partial matching of this argument is done now). o Deprecated functions are now listed in a help page: see help("ape-defunct") with the quotes. DEPRECATED & DEFUNCT o The functions heterozygosity, nuc.div, theta.h, theta.k and theta.s have been moved from ape to pegas. o The functions mlphylo, DNAmodel and sh.test have been removed. CHANGES IN APE VERSION 2.3-3 BUG FIXES o add.scale.bar() always drew a horizontal bar. o zoom() shuffled tips with unrooted trees. o write.nexus() failed to write correctly trees with a "TipLabel" attribute. o rcoal() failed to compute branch lengths with very large n. o A small bug was fixed in compar.cheverud() (thanks to Michael Phelan for the fix). o seg.sites() failed when passing a vector. o drop.tip() sometimes shuffled tip labels. o root() shuffled node labels with 'resolve.root = TRUE'. CHANGES IN APE VERSION 2.3-2 BUG FIXES o all.equal.phylo() did not compare unrooted trees correctly. o dist.topo(... method = "PH85") did not treat unrooted trees correctly (thanks to Tim Wallstrom for the fix). o root() sometimes failed to test for the monophyly of the outgroup correctly. o extract.clade() sometimes included too many edges. o vcv.phylo() did not work correctly when the tree is in "pruningwise" order. o nj() did not handle correctly distance matrices with many 0's. The code has also been significantly improved: 7, 70, 160 times faster with n = 100, 500, 1000, respectively. CHANGES IN APE VERSION 2.3-1 NEW FEATURES o The new function is.monophyletic tests the monophyly of a group. o There is now a c() method for lists of class "DNAbin". o yule.cov() now fits the null model, and its help page has been corrected with respect to this change. o drop.tip() has a new option 'rooted' to force (or not) a tree to be treated as (un)rooted. BUG FIXES o dist.gene() failed on most occasions with the default pairwise.deletion = FALSE. o read.tree() failed to read correctly the tree name(s). o boot.phylo() now treats correctly data frames. o del.gaps() did not copy the rownames of a matrix. o A small bug was fixed in CDAM.global(). o ace() failed with large data sets. Thanks to Rich FitzJohn for the fix. With other improvements, this function is now about 6 times faster. o write.tree() failed with objects of class "multiPhylo". o drop.tip(, subtree = TRUE) sometimes shuffled tip labels. OTHER CHANGES o [.multiPhylo and [.DNAbin now respect the original class. o Instances of the form class(phy) == "phylo" have been replaced by inherits(phy, "phylo"). o rcoal() is now faster. DEPRECATED & DEFUNCT o klastorin() has been removed. CHANGES IN APE VERSION 2.3 NEW FEATURES o The new functions CADM.global and CADM.post, contributed by Pierre Legendre, test the congruence among several distance matrices. o The new function yule.time fits a user-defined time-dependent Yule model by maximum likelihood. o The new function makeNodeLabel creates and/or modifies node labels in a flexible way. o read.tree() and write.tree() have been modified so that they can handle individual tree names. o plot.phylo() has a new argument 'edge.lty' that specifies the types of lines used for the edges (plain, dotted, dashed, ...) o phymltest() has been updated to work with PhyML 3.0.1. BUG FIXES o drop.tip() shuffled tip labels in some cases. o drop.tip() did not handle node.label correctly. o is.ultrametric() now checks the ordering of the edge matrix. o ace() sometimes returned negative values of likelihoods of ancestral states (thanks to Dan Rabosky for solving this long lasting bug). OTHER CHANGES o The data set xenarthra has been removed. CHANGES IN APE VERSION 2.2-4 BUG FIXES o The bug fix in read.nexus() in version 2.2-3 was wrong: this is now fixed. (Thanks to Peter Wragg for the fix!) o A warning message occurred for no reason with ace(method="GLS"). OTHER CHANGES o There is now a general help page displayed with '?ape'. CHANGES IN APE VERSION 2.2-3 NEW FEATURES o The new function extract.clade extracts a clade from a tree by specifying a node number or label. o fastme.bal() has two new options 'spr' and 'tbr' to perform tree operations of the same names. o dist.dna() can now return the number of site differences by specifying model="N". BUG FIXES o chronopl() did not work with CV = TRUE. o read.nexus() did not work correctly in some situations (trees on multiple lines with different numbers of lines and/or with comments inserted within the trees). o ltt.plot(), ltt.lines(), and mltt.plot() did not count correctly the number of lineages with non-binary trees. OTHER CHANGES o ape has now a namespace. o drop.tip() has been improved: it should be much faster and work better in some cases (e.g., see the example in ?zoom). CHANGES IN APE VERSION 2.2-2 NEW FEATURES o dist.gene() has been substantially improved and gains an option 'pairwise.deletion'. o cbind.DNAbin() has a new option 'fill.with.gaps' and is now more flexible. BUG FIXES o prop.part() failed with a single tree with the default option 'check.labels = TRUE'. o summary.DNAbin() failed to display correctly the summary of sequence lengths with lists of sequences of 10,000 bases or more (because summary.default uses 4 significant digits by default). o read.nexus() failed to read a file with a single tree with line breaks in the Newick string. o del.gaps() returned a list of empty sequences when there were no gaps. OTHER CHANGES o phymltest() has been updated for PhyML 3.0 and gains an option 'append', whereas the option 'path2exec' has been removed. o rbind.DNAbin() and cbind.DNAbin() now accept a single matrix which is returned unchanged (instead of an error). o The data sets bird.orders and bird.families are now stored as Newick strings; i.e., the command data(bird.orders) calls read.tree(). CHANGES IN APE VERSION 2.2-1 NEW FEATURES o The new function makeLabel() helps to modify labels of trees, lists of trees, or DNA sequences, with several utilities to truncate and/or make them unique, substituting some characters, and so on. o The new function del.gaps() removes insertion gaps ("-") in a set of DNA sequences. o read.dna() can now read Clustal files (*.aln). BUG FIXES o root() failed with 'resolve.root = TRUE' when the root was already the specified root. o Several bugs were fixed in mlphylo(). o collapsed.singles() did not propagate the 'Nnode' and 'node.labels' elements (thanks to Elizabeth Purdom for the fix). o read.nexus() failed to remove correctly the comments within trees. o read.nexus() failed to read a file with a single tree and no translation of tip labels. o read.nexus() failed to place correctly tip labels when reading a single tree with no edge lengths. o A bug was fixed in sh.test(). OTHER CHANGES o unique.multiPhylo() is faster thanks to a suggestion by Vladimir Minin. o The option 'check.labels' of consensus() and prop.part() is now TRUE by default. o write.dna() now does not truncate names to 10 characters with the Phylip formats. CHANGES IN APE VERSION 2.2 NEW FEATURES o Four new functions have been written by Damien de Vienne for the graphical exploration of large trees (cophyloplot, subtrees, subtreeplot), and to return the graphical coordinates of tree (without plotting). o The new functions corPagel and corBlomberg implement the Pagel's "lambda" and Blomberg et al.'s "ACDC" correlation structures. o chronopl() has been improved and gains several options: see its help page for details. o boot.phylo() has now an option 'trees' to possibly return the bootstraped trees (the default is FALSE). o prop.part() has been improved and should now be faster in all situations. BUG FIXES o read.dna() failed if "?" occurred in the first 10 sites of the first sequence. o The x/y aspect of the plot is now respected when plotting a circular tree (type = "r" or "f"). o Drawing the tip labels sometimes failed when plotting circular trees. o zoom() failed when tip labels were used instead of their numbers (thanks to Yan Wong for the fix). o drop.tip() failed with some trees (fixed by Yan Wong). o seg.sites() failed with a list. o consensus() failed in some cases. The function has been improved as well and is faster. CHANGES IN APE VERSION 2.1-3 BUG FIXES o A bug in read.nexus() made the Windows R-GUI crash. o An error was fixed in the computation of ancestral character states by generalized least squares in ace(). o di2multi() did not modify node labels correctly. o multi2di() failed if the tree had its attribute "order" set to "cladewise". CHANGES IN APE VERSION 2.1-2 NEW FEATURES o There three new methods for the "multiPhylo" class: str, $, and [[. o root() gains the options 'node' and 'resolve.root' (FALSE by default) as well as its code being improved. o mltt.plot() has now an option 'log' used in the same way than in plot.default(). BUG FIXES o mltt.plot() failed to display the legend with an unnamed list of trees. o nodelabels() with pies now correcly uses the argument 'cex' to draw symbols of different sizes (which has worked already for thermometers). o read.nexus() generally failed to read very big files. OTHER CHANGES o The argument 'family' of compar.gee() can now be a function as well as a character string. o read.tree() and read.nexus() now return an unnamed list if 'tree.names = NULL'. o read.nexus() now returns a modified object of class "multiPhylo" when there is a TRANSLATE block in the NEXUS file: the individual trees have no 'tip.label' vector, but the list has a 'TipLabel' attribute. The new methods '$' and '[[' set these elements correctly when extracting trees. CHANGES IN APE VERSION 2.1-1 NEW FEATURES o The new function rmtree generates lists of random trees. o rcoal() now generates a genuine coalescent tree by default (thanks to Vladimir Minin for the code). BUG FIXES o nuc.div() returned an incorrect value with the default pairwise.deletion = FALSE. OTHER CHANGES o The internal codes of bionj(), fastme.bal(), and fastme.ols() have been improved so that they are stabler and faster. o R packages used by ape are now loaded silently; lattice and gee are loaded only when needed. CHANGES IN APE VERSION 2.1 NEW FEATURES o The new function identify.phylo identifies clades on a plotted tree using the mouse. o It is now possible to subset a list of trees (object of class "multiPhylo") with "[" while keeping its class correct. o The new function as.DNAbin.alignment converts DNA sequences stored in the "alignment" format of the package seqinr into an object of class "DNAbin". o The new function weight.taxo2 helps to build similarity matrices given two taxonomic levels (usually called by other functions). o write.tree() can now take a list of trees (class "multiPhylo") as its main argument. o plot.correlogram() and plot.correlogramList() have been improved, and gain several options (see the help page for details). A legend is now plotted by default. BUG FIXES o dist.dna() returned some incorrect values with `model = "JC69"' and `pairwise.deletion = TRUE'. This affected only the distances involving sequences with missing values. (Thanks to Bruno Toupance for digging this bug out.) o write.tree() failed with some trees: this is fixed by removing the `multi.line' option (trees are now always printed on a single line). o read.nexus() did not correctly detect trees with multiple root edges (see OTHER CHANGES). OTHER CHANGES o The code of mlphylo() has been almost entirely rewritten, and should be much stabler. The options have been also greatly simplified (see ?mlphylo and ?DNAmodel for details). o The internal function nTips has been renamed klastorin_nTips. o The code of is.ultrametric() contained redundancies and has been cleaned-up. o The code of Moran.I() and of correlogram.formula() have been improved. o read.tree() and read.nexus() now return an error when trying to read a tree with multiple root edges (see BUG FIXES). The correction applied in previous version did not work in all situations. o The class c("multi.tree", "phylo") has been renamed "multiPhylo". DOCUMENTATION o There is now a vignette in ape: see vignette("MoranI", "ape"). DEPRECATED & DEFUNCT o as.matching() and as.phylo.matching() do not support branch lengths. o correlogram.phylo() and discrete.dist() have been removed. CHANGES IN APE VERSION 2.0-2 NEW FEATURES o The new function matexpo computes the exponential of a square matrix. o The new function unique.multi.tree removes duplicate trees from a list. o yule() has a new option `use.root.edge = FALSE' that specifies to ignore, by default, the root edge of the tree if it exists. BUG FIXES o which.edge() failed when the index of a single terminal edge was looked for. o In diversi.time(), the values returned for model C were incorrect. o A bug was fixed in yule() that affected the calculation of the likelihood in the presence of ties in the branching times. o There was a bug in the C function mat_expo4x4 affecting the calculations of the transition probabilities for models HKY and GTR in mlphylo(). o A small bug was fixed in as.matrix.DNAbin (thanks to James Bullard). o rtree() did not `shuffle' the tip labels by default, so only a limited number of labelled topologies could be generated. CHANGES IN APE VERSION 2.0-1 NEW FEATURES o The three new functions bionj, fastme.ols, and fastme.bal perform phylogeny estimation by the BIONJ and fastME methods in OLS and balanced versions. This is a port to R of previous previous programs done by Vincent Lefort. o The new function chronoMPL performs molecular dating with the mean path lengths method of Britton et al. (2002, Mol. Phyl. Evol. 24: 58). o The new function rotate, contributed by Christoph Heibl, swaps two clades connected to the same node. It works also with multichotomous nodes. o The new `method' as.matrix.DNAbin() may be used to convert easily DNA sequences stored in a list into a matrix while keeping the names and the class. BUG FIXES o chronopl() failed when some branch lengths were equal to zero: an error message is now returned. o di2multi() failed when there was a series of consecutive edges to remove. CHANGES IN APE VERSION 1.10-2 NEW FEATURES o plot.phylo() can now plot circular trees: the option is type = "fan" or type = "f" (to avoid the ambiguity with type = "c"). o prop.part() has a new option `check.labels = FALSE' which allows to considerably speed-up the calculations of bipartitions. As a consequence, calculations of bootstrap values with boot.phylo() should be much faster. BUG FIXES o read.GenBank() did not return correctly the list of species as from ape 1.10: this is fixed in this version o Applying as.phylo() on a tree of class "phylo" failed: the object is now returned unchanged. CHANGES IN APE VERSION 1.10-1 NEW FEATURES o The three new functions Ntip, Nnode, and Nedge return, for a given tree, the number of tips, nodes, or edges, respectively. BUG FIXES o read.nexus() did not set correctly the class of the returned object when reading multiple trees. o mllt.plot() failed with objects of class c("multi.tree", "phylo"). o unroot() did not work correctly in most cases. o reorder.phylo() made R freeze in some occasions. o Plotting a tree in pruningwise order failed. o When plotting an unrooted tree, the tip labels where not all correctly positioned if the option `cex' was used. CHANGES IN APE VERSION 1.10 NEW FEATURES o Five new `method' functions have been introduced to manipulate DNA sequences in binary format (see below). o Three new functions have been introduced to convert between the new binary and the character formats. o The new function as.alignment converts DNA sequences stored as single characters into the class "alignment" used by the package seqinr. o read.dna() and read.GenBank() have a new argument `as.character' controlling whether the sequences are returned in binary format or as character. BUG FIXES o root() failed when the tree had node labels: this is fixed. o plot.phylo() did not correctly set the limits on the y-axis with the default setting: this is fixed. o dist.dna() returned a wrong result for the LogDet, paralinear, and BH87 models with `pairwise.deletion = TRUE'. OTHER CHANGES o DNA sequences are now internally stored in a binary format. See the document "A Bit-Level Coding Scheme for Nucleotides" for the details. Most functions analyzing DNA functions have been modified accordingly and are now much faster (dist.dna is now ca. 60 times faster). CHANGES IN APE VERSION 1.9-4 BUG FIXES o A bug was fixed in edgelabels(). o as.phylo.hclust() did not work correctly when the object of class "hclust" has its labels set to NULL: the returned tree has now its tip labels set to "1", "2", ... o consensus could fail if some tip labels are a subset of others (e.g., "a" and "a_1"): this is now fixed. o mlphylo() failed in most cases if some branch lengths of the initial tree were greater than one: an error message is now issued. o mlphylo() failed in most cases when estimating the proportion of invariants: this is fixed. CHANGES IN APE VERSION 1.9-3 NEW FEATURES o The new function edgelabels adds labels on the edge of the tree in the same way than nodelabels or tiplabels. BUG FIXES o multi2di() did not handle correctly branch lengths with the default option `random = TRUE': this is now fixed. o A bug was fixed in nuc.div() when using pairwise deletions. o A bug occurred in the analysis of bipartitions with large numbers of large trees, with consequences on prop.part, prop.clades, and boot.phylo. o The calculation of the Billera-Holmes-Vogtmann distance in dist.topo was wrong: this has been fixed. CHANGES IN APE VERSION 1.9-2 NEW FEATURES o The new function ladderize reorganizes the internal structure of a tree to plot them left- or right-ladderized. o The new function dist.nodes computes the patristic distances between all nodes, internal and terminal, of a tree. It replaces the option `full = TRUE' of cophenetic.phylo (see below). BUG FIXES o A bug was fixed in old2new.phylo(). o Some bugs were fixed in chronopl(). o The edge colours were not correctly displayed by plot.phylo (thank you to Li-San Wang for the fix). o cophenetic.phylo() failed with multichotomous trees: this is fixed. OTHER CHANGES o read.dna() now returns the sequences in a matrix if they are aligned (interleaved or sequential format). Sequences in FASTA format are still returned in a list. o The option `full' of cophenetic.phylo() has been removed because it could not be used from the generic. DEPRECATED & DEFUNCT o rotate() has been removed; this function did not work correctly since ape 1.9. CHANGES IN APE VERSION 1.9-1 BUG FIXES o Trees with a single tip were not read correctly in R as the element `Nnode' was not set: this is fixed. o unroot() did not set correctly the number of nodes of the unrooted tree in most cases. o read.GenBank() failed when fetching very long sequences, particularly of the BX-series. o A bug was introduced in read.tree() with ape 1.9: it has been fixed CHANGES IN APE VERSION 1.9 NEW FEATURES o There are two new print `methods' for trees of class "phylo" and lists of trees of class "multi.tree", so that they are now displayed in a compact and informative way. o There are two new functions, old2new.phylo and new2old.phylo, for converting between the old and new coding of the class "phylo". o dist.dna() has three new models: Barry and Hartigan ("BH87"), LogDet ("logdet"), and paralinear ("paralin"). o compute.brlen() has been extended: several methods are now available to compute branch lengths. o write.dna() can now handle matrices as well as lists. BUG FIXES o cophenetic.phylo() sometimes returned a wrong result with multichotomous trees: this is fixed. o rotate() failed when a single tip was specified: the tree is now returned unchanged. o ace() did not return the correct index matrix with custom models: this is fixed. o multi2di() did not work correctly when resolving multichotomies randomly: the topology was always the same, only the arrangement of clades was randomized: this is fixed. This function now accepts trees with no branch lengths. o The output of diversi.gof() was blurred by useless prints when a user distribution was specified. This has been corrected, and the help page of this function has been expanded. OTHER CHANGES o The internal structure of the class "phylo" has been changed: see the document "Definition of Formats for Coding Phylogenetic Trees in R" for the details. In addition, the code of most functions has been improved. o Several functions have been improved by replacing some R codes by C codes: pic, plot.phylo, and reorder.phylo. o There is now a citation information: see citation("ape") in R. o write.tree() now does not add extra 0's to branch lengths so that 1.23 is printed "1.23" by default, not "1.2300000000". o The syntax of bind.tree() has been simplified. This function now accepts trees with no branch lengths, and handles correctly node labels. o The option `as.numeric' of mrca() has been removed. o The unused options `format' and `rooted' of read.tree() have been removed. o The unused option `format' of write.tree() has been removed. o The use of node.depth() has been simplified. CHANGES IN APE VERSION 1.8-5 NEW FEATURES o Two new functions read.nexus.data() and write.nexus.data(), contributed by Johan Nylander, allow to read and write molecular sequences in NEXUS files. o The new function reorder.phylo() reorders the internal structure of a tree of class "phylo". It is used as the generic, e.g., reorder(tr). o read.tree() and read.nexus() can now read trees with a single edge. o The new data set `cynipids' supplies a set of protein sequences in NEXUS format. BUG FIXES o The code of all.equal.phylo() has been completely rewritten (thanks to Benoît Durand) which fixes several bugs. o read.tree() and read.nexus() now checks the labels of the tree to remove or substitute any characters that are illegal in the Newick format (parentheses, etc.) o A negative P-value could be returned by mantel.test(): this is now fixed. CHANGES IN APE VERSION 1.8-4 NEW FEATURES o The new function sh.test() computes the Shimodaira- Hasegawa test. o The new function collapse.singles() removes the nodes with a single descendant from a tree. o plot.phylo() has a new argument `tip.color' to specify the colours of the tips. o mlphylo() has now an option `quiet' to control the display of the progress of the analysis (the default is FALSE). BUG FIXES o read.dna() did not read correctly sequences in sequential format with leading alignment gaps "-": this is fixed. o ace() returned a list with no class so that the generic functions (anova, logLik, ...) could not be used directly. This is fixed as ace() now returns an object of class "ace". o anova.ace() had a small bug when computing the number of degrees of freedom: this is fixed. o mlphylo() did not work when the sequences were in a matrix or a data frame: this is fixed. o rtree() did not work correctly when trying to simulate an unrooted tree with two tips: an error message is now issued. OTHER CHANGES o The algorithm of rtree() has been changed: it is now about 40, 100, and 130 times faster for 10, 100, and 1000 tips, respectively. CHANGES IN APE VERSION 1.8-3 NEW FEATURES o There are four new `method' functions to be used with the results of ace(): logLik(), deviance(), AIC(), and anova(). o The plot method of phymltest has two new arguments: `main' to change the title, and `col' to control the colour of the segments showing the AIC values. o ace() has a new argument `ip' that gives the initial values used in the ML estimation with discrete characters (see the examples in ?ace). This function now returns a matrix giving the indices of the estimated rates when analysing discrete characters. o nodelabels() and tiplabels() have a new argument `pie' to represent proportions, with any number of categories, as piecharts. The use of the option `thermo' has been improved: there is now no limitation on the number of categories. BUG FIXES o mlphylo() did not work with more than two partitions: this is fixed. o root() failed if the proposed outgroup was already an outgroup in the tree: this is fixed. o The `col' argument in nodelabels() and tiplabels() was not correctly passed when `text' was used: this is fixed. o Two bugs were fixed in mlphylo(): parameters were not always correctly output, and the estimation failed in some cases. o plot.phylo() was stuck when given a tree with a single tip: this is fixed and a message error is now returned. o An error was corrected in the help page of gammaStat regarding the calculation of P-values. o Using gls() could crash R when the number of species in the tree and in the variables were different: this is fixed. CHANGES IN APE VERSION 1.8-2 NEW FEATURES o The new function mlphylo() fits a phylogenetic tree by maximum likelihood from DNA sequences. Its companion function DNAmodel() is used to define the substitution model which may include partitioning. There are methods for logLik(), deviance(), and AIC(), and the summary() method has been extended to display in a friendly way the results of this model fitting. Currently, the functionality is limited to estimating the substitution and associated parameters and computing the likelihood. o The new function drop1.compar.gee (used as, e.g., drop1(m)) tests for single effects in GEE-based comparative method. A warning message is printed if there is not enough degrees of freedom. BUG FIXES o An error message was sometimes issued by plot.multi.tree(), though with no consequence. CHANGES IN APE VERSION 1.8-1 NEW FEATURES o There is a new plot method for lists of trees (objects of class "multi.tree"): it calls plot.phylo() internally and is documented on the same help page. BUG FIXES o A bug was fixed in the C code that analyzes bipartitions: this has impact on several functions like prop.part, prop.clades, boot.phylo, or consensus. o root() did not work correctly when the specified outgroup had more than one element: this is fixed. o dist.dna() sometimes returned a warning inappropriately: this has been corrected. o If the distance object given to nj() had no rownames, nj() returned a tree with no tip labels: it now returns tips labelled "1", "2", ..., corresponding to the row numbers. OTHER CHANGES o nj() has been slightly changed so that tips with a zero distance are first aggregated with zero-lengthed branches; the usual NJ procedure is then performed on a distance matrix without 0's. CHANGES IN APE VERSION 1.8 NEW FEATURES o The new function chronopl() estimates dates using the penalized likelihood method by Sanderson (2002; Mol. Biol. Evol., 19:101). o The new function consensus() calculates the consensus tree of a list of trees. o The new function evolve.phylo() simulates the evolution of continuous characters along a phylogeny under a Brownian model. o The new plot method for objects of class "ancestral" displays a tree together with ancestral values, as returned by the above function. o The new function as.phylo.formula() returns a phylogeny from a set of nested taxonomic variables given as a formula. o The new function read.caic() reads trees in CAIC format. o The new function tiplabels() allows to add labels to the tips of a tree using text or plotting symbols in a flexible way. o The new function unroot() unroots a phylogeny. o multi2di() has a new option, `random', which specifies whether to resolve the multichotomies randomly (the default) or not. o prop.part() now returns an object of class "prop.part" for which there are print (to display a partition in a more friendly way) and summary (to extract the numbers) methods. o plot.phylo() has a new option, `show.tip.label', specifying whether to print the labels of the tips. The default is TRUE. o The code of nj() has been replaced by a faster C code: it is now about 10, 25, and 40 times faster for 50, 100, and 200 taxa, respectively. o write.nexus() now writes whether a tree is rooted or not. BUG FIXES o Two bugs have been fixed in root(): unrooted trees are now handled corretly, and node labels are now output normally. o A bug was fixed in phymltest(): the executable couldn't be found in some cases. o Three bugs have been fixed in ace(): computing the likelihood of ancestral states of discrete characters failed, custom models did not work, and the function failed with a null gradient (a warning message is now returned; this latter bug was also present in yule.cov() as well and is now fixed). o pic() hanged out when missing data were present: an error is now returned. o A small bug was fixed in dist.dna() where the gamma correction was not always correctly dispatched. o plot.phylo() plotted correctly the root edge only when the tree was plotted rightwards: this works now for all directions. OTHER CHANGES o dist.taxo() has been renamed as weight.taxo(). o dist.phylo() has been replaced by the method cophenetic.phylo(). o Various error and warning messages have been improved. CHANGES IN APE VERSION 1.7 NEW FEATURES o The new function ace() estimates ancestral character states for continuous characters (with ML, GLS, and contrasts methods), and discrete characters (with ML only) for any number of states. o The new function compar.ou() fits the Ornstein-Uhlenbeck model of directional evolution for continuous characters. The user specifies the node(s) of the tree where the character optimum changes. o The new function is.rooted() tests whether a tree (of class "phylo") is rooted. o The new function rcoal() generates random ultrametric trees with the possibility to specify the function that generates the inter-nodes distances. o The new function mrca() gives for all pairs of tips in a tree (and optionally nodes too) the most recent common ancestor. o nodelabels() has a new option `thermo' to plot proportions (up to three classes) on the nodes of a tree. o rtree() has been improved: it can now generate rooted or unrooted trees, and the mathematical function that generates the branch lengths may be specified by the user. The tip labels may be given directly in the call to rtree. The limit cases (n = 2, 3) are now handled correctly. o dist.topo() has a new argument `method' with two choices: "PH85" for Penny and Henny's method (already available before and now the default), and "BHV01" for the geometric distance by Billera et al. (2001, Adv. Appl. Math. 27:733). o write.tree() has a new option, `digits', which specifies the number of digits to be printed in the Newick tree. By default digits = 10. The numbers are now always printed in decimal form (i.e., 1.0e-1 is now avoided). o dist.dna() can now compute the raw distances between pairs of DNA sequences by specifying model = "raw". o dist.phylo() has a new option `full' to possibly compute the distances among all tips and nodes of the tree. The default is `full = FALSE'. BUG FIXES o Several bugs were fixed in all.equal.phylo(). o dist.dna() did not handle correctly gaps ("-") in alignments: they are now considered as missing data. o rotate() did not work if the tips were not ordered: this is fixed. o mantel.test() returned NA in some special cases: this is fixed and the function has been improved and is now faster. o A bug was fixed in diversi.gof() where the calculation of A² was incorrect. o cherry() did not work correctly under some OSs (mainly Linux): this is fixed. o is.binary.tree() has been modified so that it works with both rooted and unrooted trees. o The documentation of theta.s() was not correct: this has been fixed. o plot.mst() did not work correctly: this is fixed. CHANGES IN APE VERSION 1.6 NEW FEATURES o The new function dist.topo() computes the topological distances between two trees. o The new function boot.phylo() performs a bootstrap analysis on phylogeny estimation. o The new functions prop.part() and prop.clades() analyse bipartitions from a series of trees. OTHER CHANGES o read.GenBank() now uses the EFetch utility of NCBI instead of the usual Web interface: it is now much faster (e.g., 12 times faster to retrieve 8 sequences, 37 times for 60 sequences). BUG FIXES o Several bugs were fixed in read.dna(). o Several bugs were fixed in diversi.time(). o is.binary.tree() did not work correctly if the tree has no edge lengths: this is fixed. o drop.tip() did not correctly propagated the `node.label' of a tree: this is fixed. CHANGES IN APE VERSION 1.5 NEW FEATURES o Two new functions, as.matching.phylo() and as.phylo.matching(), convert objects between the classes "phylo" and "matching". The latter implements the representation of binary trees introduced by Diaconis and Holmes (1998; PNAS 95:14600). The generic function as.matching() has been introduced as well. o Two new functions, multi2di() and di2multi(), allow to resolve and collapse multichotomies with branches of length zero. o The new function nuc.div() computes the nucleotide diversity from a sample a DNA sequences. o dist.dna() has been completely rewritten with a much faster (particularly for large data sets) C code. Eight models are available: JC69, K80, F81, K81, F84, T92, TN93, and GG95 (the option `method' has been renamed `model'). Computation of variance is available for all models. A gamma-correction is possible for JC69, K80, F81, and TN93. There is a new option, pairwise.deletion, to remove sites with missing data on a pairwise basis. The option `GCcontent' has been removed. o read.GenBank() has a new option (species.names) which specifies whether to return the species names of the organisms in addition to the accession numbers of the sequences (this is the default behaviour). o write.nexus() can now write several trees in the same NEXUS file. o drop.tip() has a new option `root.edge' that allows to specify the new root edge if internal branches are trimmed. BUG FIXES o as.phylo.hclust() failed if some labels had parentheses: this is fixed. o Several bugs were fixed in all.equal.phylo(). This function now returns the logical TRUE if the trees are identical but with different representations (a report was printed previously). o read.GenBank() did not correctly handle ambiguous base codes: this is fixed. OTHER CHANGES o birthdeath() now returns an object of class "birthdeath" for which there is a print method. CHANGES IN APE VERSION 1.4 NEW FEATURES o The new function nj() performs phylogeny estimation with the neighbor-joining method of Saitou and Nei (1987; Mol. Biol. Evol., 4:406). o The new function which.edge() identifies the edges of a tree that belong to a group specified as a set of tips. o The new function as.phylo.phylog() converts an object of class "phylog" (from the package ade4) into an object of class "phylo". o The new function axisPhylo() draws axes on the side of a phylogeny plot. o The new function howmanytrees() calculates the number of trees in different cases and giving a number of tips. o write.tree() has a new option `multi.line' (TRUE by default) to write a Newick tree on several lines rather than on a single line. o The functionalities of zoom() have been extended. Several subtrees can be visualized at the same time, and they are marked on the main tree with colors. The context of the subtrees can be marked with the option `subtree' (see below). o drop.tip() has a new option `subtree' (FALSE by default) which specifies whether to output in the tree how many tips have been deleted and where. o The arguments of add.scale.bar() have been redefined and have now default values (see ?add.scale.bar for details). This function now works even if the plotted tree has no edge length. o plot.phylo() can now plot radial trees, but this does not take edge lengths into account. o In plot.phylo() with `type = "phylogram"', if the values of `edge.color' and `edge.width' are identical for sister-branches, they are propagated to the vertical line that link them. BUG FIXES o Repeated calls to as.phylo.hclust() or as.hclust.phylo() made R crashing. This is fixed. o In plot.phylo(), the options `edge.color' and `edge.width' are now properly recycled; their default values are now "black" and 1, respectively. o A bug has been fixed in write.nexus(). OTHER CHANGES o The function node.depth.edgelength() has been removed and replaced by a C code. CHANGES IN APE VERSION 1.3-1 NEW FEATURES o The new function nodelabels() allows to add labels to the nodes of a tree using text or plotting symbols in a flexible way. o In plot.phylo() the arguments `x.lim' and `y.lim' can now be two numeric values specifying the lower and upper limits on the x- and y-axes. This allows to leave some space on any side of the tree. If a single value is given, this is taken as the upper limit (as before). CHANGES IN APE VERSION 1.3 NEW FEATURES o The new function phymltest() calls the software PHYML and fits 28 models of DNA sequence evolution. There are a print method to display likelihood and AIC values, a summary method to compute the hierarchical likelihood ratio tests, and a plot method to display graphically the AIC values of each model. o The new function yule.cov() fits the Yule model with covariates, a model where the speciation rate is affected by several species traits through a generalized linear model. The parameters are estimated by maximum likelihood. o Three new functions, corBrownian(), corGrafen(), and corMartins(), compute the expected correlation structures among species given a phylogeny under different models of evolution. These can be used for GLS comparative phylogenetic methods (see the examples). There are coef() and corMatrix() methods and an Initialize.corPhyl() function associated. o The new function compar.cheverud() implements Cheverud et al.'s (1985; Evolution 39:1335) phylogenetic comparative method. o The new function varcomp() estimates variance components; it has a plot method. o Two new functions, panel.superpose.correlogram() and plot.correlogramList(), allow to plot several phylogenetic correlograms. o The new function node.leafnumber() computes the number of leaves of a subtree defined by a particular node. o The new function node.sons() gets all tags of son nodes from a given parent node. o The new function compute.brlen() computes the branch lengths of a tree according to a specified method. o plot.phylo() has three new options: "cex" controls the size of the (tip and node) labels (thus it is no more needed to change the global graphical parameter), "direction" which allows to plot the tree rightwards, leftwards, upwards, or downwards, and "y.lim" which sets the upper limit on the y-axis. BUG FIXES o Some functions which try to match tip labels and names of additional data (e.g. vector) are likely to fail if there are typing or syntax errors. If both series of names do not perfectly match, they are ignored and a warning message is now issued. These functions are bd.ext, compar.gee, pic. Their help pages have been clarified on this point. CHANGES IN APE VERSION 1.2-7 NEW FEATURES o The new function root() reroots a phylogenetic tree with respect to a specified outgroup. o The new function rotate() rotates an internal branch of a tree. o In plot.phylo(), the new argument "lab4ut" (labels for unrooted trees) controls the display of the tip labels in unrooted trees. This display has been greatly improved: the tip labels are now not expected to overlap with the tree (particularly if lab4ut = "axial"). In all cases, combining appropriate values of "lab4ut" and the font size (via "par(cex = )") should result in readable unrooted trees. See ?plot.phylo for some examples. o In drop.tip(), the argument `tip' can now be numeric or character. BUG FIXES o drop.tip() did not work correctly with trees with no branch lengths: this is fixed. o A bug in plot.phylo(..., type = "unrooted") made some trees being plotted with some line crossings: this is now fixed. CHANGES IN APE VERSION 1.2-6 NEW FEATURES o Six new functions (Moran.I, correlogram.formula, discrete.dist, correlogram.phylo, dist.taxo, plot.correlogram) have been added to implement comparative methods with an autocorrelation approach. o A new data set describing some life history traits of Carnivores has been included. BUG FIXES o A fix was made on mcmc.popsize() to conform to R 2.0.0. OTHER CHANGES o When plotting a tree with plot.phylo(), the new default of the option `label.offset' is now 0, so the labels are always visible. CHANGES IN APE VERSION 1.2-5 NEW FEATURES o The new function bd.ext() fits a birth-death model with combined phylogenetic and taxonomic data, and estimates the corresponding speciation and extinction rates. OTHER CHANGES o The package gee is no more required by ape but only suggested since only the function compar.gee() calls gee. CHANGES IN APE VERSION 1.2-4 NEW FEATURES o Four new functions (mcmc.popsize, extract.popsize, plot.popsize, and lines.popsize) implementing a new approach for inferring the demographic history from genealogies using a reversible jump MCMC have been introduced. o The unit of time in the skyline plot and in the new plots can now be chosen to be actual years, rather than substitutions. CHANGES IN APE VERSION 1.2-3 NEW FEATURES o The new function rtree() generates a random binary tree with or without branch lengths. o Two new functions for drawing lineages-through-time (LTT) plots are provided: ltt.lines() adds a LTT curve to an existing plot, and mltt.plot() does a multiple LTT plot giving several trees as arguments (see `?ltt.plot' for details). BUG FIXES o Some taxon names made R crashing when calling as.phylo.hclust(): this is fixed. o dist.dna() returned an error with two identical DNA sequences (only using the Jukes-Cantor method returned 0): this is fixed. OTHER CHANGES o The function dist.phylo() has been re-written using a different algorithm: it is now about four times faster. o The code of branching.times() has been improved: it is now about twice faster. CHANGES IN APE VERSION 1.2-2 NEW FEATURES o The new function seg.sites() finds the segregating sites in a sample of DNA sequences. BUG FIXES o A bug introduced in read.tree() and in read.nexus() with version 1.2-1 was fixed. o A few errors were corrected and a few examples were added in the help pages. CHANGES IN APE VERSION 1.2-1 NEW FEATURES o plot.phylo() can now draw the edge of the root of a tree if it has one (see the new option `root.edge', its default is FALSE). BUG FIXES o A bug was fixed in read.nexus(): files with semicolons inside comment blocks were not read correctly. o The behaviour of read.tree() and read.nexus() was corrected so that tree files with badly represented root edges (e.g., with an extra pair of parentheses, see the help pages for details) are now correctly represented in the object of class "phylo"; a warning message is now issued. CHANGES IN APE VERSION 1.2 NEW FEATURES o plot.phylo() has been completely re-written and offers several new functionalities. Three types of trees can now be drawn: phylogram (as previously), cladogram, and unrooted tree; in all three types the branch lengths can be drawn using the edge lengths of the phylogeny or not (e.g., if the latter is absent). The vertical position of the nodes can be adjusted with two choices (see option `node.pos'). The code has been re-structured, and two new functions (potentially useful for developpers) are documented separately: node.depth.edgelength() and node.depth(); see the respective help pages for details. o The new function zoom() allows to explore very large trees by focusing on a small portion of it. o The new function yule() fits by maximum likelihood the Yule model (birth-only process) to a phylogenetic tree. o Support for writing DNA sequences in FASTA format has been introduced in write.dna() (support for reading sequences in this format was introduced in read.dna() in version 1.1-2). The function has been completely re-written, fixing some bugs (see below); the default behaviour is no more to display the sequences on the standard output. Several options have been introduced to control the sequence printing in a flexible way. The help page has been extended. o A new data set is included: a supertree of bats in NEXUS format. BUG FIXES o In theta.s(), the default of the option `variance' has been changed to `FALSE' (as was indicated in the help page). o Several bugs were fixed in the code of all.equal.phylo(). o Several bugs were fixed in write.dna(), particularly this function did not work with `format = "interleaved"'. o Various errors were corrected in the help pages. OTHER CHANGES o The argument names of as.hclust.phylo() have been changed from "(phy)" to "(x, ...)" to conform to the definition of the corresponding generic function. o gamma.stat() has been renamed gammaStat() to avoid confusion since gamma() is a generic function. CHANGES IN APE VERSION 1.1-3 BUG FIXES o base.freq() previously did not return a value of 0 for bases absent in the data (e.g., a vector of length 3 was returned if one base was absent). This is now fixed (a vector of length 4 is always returned). o Several bugs were fixed in read.nexus(), including that this function did not work in this absence of a "TRANSLATE" command in the NEXUS file, and that the commands were case-sensitive. CHANGES IN APE VERSION 1.1-2 NEW FEATURES o The Tamura and Nei (1993) model of DNA distance is now implemented in dist.dna(): five models are now available in this function. o A new data set is included: a set of 15 sequences of the cytochrome b mitochondrial gene of the woodmouse (Apodemus sylvaticus). BUG FIXES o A bug in read.nexus() was fixed. o read.dna() previously did not work correctly in most cases. The function has been completely re-written and its help page has been considerably extended (see ?read.dna for details). Underscores (_) in taxon names are no more replaced with spaces (this behaviour was undocumented). o A bug was fixed in write.dna(). CHANGES IN APE VERSION 1.1-1 BUG FIXES o A bug in read.tree() introduced in APE 1.1 was fixed. o A bug in compar.gee() resulted in an error when trying to fit a model with `family = "binomial"'. This is now fixed. CHANGES IN APE VERSION 1.1 NEW FEATURES o The Klastorin (1982) method as suggested by Misawa and Tajima (2000, Mol. Biol. Evol. 17:1879-1884) for classifying genes on the basis of phylogenetic trees has been implemented (see the function klastorin()). o Functions have been added to convert APE's "phylo" objects in "hclust" cluster objects and vice versa (see the help page of as.phylo for details). o Three new functions, ratogram(), chronogram() and NPRS.criterion(), are introduced for the estimation of absolute evolutionary rates (ratogram) and dated clock-like trees (chronogram) from phylogenetic trees using the non-parametric rate smoothing approach by MJ Sanderson (1997, Mol. Biol. Evol. 14:1218-1231). o A summary method is now provided printing a summary information on a phylogenetic tree with, for instance, `summary(tree)'. o The behaviour of read.tree() was changed so that all spaces and tabulations in tree files are now ignored. Consequently, spaces in tip labels are no more allowed. Another side effect is that read.nexus() now does not replace the underscores (_) in tip labels with spaces (this behaviour was undocumented). o The function plot.phylo() has a new option (`underscore') which specifies whether the underscores in tip labels should be written on the plot as such or replaced with spaces (the default). o The function birthdeath() now computes 95% confidence intervals of the estimated parameters using profile likelihood. o Three new data sets are included: a gene tree estimated from 36 landplant rbcL sequences, a gene tree estimated from 32 opsin sequences, and a gene tree for 50 BRCA1 mammalian sequences. BUG FIXES o A bug was fixed in dist.gene() where nothing was returned. o A bug in plot.mst() was fixed. o A bug in vcv.phylo() resulted in false correlations when the option `cor = TRUE' was used (now fixed). CHANGES IN APE VERSION 1.0 NEW FEATURES o Two new functions, read.dna() and write.dna(), read/write in a file DNA sequences in interleaved or in sequential format. o Two new functions, read.nexus() and write.nexus(), read/write trees in a NEXUS file. o The new function bind.tree() allows to bind two trees together, possibly handling root edges to give internal branches. o The new function drop.tip() removes the tips in a phylogenetic tree, and trims (or not) the corresponding internal branches. o The new function is.ultrametric() tests if a tree is ultrametric. o The function plot.phylo() has more functionalities such as drawing the branches with different colours and/or different widths, showing the node labels, controling the position and font of the labels, rotating the labels, and controling the space around the plot. o The function read.tree() can now read trees with no branch length, such as "(a,b),c);". Consequently, the element `edge.length' in objects of class "phylo" is now optional. o The function write.tree() has a new default behaviour: if the default for the option `file' is used (i.e. file = ""), then a variable of mode character containing the tree in Newick format is returned which can thus be assigned (e.g., tree <- write.tree(phy)). o The function read.tree() has a new argument `text' which allows to read the tree in a variable of mode character. o A new data set is included: the phylogenetic relationships among the orders of birds from Sibley and Ahlquist (1990). CHANGES IN APE VERSION 0.2-1 BUG FIXES o Several bugs were fixed in the help pages. CHANGES IN APE VERSION 0.2 NEW FEATURES o The function write.tree() writes phylogenetic trees (objects of class "phylo") in an ASCII file using the Newick parenthetic format. o The function birthdeath() fits a birth-death model to branching times by maximum likelihood, and estimates the corresponding speciation and extinction rates. o The function scale.bar() adds a scale bar to a plot of a phylogenetic tree. o The function is.binary.tree() tests whether a phylogeny is binary. o Two generic functions, coalescent.intervals() and collapsed.intervals(), as well as some methods are introduced. o Several functions, including some generics and methods, for computing skyline plot estimates (classic and generalized) of effective population size through time are introduced and replace the function skyline.plot() in version 0.1. o Two data sets are now included: the phylogenetic relationships among the families of birds from Sibley and Ahlquist (1990), and an estimated clock-like phylogeny of HIV sequences sampled in the Democratic Republic of Congo. DEPRECATED & DEFUNCT o The function skyline.plot() in ape 0.1 has been deprecated and replaced by more elaborate functions (see above). BUG FIXES o Two important bugs were fixed in plot.phylo(): phylogenies with multichotomies not at the root or not with only terminal branches, and phylogenies with a single node (i.e. only terminal branches) did not plot. These trees should be plotted correctly now. o Several bugs were fixed in diversi.time() in the computation of AICs and LRTs. o Various errors were corrected in the help pages. ape/COPYING0000644000176200001440000004313312337057645012072 0ustar liggesusers GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. ape/vignettes/0000755000176200001440000000000014726075214013037 5ustar liggesusersape/vignettes/ape.bib0000644000176200001440000000443713773050433014267 0ustar liggesusers@STRING{Ev = {Evolution}} @STRING{SZ = {Systematic Zoology}} @STRING{ny = {New York}} @book{Cliff1973, Author = {Cliff, A. D. and Ord, J. K.}, Title = {{Spatial Autocorrelation}}, Publisher = {Pion}, Address = {London}, Year = 1973} @incollection{Cliff1981, Author = {Cliff, A. D. and Ord, J. K.}, Title = {{Spatial and temporal analysis: autocorrelation in space and time}}, BookTitle = {{Quantitative Geography: A British View}}, Editor = {Wrigley, E. N. and Bennett, R. J.}, Publisher = {Routledge \& Kegan Paul}, Address = {London}, Pages = {104-110}, Year = 1981} @article{Cheverud1985, Author = {Cheverud, J. M. and Dow, M. M. and Leutenegger, W.}, Title = {{The quantitative assessment of phylogenetic constraints in comparative analyses: sexual dimorphism in body weight among primates}}, Journal = Ev, Volume = {39}, Pages = {1335-1351}, Year = 1985} @article{Felsenstein1978, Author = {Felsenstein, J.}, Title = {{Number of evolutionary trees}}, Journal = SZ, Volume = {27}, Number = {1}, Pages = {27-33}, Year = 1978} @book{Felsenstein2004, Author = {Felsenstein, J.}, Title = {{Inferring Phylogenies}}, Publisher = {Sinauer Associates}, Address = {Sunderland, MA}, Year = 2004} @article{Gittleman1990, Author = {Gittleman, J. L. and Kot, M.}, Title = {{Adaptation: statistics and a null model for estimating phylogenetic effects}}, Journal = SZ, Volume = {39}, Pages = {227-241}, Year = 1990} @article{Moran1950, Author = {Moran, P. A. P.}, Title = {{Notes on continuous stochastic phenomena}}, Journal = {Biometrika}, Volume = {37}, Pages = {17-23}, Year = 1950} @book{Paradis2006, Author = {Paradis, E.}, Title = {{Analysis of Phylogenetics and Evolution with R}}, Publisher = {Springer}, Address = ny, Year = 2006} @book{Paradis2012, Author = {Paradis, E.}, Title = {{Analysis of Phylogenetics and Evolution with R (Second Edition)}}, Publisher = {Springer}, Address = ny, Year = 2012} @book{Murrell2006, Author = {Murrell, P.}, Title = {{R Graphics}}, Publisher = {Chapman \& Hall/CRC}, Address = {Boca Raton, FL}, Year = 2006} @article{Czech2017, Author = {Czech, L. and Huerta-Cepas, J. and Stamatakis, A.}, Title = {{A critical review on the use of support values in tree viewers and bioinformatics toolkits}}, Journal = {Molecular Biology and Evolution}, Volume = {34}, Pages = {1535-1542}, Year = 2017} ape/vignettes/ape.sty0000644000176200001440000000142214156324304014336 0ustar liggesusers\usepackage{fancyvrb} \usepackage{color,amsmath,booktabs} \usepackage[margin=3.5cm]{geometry} \makeatletter \renewcommand{\maketitle}{% \begin{flushleft}% \sffamily {\LARGE\bfseries\color{darkblue}\@title\par}% \medskip {\large\bfseries\@author\par}% \medskip {\textit{\@date}\par}% \bigskip\hrule\vspace*{1pc}% \end{flushleft}% } \makeatother \usepackage{tcolorbox,xcolor} \tcbuselibrary{skins,breakable} \definecolor{darkblue}{rgb}{0, 0, 0.9} \newcommand{\code}{\texttt} \newcommand{\pkg}{\textsf} \newcommand{\ape}{\pkg{ape}} \newcommand{\phangorn}{\pkg{phangorn}} \newcommand{\R}{\pkg{R}} \newenvironment{Schunk}{\begin{tcolorbox}[breakable,colback=black!10,top=1mm,bottom=0mm,left=1mm,right=0mm,boxrule=.1mm]}{\end{tcolorbox}} ape/vignettes/DrawingPhylogenies.Rnw0000644000176200001440000012245714605157044017342 0ustar liggesusers\documentclass[a4paper]{article} %\VignetteIndexEntry{Drawing Phylogenies} %\VignettePackage{ape} \usepackage{ape} \author{Emmanuel Paradis} \title{Drawing Phylogenies in \R: Basic and Advanced Features With \pkg{ape}} \begin{document} \DefineVerbatimEnvironment{Sinput}{Verbatim}{formatcom=\color{darkblue}} \DefineVerbatimEnvironment{Soutput}{Verbatim}{formatcom=\color{black}\vspace{-1.5em}} \maketitle \tableofcontents\vspace*{1pc}\hrule <>= options(width = 80, prompt = "> ") @ \vspace{1cm} \section{Introduction} Graphical functions have been present in \ape\ since its first version (0.1, released in August 2002). Over the years, these tools have been improved to become quite sophisticated although complicated to use efficiently. This document gives an overview of these functionalities. Section~\ref{sec:basic} explains the basic concepts and tools behind graphics in \ape. A figure made with \ape\ usually starts by calling the function \code{plot.phylo} which is detailed in Section~\ref{sec:plotphylo}, and further graphical annotations can be done with functions covered in Section~\ref{sec:annot}. Section~\ref{sec:spec} shows some specialized functions available in \ape, and finally, Sections~\ref{sec:geom} and \ref{sec:build} give an overview of some ideas to help making complicated figures. \section{Basic Concepts}\label{sec:basic} The core of \ape's graphical tools is the \code{plot} method for the class \code{"phylo"}, the function \code{plot.phylo}. This function is studied in details in Section~\ref{sec:plotphylo}, but first we see the basic ideas behind it and other functions mentioned in this document. \subsection{Graphical Model} The graphical functions in \ape\ use the package \pkg{graphics}. Overall, the conventions of this package are followed quite closely (see Murrell's book \cite{Murrell2006}), so users familiar with graphics in \R\ are expected to find their way relatively easily when plotting phylogenies with \ape. \ape\ has several functions to perform computations before drawing a tree, so that they may be used to implement the same graphical functionalities with other graphical engines such as the \pkg{grid} package. These functions are detailed in the next section. To start simply, we build a small tree with three genera of primates which we will use in several examples in this document: <<>>= library(ape) mytr <- read.tree(text = "((Pan:5,Homo:5):2,Gorilla:7);") @ \noindent Now let's build a small function to show the frame around the plot with dots, and the $x$- and $y$-axes in green: <<>>= foo <- function() { col <- "green" for (i in 1:2) axis(i, col = col, col.ticks = col, col.axis = col, las = 1) box(lty = "19") } @ \noindent We then plot the tree in four different ways (see below for explanations about the options) and call for each of them the previous small function: <>= layout(matrix(1:4, 2, 2, byrow = TRUE)) plot(mytr); foo() plot(mytr, "c", FALSE); foo() plot(mytr, "u"); foo() par(xpd = TRUE) plot(mytr, "f"); foo() box("outer") @ \noindent The last command (\code{box("outer")}) makes visible the most outer frame of the figure showing more clearly the margins around each tree (more on this in Sect.~\ref{sec:geom}). We note also the command \code{par(xpd = TRUE)}: by default this parameter is \code{FALSE} so that graphical elements (points, lines, text, \dots) outside the plotting region (i.e., in the margins or beyond) are cut (clipped).\footnote{\code{par(xpd = TRUE)} is used in several examples in this document mainly because of the small size of the trees drawn here. However, in practice, this is rarely needed.} These small figures illustrate the way trees are drawn with \ape. This can be summarised with the following (pseudo-)algorithm: \bigskip\hrule height 1pt\relax %\renewcommand{\theenumi}{\alph{enumi}} \renewcommand{\labelenumi}{\textbf{\theenumi.}} \begin{enumerate}\small \item Compute the node coordinates depending on the type of tree plot, the branch lengths, and other parameters. \item Evaluate the space required for printing the tip labels. \item Depending on the options, do some rotations and/or translations. \item Set the limits of the $x$- and $y$-axes. \item Open a graphical device (or reset it if already open) and draw an empty plot with the limits found at the previous step. \item Call \code{segments()} to draw the branches. \item Call \code{text()} to draw the labels. \end{enumerate} \hrule height 1pt\relax\bigskip There are a lot of ways to control these steps. The main variations along these steps are given below. \textbf{Step 1. } The option \code{type} specifies the shape of the tree plot: five values are possible, \code{"phylogram"}, \code{"cladogram"}, \code{"fan"}, \code{"unrooted"}, and \code{"radial"} (the last one is not considered in this document). The first three types are valid representations for rooted trees, while the fourth one should be selected for unrooted trees. The node coordinates depend also on whether the tree has branch lengths or not, and on the options \code{node.pos} and \code{node.depth}. This is illustrated below using a tree with eight tips and all branch length equal to one (these options have little effect if the tree has only three tips): <<>>= tr <- compute.brlen(stree(8, "l"), 0.1) tr$tip.label[] <- "" @ \noindent We now draw this tree using the option \code{type = "phylogram"} (first column of plots) or \code{type = "cladogram"} (second column) and different options: <>= foo <- function() { col <- "green" axis(1, col = col, col.ticks = col, col.axis = col) axis(2, col = col, col.ticks = col, col.axis = col, at = 1:Ntip(tr), las = 1) box(lty = "19") } @ <<>>= @ <>= layout(matrix(1:12, 6, 2)) par(mar = c(2, 2, 0.3, 0)) for (type in c("p", "c")) { plot(tr, type); foo() plot(tr, type, node.pos = 2); foo() plot(tr, type, FALSE); foo() plot(tr, type, FALSE, node.pos = 1, node.depth = 2); foo() plot(tr, type, FALSE, node.pos = 2); foo() plot(tr, type, FALSE, node.pos = 2, node.depth = 2); foo() } @ \noindent Some combinations of options may result in the same tree shape as shown by the last two rows of trees. For unrooted and circular trees, only the option \code{use.edge.length} has an effect on the layout and/or the scales of the axes: <>= foo <- function() { col <- "green" for (i in 1:2) axis(i, col = col, col.ticks = col, col.axis = col, las = 1) box(lty = "19") } @ <>= layout(matrix(1:4, 2, 2)) par(las = 1) plot(tr, "u"); foo() plot(tr, "u", FALSE); foo() plot(tr, "f"); foo() plot(tr, "f", FALSE); foo() @ \textbf{Step 2.} In the \pkg{graphics} package, text are printed with a fixed size, which means that whether you draw a small tree or a large tree, on a small or large device, the labels will have the same size. However, before anything is plotted or drawn on the device it is difficult to find the correspondence between this size (in inches) and the user coordinates used for the node coordinates. Therefore, the following steps are implemented to determine the limits on the $x$-axis: \renewcommand{\labelenumi}{\theenumi.} \begin{enumerate} \item Find the width of the device in inches (see Sect.~\ref{sec:overlay}). \item Find the widths of all labels in inches: if at least one of them is wider than the device, assign two thirds of the device for the branches and one third to the tip labels. (This makes sure that by default the tree is visible in the case there are very long tip labels.) \item Otherwise, the space allocated to the tip labels is increased incrementally until all labels are visible on the device. \end{enumerate} The limits on the $y$-axis are easier to determine since it depends only on the number of branches in the tree. The limits on both axes can be changed manually with the options \code{x.lim} and \code{y.lim} which take one or two values: if only one value is given this will set the rightmost or uppermost limit, respectively; if two values are given these will set both limits on the respecive axis.\footnote{These two options differ from their standard counterparts \code{xlim} and \code{ylim} which always require two values.} By default, there is no space between the tip labels and the tips of the terminal branches; however, text strings are printed with a bounding box around them making sure there is actually a small space (besides, the default font is italics making this space more visible). The option \code{label.offset} (which is 0 by default) makes possible to add an explicit space between them (this must be in user coordinates). \textbf{Step 3.} For rooted trees, only 90\textdegree\ rotations are supported using the option \code{direction}.\footnote{To have full control of the tree rotation, the option `rotate' in \LaTeX\ does the job very well.} For unrooted (\code{type = "u"}) and circular (\code{type = "fan"}) trees, full rotation is supported with the option \code{rotate.tree}. If these options are used, the tip labels are not rotated. Label rotation is controlled by other options: \code{srt}\footnote{\code{srt} is for \textit{string rotation}, not to be confused with the function \code{str} to print the \textit{structure} of an object.} for all trees, and \code{lab4ut} for unrooted trees. \textbf{Step 4.} These can be fully controlled with the options \code{x.lim} and \code{y.lim}. Note that the options \code{xlim} and \code{ylim} \emph{cannot} be used from \code{plot.phylo}. \textbf{Step 5.} If the options \code{plot = FALSE} is used, then steps 6 and 7 are not performed. \subsection{Computations}\label{sec:comput} As we can see from the previous section, a lot of computations are done before a tree is plotted. Some of these computations are performed by special functions accessible to all users, particularly the three functions used to calculate the node coordinates. First, two functions calculate ``node depths'' which are the coordinates of the nodes on the $x$-axis for rooted trees: <<>>= args(node.depth.edgelength) args(node.depth) @ \noindent Here, \code{phy} is an object of class \code{"phylo"}. The first function uses edge lengths to calculate these coordinates, while the second one calculates these coordinates proportional to the number of tips descending from each node (if \code{method = 1}), or evenly spaced (if \code{method = 2}). The third function is \code{node.height} and is used to calculate ``node heights'', the coordinates of the nodes on the $y$-axis: <<>>= args(node.height) @ \noindent If \code{clado.style = TRUE}, the node heights are calculated for a ``triangular cladogram'' (see figure above). Otherwise, by default they are calculated to fall in the middle of the vertical segments with the default \code{type = "phylogram"}.\footnote{It may be good to remind here than these segments, vertical since \code{direction = "rightwards"} is the default, are not part of the edges of the tree.} For unrooted trees, the node coordinates are calculated with the ``equal angle'' algorithm described by Felsenstein \cite{Felsenstein2004}. This is done by an internal function which arguments are: <<>>= args(unrooted.xy) @ \noindent There are three other internal functions used to plot the segments of the tree after the above calculations have been performed (steps 1--4 in the previous section): <<>>= args(phylogram.plot) args(cladogram.plot) args(circular.plot) @ \noindent Although these four functions are not formally documented, they are anyway exported because they are used by several packages outside \ape. \section{The \code{plot.phylo} Function}\label{sec:plotphylo} The \code{plot} method for \code{"phylo"} objects follows quite closely the \R\ standard practice. It has a relatively large number of arguments: the first one (\code{x}) is mandatory and is the tree to be drawn. It is thus not required to name it, so in practice the tree \code{tr} can be plotted with the command \code{plot(tr)}. All other arguments have default values: <<>>= args(plot.phylo) @ \noindent The second and third arguments are the two commonly used in practice, so they can be modified without explicitly naming them like in the above examples. Besides, \code{"cladogram"} can be abbreviated with \code{"c"}, \code{"unrooted"} with \code{"u"}, and so on. For the other arguments, it is better to name them if they are used or modified (e.g., \code{lab4ut = "a"}). \subsection{Overview on the Options} The logic of this long list of options is double: the user can modify the aspect of the tree plot, and/or use some of these options to display some data in association with the tree. Therefore, the table below group these options into three categories. The following two sections show how data can be displayed in connection to the tips or to the branches of the tree. \begin{center} \begin{tabular}{lll} \toprule Aspect of the tree & Attributes of the labels & Attributes of the edges\\ \midrule \code{type} & \code{show.tip.label} & \code{edge.color}\\ \code{use.edge.length} & \code{show.node.label} & \code{edge.width}\\ \code{node.pos} & \code{font} & \code{edge.lty}\\ \code{x.lim} & \code{tip.color}\\ \code{y.lim} & \code{cex}\\ \code{direction} & \code{adj}\\ \code{no.margin} & \code{underscore}\\ \code{root.edge} & \code{srt}\\ \code{rotate.tree} & \code{lab4ut}\\ \code{open.angle} & \code{label.offset}\\ \code{node.depth} & \code{align.tip.label}\\ \bottomrule \end{tabular} \end{center} \subsection{Connecting Data to the Tips} It is common that some data are associated with the tips of a tree: body mass, population, treatment, \dots\ The options \code{font}, \code{tip.color}, and \code{cex} make possible to show this kind of information by changing the font (normal, bold, italics, or bold-italics), the colour, or the size of the tip labels, or any combination of these. These three arguments work in the usual \R\ way: they can a vector of any length whose values are eventually recycled if this length is less than the number of tips. This makes possible to change all tips if a single value is given. For instance, consider the small primate tree where we want to show the geographic distributions stored in a factor: <<>>= geo <- factor(c("Africa", "World", "Africa")) @ \noindent We can define a color for each region and use the above factor as a numeric index vector and pass it to \code{tip.color}: \begin{center} \setkeys{Gin}{width=.5\textwidth} <>= (mycol <- c("blue", "red")[geo]) plot(mytr, tip.color = mycol) @ \end{center} The values must be in the same order than in the vector of tip labels, here \code{mytr\$tip.label}. Reordering can be done in the usual \R\ way (e.g., with \code{names} or with \code{row.names} if the data are in a data frame). This can be combined with another argument, for instance to show (relative) body size: \begin{center} \setkeys{Gin}{width=.5\textwidth} <>= par(xpd = TRUE) plot(mytr, tip.color = mycol, cex = c(1, 1, 1.5)) @ \end{center} The function \code{def} gives another way to define the above arguments given a vector of labels (\code{x}): <<>>= args(def) @ \noindent The `\code{...}' are arguments separated by commas of the form \code{\textsl{