rotl/0000755000177500001440000000000013056457107011562 5ustar deepayanusersrotl/inst/0000755000177500001440000000000013056407503012532 5ustar deepayanusersrotl/inst/CITATION0000644000177500001440000000476113055321701013671 0ustar deepayanusersc( bibentry( header = "To cite rotl in publications use:", bibtype = "Article", title = "{rotl}: an R package to interact with the Open Tree of Life data", author = personList(as.person("Francois Michonneau"), as.person("Joseph W. Brown"), as.person("David J. Winter")), journal = "Methods in Ecology and Evolution", year = "2016", volume = "7", number = "12", pages = "1476-1481", doi = "10.1111/2041-210X.12593", textVersion = paste("Michonneau, F., Brown, J. W. and Winter, D. J. (2016), rotl: an R package to interact with the Open Tree of Life data. Methods Ecol Evol. 7(12):1476-1481. doi:10.1111/2041-210X.12593") ), bibentry( header = "You may also want to cite the paper for the Open Tree of Life", bibtype = "Article", title = "Synthesis of phylogeny and taxonomy into a comprehensive tree of life", , author = personList( as.person("Cody E. Hinchliff"), as.person("Stephen A. Smith"), as.person("James F. Allman"), as.person("J. Gordon Burleigh"), as.person("Ruchi Chaudhary"), as.person("Lyndon M. Coghill"), as.person("Keith A. Crandall"), as.person("Jiabin Deng"), as.person("Bryan T. Drew"), as.person("Romina Gazis"), as.person("Karl Gude"), as.person("David S. Hibbett"), as.person("Laura A. Katz"), as.person("H. Dail Laughinghouse IV"), as.person("Emily Jane McTavish"), as.person("Peter E. Midford"), as.person("Christopher L. Owen"), as.person("Richard H. Ree"), as.person("Jonathan A. Rees"), as.person("Douglas E. Soltis"), as.person("Tiffani Williams"), as.person("Karen A. Cranston")), journal = "Proceedings of the National Academy of Sciences", year = "2015", volume = "112", number = "41", pages = "12764-12769", doi = "10.1073/pnas.1423041112", textVersion = c("Hinchliff, C. E., et al. (2015). 
Synthesis of phylogeny and taxonomy into a comprehensive tree of life. Proceedings of the National Academy of Sciences 112.41 (2015): 12764-12769") ) ) rotl/inst/extdata/0000755000177500001440000000000012707532206014165 5ustar deepayanusersrotl/inst/extdata/egg.csv0000644000177500001440000002111512706456642015454 0ustar deepayanusersanimal,Spp,Lndim,Measure,Neggs,Nclutches,ESr,Type,StudyID,Year,D,EN,Zr,VZr Zonotrichia_leucophrys,White-crowned sparrow,0,volume,294,73,0.140045943,stat,Mead1987,1987,3.421917808,85.91673339,0.140972438,0.012060292 Passer_domesticus,House sparrow,0.009407469,volume,149,31,0.11175203,stat,Cordero2000,2000,4.04516129,36.83413078,0.112220753,0.029555954 Serinus_canaria,Canary,0,volume,52,21,0.4967914,stat,Leitner2006,2006,2.180952381,23.84279476,0.545037117,0.047978211 Turdus_merula,European blackbird,0.021189299,volume,82,54,0.3859854,stat,Martyka2010,2010,1.414814815,57.95811518,0.40707397,0.018195675 Agelaius_phoeniceus,Red-winged blackbird,0.218316086,volume,394,106,0.07410136,raw,Weatherhead1985,1985,3.173584906,124.1498216,0.074237439,0.008254242 Quiscalus_mexicanus,Great-tailed grackle,0.281894985,mass,822,205,0.051788336,raw,Teather1989,1989,3.407804878,241.2109934,0.05183471,0.004197959 Taeniopygia_guttata,Zebra finch,-0.010812869,mass,116,24,-0.05636213,stat,Rutkowska2005,2005,4.066666667,28.52459016,-0.056421926,0.039177906 Taeniopygia_guttata,Zebra finch,-0.010812869,mass,90,20,0,stat,Rutkowska2002,2002,3.8,23.68421053,0,0.048346056 Vanellus_vanellus,Northern lapwing,-0.029825984,volume,114,32,0.03014961,stat,Lislevand2005,2005,3.05,37.37704918,0.03015875,0.029089175 Philomachus_pugnax,Ruff,0.22184875,volume,120,30,0.03462025,stat,Thuman2003,2003,3.4,35.29411765,0.034634091,0.030965392 Luscinia_svecica,Bluethroat,0,volume,102,18,-0.1468127,stat,Lifjeld2005,2005,4.733333333,21.54929577,-0.147881353,0.053910402 Sturnus_unicolor,Spotless 
starling,0.025305865,mass,153,34,-0.133824538,stat,Cordero2001,2001,3.8,40.26315789,-0.134632122,0.026836158 Branta_canadensis,Canada goose,0.061028185,mass,242,44,-0.006674089,stat,Leblanc1987,1987,4.6,52.60869565,-0.006674188,0.020157756 Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,132,33,-0.1910071,stat,Martinez-Padilla2007,2007,3.4,38.82352941,-0.193382195,0.027914614 Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,108,28,0.2165249,stat,Blanco2003,2003,3.285714286,32.86956522,0.220007175,0.033478894 Larus_michahellis,Yellow-legged gull,0.091409863,mass,1068,669,-0.06718163,stat,Rubolini2009,2009,1.477130045,723.0236794,-0.067282976,0.001388843 Cincloramphus_cruralis,Brown songlark,0.363356588,mass,44,17,0.1439293,stat,Isaksson2010,2010,2.270588235,19.37823834,0.144935702,0.061056628 Pavo_cristatus,Peafowl,0.119205592,mass,3313,205,0.005428755,stat,Petrie2001,2001,13.12878049,252.3463625,0.005428808,0.004010486 Parus_caeruleus,Blue tit,0.010299957,mass,192,21,0.07261821,stat,Cichon2003,2003,7.514285714,25.5513308,0.072746264,0.044343281 Larus_argentatus,Herring gull,0.049687784,mass,79,30,0.1160999,stat,Bogdanova2006,2006,2.306666667,34.24855491,0.116625804,0.03200148 Larus_fuscus,Lesser black-backed gull,0.064503231,mass,60,22,0.124354,stat,Bogdanova2005,2005,2.381818182,25.19083969,0.125001014,0.045063639 Corvus_monedula,Jackdaw,0.041392685,mass,226,70,0.2033713,stat,Arnold2003,2003,2.782857143,81.21149897,0.206246808,0.012785844 Carpodacus_mexicanus,House finch,0.002809678,volume,378,84,-0.075561895,raw,Badyaev2006,2006,3.8,99.47368421,-0.075706199,0.010365521 Carpodacus_mexicanus,House finch,0,volume,304,64,-0.072529696,raw,Badyaev2006,2006,4,76,-0.072657281,0.01369863 Sterna_hirundo,Common Tern,0.008600172,volume,427,158,0,raw,Fletcher2004,2004,2.362025316,180.7770632,0,0.005625023 Lonchura_striata,Bengalese finch,-0.021189299,mass,116,34,0.09239892,stat,Soma2007,2007,2.929411765,39.59839357,0.092663229,0.027323604 
Molothrus_bonariensis,Ring-billed gull,0.078874433,mass,90,30,-0.000377536,stat,Chin2012,2012,2.6,34.61538462,-0.000377536,0.03163017 Pica_pica,Magpie,0.063358906,volume,43,8,-0.036738337,raw,Slagsvold1992,1992,4.5,9.555555556,-0.036754879,0.152542373 Corvus_corone,Hooded crow,0.049218023,volume,103,31,0.073794034,raw,Slagsvold1992,1992,2.858064516,36.03837472,0.073928423,0.030267833 Taeniopygia_guttata,Zebra finch,-0.010812869,mass,133,22,-0.03994883,stat,Pariser2012,2012,5.036363636,26.40794224,-0.039970102,0.042720543 Delichon_urbicum,House martin,0,mass,90,35,0.03076685,stat,Gil2006,2006,2.257142857,39.87341772,0.030776563,0.027119808 Xanthocephalus_xanthocephalus,Yellow-headed blackbird,0.247321812,mass,90,23,-0.088294337,raw,Richter1983,1983,3.330434783,27.02349869,-0.088524861,0.04162591 Larus_delawarensis,Ring-billed gull,0.06069784,mass,110,37,0.053696485,raw,Meathrel1987,1987,2.578378378,42.66247379,0.053748182,0.025212749 Cincloramphus_cruralis,Brown songlark,0.363356588,volume,95,40,-0.3293106,stat,Magrath2003,2003,2.1,45.23809524,-0.342054801,0.02367531 Larus_ridibundus,Black-headed gull,0,mass,60,20,0.1391671,stat,Groothuis2006,2006,2.6,23.07692308,0.140076126,0.049808429 Anseranas_semipalmata,Magpie goose,0.125672077,mass,60,16,-0.110406595,raw,Whitehead1990,1990,3.2,18.75,-0.11085851,0.063492063 Sterna_hirundo,Common Tern,0,volume,108,40,0.073872636,raw,Gonzalez-Solis2005,2005,2.36,45.76271186,0.074007456,0.023384859 Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,80,16,0.097815848,raw,Wu2010,2010,4.2,19.04761905,0.098129617,0.06231454 Larus_delawarensis,Shiny cowbird,0.035472318,volume,90,90,-0.07294093,stat,Tuero2012,2012,1,90,-0.073070702,0.011494253 Pygoscelis_antarcticus,Chinstrap penguin,0.058509856,volume,264,132,0.03094903,stat,Fargallo2006,2006,1.8,146.6666667,0.030958917,0.006960557 Phoebastria_irrorata,Waved albatross,0.091157684,volume,224,224,-0.088469783,stat,Awkerman2007,2007,1,224,-0.088701688,0.004524887 Passer_domesticus,House 
sparrow,0.009407469,volume,77,19,0.05470045,stat,Wetzel2012,2012,3.442105263,22.37003058,0.054755105,0.051626145 Sula_nebouxii,Blue-footed booby,-0.147287056,mass,76,56,-0.2611557,stat,D'Alba2007,2007,1.285714286,59.11111111,-0.267348297,0.017821782 Stercorarius_parasiticus,Parasitic jaeger,-0.063477845,volume,534,267,0.1397023,stat,Janssen2006,2006,1.8,296.6666667,0.140621937,0.003405221 Ficedula_albicollis,Collared flycatcher,0.013679697,volume,1162,198,0.04804496,stat,Bowers2013,2013,4.894949495,237.3875361,0.048081979,0.004266438 Sterna_dougallii,Roseate tern,0,mass,440,146,-0.062868359,raw,Szczys2005,2005,2.610958904,168.5204617,-0.062951384,0.006041549 Centrocercus_urophasianus,Greater sage-grouse,0.261995252,volume,146,20,0.06614164,stat,Atamian2010,2010,6.04,24.17218543,0.066238344,0.04723178 Phasianus_colchicus,Ring-necked pheasant,0.140492874,mass,106,15,0.4531655,stat,Rubolini2007,2007,5.853333333,18.10933941,0.488676681,0.06618423 Taeniopygia_guttata,Zebra finch,-0.010812869,mass,43,15,0,raw,Clotfelter1996,1996,2.493333333,17.2459893,0,0.070195195 Larus_fuscus,Lesser black-backed gull,-0.010812869,volume,304,101,-0.00990453,raw,Bradbury1999,1999,2.607920792,116.5679575,-0.009904854,0.008805301 Sturnus_vulgaris,European starling,0.01616166,mass,354,69,0.04996099,stat,Love2011,2011,4.304347826,82.24242424,0.050002622,0.012619503 Erythrura_gouldiae,Gouldian finch,0,volume,1473,324,-0.019559388,stat,Pryke2009,2009,3.837037037,383.8899614,-0.019561883,0.00262543 Larus_ridibundus,Black-headed gull,0,volume,147,49,0.05453834,stat,Lezalova2005,2005,2.6,56.53846154,0.05459251,0.018678161 Aythya_ferina,Common pochard,0,volume,185,26,0.1957707,stat,Lezalova2013a,2014,5.892307692,31.39686684,0.198330883,0.035215153 Aythya_fuligula,Tufted duck,0.026629385,volume,46,7,-0.04923335,stat,Lezalova2013b,2014,5.457142857,8.429319372,-0.049273187,0.184185149 Aythya_affinis,Lesser 
scaup,0.031791834,volume,38,5,-0.143657133,raw,Dawson1996,1996,6.28,6.050955414,-0.144657788,0.32776618 Chen_caerulescens,Lesser snow goose,0.037500891,mass,85,22,-0.089455106,raw,Ankney1980,1980,3.290909091,25.82872928,-0.089694871,0.043804453 Falco_sparverius,American kestrels,-0.036212173,volume,170,34,0.093153037,raw,Anderson1997,1997,4.2,40.47619048,0.093423893,0.026683609 Columba_livia,Domestic pigeon,0,mass,63,16,0.3301501,stat,Pike2005,2005,3.35,18.80597015,0.342996707,0.063267233 Zonotrichia_leucophrys,White-crowned sparrow,0,mass,38,11,0.06348392,stat,Bonier2007,2007,2.963636364,12.82208589,0.063569411,0.101811368 Quiscalus_major,Boat-tailed grackle,0.268544242,mass,122,41,-0.056059589,raw,Bancroft1984,1984,2.580487805,47.2778828,-0.056118426,0.022584639 Cuculus_canorus,Common cuckoo,0.042879996,volume,71,38,0.1527316,stat,Fossoy2012,2012,1.694736842,41.89440994,0.153936091,0.025710636 Eudyptes_chrysocome,Rockhopper penguin,0.037301411,mass,213,194,0.1191936,stat,Poisbleau2010,2010,1.078350515,197.5239006,0.119762927,0.005140756 Larus_michahellis,Yellow-legged gull,0.091409863,volume,124,48,-0.003408011,stat,Perez2006,2006,2.266666667,54.70588235,-0.003408024,0.019340159 Gallus_gallus,Red Junglefowl,0.080943092,volume,220,71,-0.08467843,stat,Parker2005,2005,2.678873239,82.12407992,-0.084881699,0.012638378 rotl/inst/extdata/protist_mutation_rates.csv0000644000177500001440000000053612547574666021552 0ustar deepayanusersspecies,mu,pop.size,genome.size Tetrahymena thermophila,7.61E-012,1.12E+008,1.04E+008 Paramecium tetraurelia,1.94E-011,1.24E+008,7.20E+007 Chlamydomonas reinhardtii,2.08E-010,1.00E+008,1.12E+008 Dictyostelium discoideum,2.9E-011,7.40E+006,3.40E+007 Saccharomyces cerevisiae,3.3E-010,1.00E+008,1.25E+008 Saccharomyces pombe,2E-010,1.00E+007,1.25E+008 rotl/inst/doc/0000755000177500001440000000000013056407503013277 5ustar deepayanusersrotl/inst/doc/how-to-use-rotl.R0000644000177500001440000000654313056407503016417 0ustar deepayanusers## 
------------------------------------------------------------------------ library(rotl) taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) ## ------------------------------------------------------------------------ resolved_names <- tnrs_match_names(taxa, context_name = "Animals") ## ---- fig.width=7, fig.height=4------------------------------------------ my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id) plot(my_tree, no.margin=TRUE) ## ------------------------------------------------------------------------ taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) resolved_names inspect(resolved_names, taxon_name = "diadema") ## ------------------------------------------------------------------------ resolved_names <- update(resolved_names, taxon_name = "diadema", new_row_number = 2) ## we could also have used the ott_id to replace this taxon: ## resolved_names <- update(resolved_names, taxon_name = "diadema", ## new_ott_id = 4930522) ## ------------------------------------------------------------------------ diadema_info <- taxonomy_taxon_info(631176) tax_rank(diadema_info) synonyms(diadema_info) tax_name(diadema_info) ## ------------------------------------------------------------------------ diadema_tax_tree <- taxonomy_subtree(631176) diadema_tax_tree ## ---- fig.width=7, fig.height=4------------------------------------------ mono_id <- tnrs_match_names("Monotremata") mono_tree <- tol_subtree(ott_id = ott_id(mono_id)) plot(mono_tree) ## ------------------------------------------------------------------------ furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia") furry_ids <- furry_studies$study_ids ## ------------------------------------------------------------------------ furry_meta <- get_study_meta("pg_2550") get_publication(furry_meta) ## The citation for the source of the study get_tree_ids(furry_meta) ## This study has 10 trees 
associated with it candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL ## ---- eval=FALSE--------------------------------------------------------- # get_study_tree(study_id="pg_710", tree_id="tree1277", # tip_label='ott_taxon_name', file = "/tmp/tree.tre", # file_format = "newick") # tr <- ape::read.tree(file = "/tmp/tree.tre") ## ------------------------------------------------------------------------ giant_squid <- tnrs_match_names("Architeuthis") tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE)) ## ------------------------------------------------------------------------ turducken <- c("Meleagris", "Anas", "Gallus", "Sus") taxa <- tnrs_match_names(turducken, context="Animals") taxa ## ---- error=TRUE--------------------------------------------------------- tr <- tol_induced_subtree(ott_id(taxa)) ## ------------------------------------------------------------------------ in_tree <- is_in_tree(ott_id(taxa)) in_tree tr <- tol_induced_subtree(ott_id(taxa)[in_tree]) ## ---- fig.width=7, fig.height=4------------------------------------------ turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus", "Sus scrofa") taxa <- tnrs_match_names(turducken_spp, context="Animals") tr <- tol_induced_subtree(ott_id(taxa)) plot(tr) rotl/inst/doc/meta-analysis.R0000644000177500001440000000435013056407503016173 0ustar deepayanusers## ----egg_data, cache=TRUE------------------------------------------------ library(rotl) if (require(readxl) && require(fulltext)) { doi <- "10.1111/jeb.12282" xl_file <- ft_get_si(doi, 1, save.name="egg.xls") egg_data <- read_excel(xl_file) } else { egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl")) } head(egg_data) ## ----eggs_in_a_funnel, fig.width=6, fig.height=3------------------------- plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16, ylab="Effect size (Zr)", xlab="Precision (1/SE)", main="Effect sizes for sex bias in egg size among 51 brid species" 
) ## ---- clean_eggs--------------------------------------------------------- egg_data <- as.data.frame(egg_data) egg_data$animal <- tolower(egg_data$animal) ## ---- birds, cache=TRUE-------------------------------------------------- taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals") head(taxa) ## ----bird_map------------------------------------------------------------ taxon_map <- structure(taxa$search_string, names=taxa$unique_name) ## ----odd_duck------------------------------------------------------------ taxon_map["Anser caerulescens"] ## ----birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'------ tr <- tol_induced_subtree(taxa$ott_id) plot(tr, show.tip.label=FALSE) ## ----tip_lab------------------------------------------------------------- tr$tip.label[1:4] ## ----clean_tips---------------------------------------------------------- otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label <- taxon_map[ otl_tips ] ## ----remove_nodes-------------------------------------------------------- tr$node.label <- NULL ## ----model--------------------------------------------------------------- library(MCMCglmm, quiet=TRUE) set.seed(123) pr<-list(R=list(V=1,nu=0.002), G=list(G1=list(V=1,nu=0.002)) ) model <- MCMCglmm(Zr~1,random=~animal, pedigree=tr, mev=egg_data$VZr, prior=pr, data=egg_data, verbose=FALSE) ## ----PhyH---------------------------------------------------------------- var_comps <- colMeans(model$VCV ) var_comps["animal"] / sum(var_comps) rotl/inst/doc/data_mashups.R0000644000177500001440000000435013056407503016075 0ustar deepayanusers## ---- data--------------------------------------------------------------- csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl") mu <- read.csv(csv_path, stringsAsFactors=FALSE) mu ## ---- context------------------------------------------------------------ library(rotl) tnrs_contexts() ## ---- 
match-------------------------------------------------------------- taxon_search <- tnrs_match_names(names=mu$species, context_name="All life") knitr::kable(taxon_search) ## ---- munge-------------------------------------------------------------- mu$ott_name <- taxon_search$unique_name mu$ott_id <- taxon_search$ott_id ## ---- properties--------------------------------------------------------- studies_properties() ## ----taxon_count--------------------------------------------------------- studies_find_trees(property="ot:ottId", value="180195") ## ---- all_taxa_count----------------------------------------------------- hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE) sapply(hits, function(x) sum(x[["n_matched_trees"]])) ## ----subtree, fig.width=7, fig.height=4--------------------------------- tr <- tol_induced_subtree(ott_ids=mu$ott_id) plot(tr) ## ---- match_names-------------------------------------------------------- mu$ott_name[1] tr$tip.label[4] ## ---- sub---------------------------------------------------------------- tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label %in% mu$ott_name ## ----phylobase----------------------------------------------------------- library(phylobase) mu_numeric <- mu[,c("mu", "pop.size", "genome.size")] rownames(mu_numeric) <- mu$ott_name tree_data <- phylo4d(tr, mu_numeric) ## ---- fig.width=7, fig.height=5----------------------------------------- plot(tree_data) ## ------------------------------------------------------------------------ extra_data <- study_external_IDs("pg_1980") extra_data ## ------------------------------------------------------------------------ library(rentrez) seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta") cat(seqs) ## ------------------------------------------------------------------------ Tt_ids <- taxon_external_IDs(mu$ott_id[2]) Tt_ids 
rotl/inst/doc/data_mashups.Rmd0000644000177500001440000001764313056407503016427 0ustar deepayanusers--- title: "Connecting data to Open Tree trees" author: "David Winter" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Connecting data to Open Tree trees} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Combining data from OToL and other sources. One of the major goals of `rotl` is to help users combine data from other sources with the phylogenetic trees in the Open Tree database. This example document describes some of the ways in which a user might connect data to trees from Open Tree. ## Get Open Tree IDs to match your data. Let's say you have a dataset where each row represents a measurement taken from one species, and your goal is to put these measurements in some phylogenetic context. Here's a small example: the best estimate of the mutation rate for a set of unicellular Eukaryotes along with some other property of those species which might explain the mutation rate: ```{r, data} csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl") mu <- read.csv(csv_path, stringsAsFactors=FALSE) mu ``` If we want to get a tree for these species we need to start by finding the unique ID for each of these species in the Open Tree database. We can use the Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do that we should see if any of the taxonomic contexts, which can be used to narrow a search and avoid conflicts between different codes, apply to our group of species: ```{r, context} library(rotl) tnrs_contexts() ``` Hmm, none of those groups contain all of our species. In this case we can search using the `All life` context and the function `tnrs_match_names`: ```{r, match} taxon_search <- tnrs_match_names(names=mu$species, context_name="All life") knitr::kable(taxon_search) ``` Good, all of our species are known to Open Tree.
Note, though, that one of the names is a synonym. _Saccharomyces pombe_ is an older name for what is now called _Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name Resolution Service is designed to deal with these problems (and similar ones like misspellings), but it is always a good idea to check the results of `tnrs_match_names` closely to ensure the results are what you expect. In this case we have a good ID for each of our species so we can move on. Before we do that, let's ensure we can match up our original data to the Open Tree names and IDs by adding them to our `data.frame`: ```{r, munge} mu$ott_name <- taxon_search$unique_name mu$ott_id <- taxon_search$ott_id ``` ## Find a tree with your taxa Now let's find a tree. There are two possible options here: we can search for published studies that include our taxa or we can use the 'synthetic tree' from Open Tree. We can try both approaches. ### Published trees Before we can search for published studies or trees, we should check out the list of properties we can use to perform such searches: ```{r, properties} studies_properties() ``` We have `ottIds` for our taxa, so let's use those IDs to search for trees that contain them. Starting with our first species _Tetrahymena thermophila_ we can use `studies_find_trees` to do this search. ```{r taxon_count} studies_find_trees(property="ot:ottId", value="180195") ``` Well... that's not very promising. We can repeat that process for all of the IDs to see if the other species are better represented. ```{r, all_taxa_count} hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE) sapply(hits, function(x) sum(x[["n_matched_trees"]])) ``` OK, most of our species are not in any of the published trees available. You can help fix this sort of problem by [making sure you submit your published trees to Open Tree](https://tree.opentreeoflife.org/curator).
### A part of the synthesis tree Thankfully, we can still use the complete Tree of Life made from the combined results of all of the published trees and taxonomies that go into Open Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs. Using the default arguments you can get a tree object into your R session: ```{r subtree, fig.width=7, fig.height=4} tr <- tol_induced_subtree(ott_ids=mu$ott_id) plot(tr) ``` ### Connect your data to the tips of your tree Now that we have a tree for our species, how can we use the tree and the data together? The package `phylobase` provides an object class called `phylo4d`, which is designed to represent a phylogeny and data associated with its tips. In order to get our tree and data into one of these objects we have to make sure the labels in the tree and in our data match exactly. That's not quite the case at the moment (tree labels have underscores and IDs appended): ```{r, match_names} mu$ott_name[1] tr$tip.label[4] ``` `rotl` provides a convenience function `strip_ott_ids` to deal with these. ```{r, sub} tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label %in% mu$ott_name ``` Ok, now the tips are together we can make a new dataset. The `phylo4d()` function matches tip labels to the row names of a `data.frame`, so let's make a new dataset that contains just the relevant data and has row names to match the tree: ```{r phylobase} library(phylobase) mu_numeric <- mu[,c("mu", "pop.size", "genome.size")] rownames(mu_numeric) <- mu$ott_name tree_data <- phylo4d(tr, mu_numeric) ``` And now we can plot the data and the tree together: ```{r, fig.width=7, fig.height=5} plot(tree_data) ``` ## Find external data associated with studies, trees and taxa from Open Tree In the above example we looked for a tree that related species in another dataset. Now we will go the other way, and try to find data associated with Open Tree records in other databases.
### Get external data from a study Let's imagine you were interested in extending or reproducing the results of a published study. If that study is included in Open Tree you can find it via `studies_find_studies` or `studies_find_trees` and retrieve the published trees with `get_study`. `rotl` will also help you find external data. The function `study_external_IDs` retrieves the DOI for a given study, and uses that to gather some more data: ```{r} extra_data <- study_external_IDs("pg_1980") extra_data ``` Here the returned object contains an `external_data_url` (in this case a link to the study in Treebase), a pubmed ID for the paper and a vector of IDs for the NCBI's nucleotide database. The packages `treebase` and `rentrez` provide functions to make use of these IDs within R. As an example, let's use `rentrez` to download the first two DNA sequences and print them. ```{r} library(rentrez) seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta") cat(seqs) ``` You could further process these sequences in R with the function `read.dna` from `ape` or save them to disk by specifying a file name with `cat`. ### Find an OTT taxon in another taxonomic database It is also possible to map an Open Tree taxon to a record in another taxonomic database. For instance, if we wanted to search for data about one of the tips of the sub-tree we fetched in the example above we could do so using `taxon_external_IDs`: ```{r} Tt_ids <- taxon_external_IDs(mu$ott_id[2]) Tt_ids ``` A user could then use `rgbif` to find locality records using the gbif ID or `rentrez` to get genetic or bibliometric data from the NCBI's databases. ## What next The demonstration gets you to the point of visualizing your data in a phylogenetic context. But there's a lot more you can do with this sort of data in R. For instance, you could use packages like `ape`, `caper`, `phytools` and `MCMCglmm` to perform phylogenetic comparative analyses of your data.
You could gather more data on your species using packages that connect to trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from the US National Phenology Network. You could also use `rentrez` to find genetic data for each of your species, and use that data to generate branch lengths for the phylogeny. rotl/inst/doc/meta-analysis.html0000644000177500001440000012107513056407503016742 0ustar deepayanusers Using the Open Tree synthesis in a comparative analysis

Using the Open Tree synthesis in a comparative analysis

David Winter

2017-03-03

Phylogenetic Comparative Methods

The development of phylogenetic comparative methods has made phylogenies an important source of data in fields as diverse as ecology, genomics and medicine. Comparative methods can be used to investigate patterns in the evolution of traits or the diversification of lineages. In other cases a phylogeny is treated as a “nuisance parameter”, allowing the autocorrelation created by the shared evolutionary history of the different species included to be controlled for.

In many cases finding a tree that relates the species for which trait data are available is a rate-limiting step in such comparative analyses. Here we show how the synthetic tree provided by Open Tree of Life (and made available in R via rotl) can help to fill this gap.

A phylogenetic meta-analysis

To demonstrate the use of rotl in a comparative analysis, we will partially reproduce the results of Rutkowska et al 2014. Very briefly, this study is a meta-analysis summarising the results of multiple studies testing for systematic differences in the size of eggs which contain male and female offspring. Such a difference might mean that birds invest more heavily in one sex than the other.

Because this study involves data from 51 different species, Rutkowska et al used a phylogenetic comparative approach to account for the shared evolutionary history among some of the studied species.

Gather the data

If we are going to reproduce this analysis, we will first need to gather the data. Thankfully, the data is available as supplementary material from the publisher’s website. We can collect the data using fulltext (with the paper’s DOI as input) and read it into memory with readxl:

library(rotl)

# Load the egg-size meta-analysis data into `egg_data`.
#
# Preferred path: when the optional packages readxl and fulltext are both
# available, fetch the first supplementary file for the paper's DOI and read
# the spreadsheet. `require()` is used deliberately here for its logical
# return value, so a missing optional package selects the fallback branch
# instead of raising an error.
#
# Fallback path: read the copy of the data bundled in the package's
# `extdata` directory.
if (require(readxl) && require(fulltext)) {
    doi <- "10.1111/jeb.12282"
    xl_file <- ft_get_si(doi, 1, save.name = "egg.xls")
    egg_data <- read_excel(xl_file)
} else {
    # Keep text columns as character rather than factor, so both branches
    # yield the same column types (the spreadsheet branch returns <chr>
    # columns) regardless of the R version's default.
    egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"),
                         stringsAsFactors = FALSE)
}
## Loading required package: readxl
## Loading required package: fulltext
head(egg_data)
## # A tibble: 6 × 14
##                   animal                   Spp       Lndim Measure Neggs
##                    <chr>                 <chr>       <dbl>   <chr> <dbl>
## 1 Zonotrichia_leucophrys White-crowned sparrow 0.000000000  volume   294
## 2      Passer_domesticus         House sparrow 0.009407469  volume   149
## 3        Serinus_canaria                Canary 0.000000000  volume    52
## 4          Turdus_merula    European blackbird 0.021189299  volume    82
## 5    Agelaius_phoeniceus  Red-winged blackbird 0.218316086  volume   394
## 6    Quiscalus_mexicanus  Great-tailed grackle 0.281894985    mass   822
## # ... with 9 more variables: Nclutches <dbl>, ESr <dbl>, Type <chr>,
## #   StudyID <chr>, Year <dbl>, D <dbl>, EN <dbl>, Zr <dbl>, VZr <dbl>

The most important variable in this dataset is Zr, which is a normalized effect size for difference in size between eggs that contain males and females. Values close to zero come from studies that found the sex of an egg’s inhabitant had little effect on its size, while large positive or negative values correspond to studies with substantial sex biases (towards males and females respectively). Since this is a meta-analysis we should produce the classic funnel plot with effect size on the y-axis and precision (the inverse of the sample standard error) on the x-axis. Here we calculate precision from the sample variance (VZr):

# Funnel plot: effect size (Zr) on the y-axis against precision on the
# x-axis, where precision is 1 / sqrt(VZr), i.e. the inverse of the standard
# error derived from the sampling variance column.
plot(1 / sqrt(egg_data$VZr), egg_data$Zr, pch = 16,
     ylab = "Effect size (Zr)",
     xlab = "Precision (1/SE)",
     main = "Effect sizes for sex bias in egg size among 51 bird species")

In order to use this data later on we need to first convert it to a standard data.frame. We can also convert the animal column (the species names) to lower case which will make it easier to match names later on:

# Normalise to a plain data.frame (the spreadsheet branch yields a tibble),
# then lower-case the species names to simplify later name matching.
egg_data <- as.data.frame(egg_data)
egg_data[["animal"]] <- tolower(egg_data[["animal"]])

Find the species in OTT

We can use the OTL synthesis tree to relate these species. To do so we first need to find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the Taxonomic Name Resolution Service function tnrs_match_names:

taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals")
head(taxa)
##            search_string            unique_name approximate_match ott_id
## 1 zonotrichia_leucophrys Zonotrichia leucophrys              TRUE 265553
## 2      passer_domesticus      Passer domesticus              TRUE 745175
## 3        serinus_canaria        Serinus canaria              TRUE 464865
## 4          turdus_merula          Turdus merula              TRUE 568572
## 5    agelaius_phoeniceus    Agelaius phoeniceus              TRUE 226605
## 6    quiscalus_mexicanus    Quiscalus mexicanus              TRUE 743411
##   is_synonym          flags number_matches
## 1      FALSE                             1
## 2      FALSE                             1
## 3      FALSE SIBLING_HIGHER              2
## 4      FALSE                             1
## 5      FALSE                             2
## 6      FALSE                             1

All of these species are in OTT, but a few of them go by different names in the Open Tree than we have in our data set. Because the tree rotl fetches will have Open Tree names, we need to create a named vector that maps the names we have for each species to the names Open Tree uses for them:

taxon_map <- structure(taxa$search_string, names=taxa$unique_name)

Now we can use this map to retrieve “data set names” from “OTT names”:

taxon_map["Anser caerulescens"]
##  Anser caerulescens 
## "chen_caerulescens"

Get a tree

Now we can get the tree. There are really too many tips here to show nicely, so we will leave them out of this plot:

tr <- tol_induced_subtree(taxa$ott_id)
plot(tr, show.tip.label=FALSE)

There are a few things to note here. First, the tree has no branch lengths. At present this is true for the whole of the Open Tree synthetic tree. Some comparative methods require either branch lengths or an ultrametric tree. Before you can use one of those methods you will need to get a tree with branch lengths. You could try looking for published trees made available by the Open Tree with studies_find_trees. Alternatively, you could estimate branch lengths from the topology of a phylogeny returned by tol_induced_subtree, perhaps by downloading DNA sequences from the NCBI with rentrez or “hanging” the tree on nodes of known age using the penalized likelihood method in ape::chronos. In this case, we will use only the topology of the tree as input to our comparative analysis, so we can skip these steps.

Second, the tip labels contain OTT IDs, which means they will not perfectly match the species names in our dataset or the taxon map that we created earlier:

tr$tip.label[1:4]
## [1] "Erythrura_gouldiae_ott465909"    "Taeniopygia_guttata_ott708327"  
## [3] "Lonchura_striata_ott306760"      "Molothrus_bonariensis_ott213451"

Finally, the tree contains node labels for those nodes that match a higher taxonomic group, and empty character vectors ("") for all other nodes. Some comparative methods either do not expect node labels at all, or require all labeled nodes to have a unique name (meaning multiple “empty” labels will cause an error).

We can deal with all these details easily. rotl provides the convenience function strip_ott_ids to remove the extra information from the tip labels. With the IDs removed, we can use our taxon map to replace the tip labels in the tree with the species names from our dataset.

otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
tr$tip.label <- taxon_map[ otl_tips ]

Finally, we can remove the node labels by setting the node.label attribute of the tree to NULL.

tr$node.label <- NULL

Perform the meta-analysis

Now we have data and a tree, and we know the names in the tree match the ones in the data. It’s time to do the comparative analysis. Rutkowska et al. used MCMCglmm, a Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis, and we will do the same. Of course, to properly analyse these data you would take some care in deciding on the appropriate priors to use and inspect the results carefully. In this case, we are really interested in using this as a demonstration, so we will just run a simple model.

Specifically, we will fit a model where the only variable that might explain the values of Zr is the random factor animal, which corresponds to the phylogenetic relationships among species. We also provide VZr as the measurement error variance, effectively adding extra weight to the results of more powerful studies. Here’s how we specify and fit that model with MCMCglmm:

library(MCMCglmm, quiet=TRUE)
## 
## Attaching package: 'ape'
## The following object is masked from 'package:phylobase':
## 
##     edges
set.seed(123)

pr<-list(R=list(V=1,nu=0.002),
             G=list(G1=list(V=1,nu=0.002))
)

model <- MCMCglmm(Zr~1,random=~animal,
                       pedigree=tr,
                       mev=egg_data$VZr,
                       prior=pr,
                       data=egg_data,
                       verbose=FALSE)
## Warning in inverseA(pedigree = pedigree, scale = scale, nodes = nodes): no
## branch lengths: compute.brlen from ape has been used

Now that we have a result we can find out how much phylogenetic signal exists for sex-biased differences in egg-size. In a multi-level model we can use variance components to look at this: specifically, the proportion of the total variance that can be explained by phylogeny, which is called the phylogenetic reliability, H. Let’s calculate H for this model:

var_comps <- colMeans(model$VCV )
var_comps["animal"] / sum(var_comps)
##      animal 
## 0.003066892

It appears there is almost no phylogenetic signal to the data. The relationships among species explain much less than one percent of the total variance in the data. If you were wondering, Rutkowska et al. report a similar result: even after adding more predictors to their model, most of the variance in Zr was left unexplained.

What other comparative methods can I use in R?

Here we have demonstrated just one comparative analysis that you might do in R. There are an ever-growing number of packages that allow an ever-growing number of analyses to be performed in R. Some “classics” like ancestral state reconstruction, phylogenetic independent contrasts and lineage through time plots are implemented in ape. Packages like phytools, caper and diversitree provide extensions to these methods. The CRAN Phylogenetics Taskview gives a good idea of the diversity of packages and analyses that can be completed in R.

rotl/inst/doc/meta-analysis.Rmd0000644000177500001440000002240713056407503016517 0ustar deepayanusers--- title: "Using the Open Tree synthesis in a comparative analysis" author: "David Winter" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Phylogenetic Comparative Methods The development of phylogenetic comparative methods has made phylogenies and important source of data in fields as diverse as ecology, genomic and medicine. Comparative methods can be used to investigate patterns in the evolution of traits or the diversification of lineages. In other cases a phylogeny is treated as a "nuisance parameter", allowing with the autocorrelation created by the shared evolutionary history of the different species included to be controlled for. In many cases finding a tree that relates the species for which trait data are available is a rate-limiting step in such comparative analyses. Here we show how the synthetic tree provided by Open Tree of Life (and made available in R via `rotl`) can help to fill this gap. ## A phylogenetic meta-analysis To demonstrate the use of `rotl` in a comparative analysis, we will partially reproduce the results of [Rutkowska _et al_ 2014](dx.doi.org/10.1111/jeb.12282). Very briefly, this study is a meta-analysis summarising the results of multiple studies testing for systematic differences in the size of eggs which contain male and female offspring. Such a difference might mean that birds invest more heavily in one sex than the other. Because this study involves data from 51 different species, Rutkowska _et al_ used a phylogenetic comparative approach to account for the shared evolutionary history among some of the studied-species. ### Gather the data If we are going to reproduce this analysis, we will first need to gather the data. 
Thankfully, the data is available as supplementary material from the publisher's website. We can collect the data from using `fulltext` (with the papers DOI as input) and read it into memory with `gdata`: ```{r egg_data, cache=TRUE} library(rotl) if (require(readxl) && require(fulltext)) { doi <- "10.1111/jeb.12282" xl_file <- ft_get_si(doi, 1, save.name="egg.xls") egg_data <- read_excel(xl_file) } else { egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl")) } head(egg_data) ``` The most important variable in this dataset is `Zr`, which is a [normalized effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference in size between eggs that contain males and females. Values close to zero come from studies that found the sex of an egg's inhabitant had little effect in its size, while large positive or negative values correspond to studies with substantial sex biases (towards males and females respectively). Since this is a meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot) with effects-size on the y-axis and precision (the inverse of the sample standard error) on the x-axis. Here we calculate precision from the sample variance (`Vzr`): ```{r eggs_in_a_funnel, fig.width=6, fig.height=3} plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16, ylab="Effect size (Zr)", xlab="Precision (1/SE)", main="Effect sizes for sex bias in egg size among 51 brid species" ) ``` In order to use this data later on we need to first convert it to a standard `data.frame`. We can also convert the `animal` column (the species names) to lower case which will make it easier to match names later on: ```{r, clean_eggs} egg_data <- as.data.frame(egg_data) egg_data$animal <- tolower(egg_data$animal) ``` ### Find the species in OTT We can use the OTL synthesis tree to relate these species. To do so we first need to find Open Tree Taxonomy (OTT) IDs for each species. 
We can do that with the Taxonomic Name Resolution Service function `tnrs_match_names`: ```{r, birds, cache=TRUE} taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals") head(taxa) ``` All of these species are in OTT, but a few of them go by different names in the Open Tree than we have in our data set. Because the tree `rotl` fetches will have Open Tree names, we need to create a named vector that maps the names we have for each species to the names Open Tree uses for them: ```{r bird_map} taxon_map <- structure(taxa$search_string, names=taxa$unique_name) ``` Now we can use this map to retrieve "data set names" from "OTT names": ```{r odd_duck} taxon_map["Anser caerulescens"] ``` ### Get a tree Now we can get the tree. There are really too many tips here to show nicely, so we will leave them out of this plot ```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'} tr <- tol_induced_subtree(taxa$ott_id) plot(tr, show.tip.label=FALSE) ``` There are a few things to note here. First, the tree has not branch lengths. At present this is true for the whole of the Open Tree synthetic tree. Some comparative methods require either branch lengths or an ultrametric tree. Before you can use one of those methods you will need to get a tree with branch lengths. You could try looking for published trees made available by the Open Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths from the toplogy of a phylogeny returned by `tol_induced_subtree`, perhaps by downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on nodes of known-age using penalized likelihood method in `ape::chronos`. In this case, we will use only the topology of the tree as input to our comparative analysis, so we can skip these steps. 
Second, the tip labels contain OTT IDs, which means they will not perfectly match the species names in our dataset or the taxon map that we created earlier: ```{r tip_lab} tr$tip.label[1:4] ``` Finally, the tree contains node labels for those nodes that match a higher taxonomic group, and empty character vectors (`""`) for all other nodes. Some comparative methods either do no expect node labels at all, or require all labeled nodes to have a unique name (meaning multiple "empty" labels will cause and error). We can deal with all these details easily. `rotl` provides the convenience function `strip_ott_ids` to remove the extra information from the tip labels. With the IDs removed, we can use our taxon map to replace the tip labels in the tree with the species names from dataset. ```{r clean_tips} otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label <- taxon_map[ otl_tips ] ``` Finally, we can remove the node labels by setting the `node.label` attribute of the tree to `NULL`. ```{r remove_nodes} tr$node.label <- NULL ``` ### Perform the meta-analysis Now we have data and a tree, and we know the names in the tree match the ones in the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a Bayesian MCMC approach to fitting multi-level models,to perform their meta-analysis, and we will do the same. Of course, to properly analyse these data you would take some care in deciding on the appropriate priors to use and inspect the results carefully. In this case, we are really interested in using this as a demonstration, so we will just run a simple model. Specifically we sill fit a model where the only variable that might explain the values of `Zr` is the random factor `animal`, which corresponds to the phylogenetic relationships among species. We also provide `Zvr` as the measurement error variance, effectively adding extra weight to the results of more powerful studies. 
Here's how we specify and fit that model with `MCMCglmm`: ```{r model} library(MCMCglmm, quiet=TRUE) set.seed(123) pr<-list(R=list(V=1,nu=0.002), G=list(G1=list(V=1,nu=0.002)) ) model <- MCMCglmm(Zr~1,random=~animal, pedigree=tr, mev=egg_data$VZr, prior=pr, data=egg_data, verbose=FALSE) ``` Now that we have a result we can find out how much phylogenetic signal exists for sex-biased differences in egg-size. In a multi-level model we can use variance components to look at this, specifically the proportion of the total variance that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's calculate the _H_ for this model: ```{r PhyH} var_comps <- colMeans(model$VCV ) var_comps["animal"] / sum(var_comps) ``` It appears there is almost no phylogenetic signal to the data. The relationships among species explain much less that one percent of the total variance in the data. If you were wondering, Rutkowska _et al_. report a similar result, even after adding more predictors to their model most of the variance in `Zr` was left unexplained. ## What other comparative methods can I use in R? Here we have demonstrated just one comparative analysis that you might do in R. There are an ever-growing number of packages that allow an ever-growing number of analysis to performed in R. Some "classics" like ancestral state reconstruction, phylogenetic independent contrasts and lineage through time plots are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree` provide extensions to these methods. The [CRAN Phylogenetics Taskview](https://CRAN.R-project.org/view=Phylogenetics) gives a good idea of the diversity of packages and analyses that can be completed in R. rotl/inst/doc/how-to-use-rotl.html0000644000177500001440000017543013056407503017164 0ustar deepayanusers How to use rotl?

How to use rotl?

François Michonneau

2017-03-03

rotl provides an interface to the Open Tree of Life (OTL) API and allows users to query the API, retrieve parts of the Tree of Life and integrate these parts with other R packages.

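The OTL API provides services to access:

  * the Tree of Life a.k.a. TOL (the synthetic tree): a single draft tree that is a combination of the OTL taxonomy and the source trees (studies)
  * the Taxonomic name resolution services a.k.a. TNRS: the methods for resolving taxonomic names to the internal identifiers used by the TOL and the GOL (the ott ids).
  * the Taxonomy a.k.a. OTT (for Open Tree Taxonomy): which represents the synthesis of the different taxonomies used as a backbone of the TOL when no studies are available.
  * the Studies containing the source trees used to build the TOL, and extracted from the scientific literature.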

In rotl, each of these services corresponds to functions with different prefixes:

Service rotl prefix
Tree of Life tol_
TNRS tnrs_
Taxonomy taxonomy_
Studies studies_

rotl also provides a few other functions and methods that can be used to extract relevant information from the objects returned by these functions.

Demonstration of a basic workflow

The most common use for rotl is probably to start from a list of species and get the relevant parts of the tree for these species. This is a two step process:

  1. the species names need to be matched to their ott_id (the Open Tree Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
  2. these ott_id will then be used to retrieve the relevant parts of the Tree of Life.

Step 1: Matching taxonomy to the ott_id

Let’s start by doing a search on a diverse group of taxa: a tree frog (genus Hyla), a fish (genus Salmo), a sea urchin (genus Diadema), and a nautilus (genus Nautilus).

library(rotl)
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)

It’s always a good idea to check that the resolved names match what you intended:

search_string unique_name approximate_match ott_id is_synonym flags number_matches
hyla Hyla FALSE 1062216 FALSE 1
salmo Salmo FALSE 982359 FALSE 1
diadema Diadema (genus in Nucletmycea) FALSE 4930522 FALSE 3
nautilus Nautilus FALSE 616358 FALSE 1

The column unique_name sometimes indicates the higher taxonomic level associated with the name. The column number_matches indicates the number of ott_id that correspond to a given name. In this example, our search on Diadema returns 3 matches, and the one returned by default (Diadema, genus in Nucletmycea) is not the sea urchin that we want for our query. The argument context_name allows you to limit the taxonomic scope of your search. Diadema is also the genus name of a fungus. To ensure that our search is limited to animal names, we could do:

resolved_names <- tnrs_match_names(taxa, context_name = "Animals")

If you are trying to build a tree with deeply divergent taxa that the argument context_name cannot fix, see “How to change the ott ids assigned to my taxa?” in the FAQ below.

Step 2: Getting the tree corresponding to our taxa

Now that we have the correct ott_id for our taxa, we can ask for the tree using the tol_induced_subtree() function. By default, the object returned by tol_induced_subtree is a phylo object (from the ape package), so we can plot it directly.

my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
plot(my_tree, no.margin=TRUE)

FAQ

How to change the ott ids assigned to my taxa?

If you realize that tnrs_match_names assigns the incorrect taxonomic group to your name (e.g., because of synonymy) and changing the context_name does not help, you can use the function inspect. This function takes the object resulting from tnrs_match_names(), and either the row number, the taxon name (you used in your search in lowercase), or the ott_id returned by the initial query.

To illustrate this, let’s re-use the previous query but this time pretending that we are interested in the fungus Diadema and not the sea urchin:

taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
resolved_names
##   search_string                    unique_name approximate_match  ott_id
## 1          hyla                           Hyla             FALSE 1062216
## 2         salmo                          Salmo             FALSE  982359
## 3       diadema Diadema (genus in Nucletmycea)             FALSE 4930522
## 4      nautilus                       Nautilus             FALSE  616358
##   is_synonym flags number_matches
## 1      FALSE                    1
## 2      FALSE                    1
## 3      FALSE                    3
## 4      FALSE                    1
inspect(resolved_names, taxon_name = "diadema")
##   search_string                    unique_name approximate_match  ott_id
## 1       diadema Diadema (genus in Nucletmycea)             FALSE 4930522
## 2       diadema     Diadema (genus in Holozoa)             FALSE  631176
## 3       diadema                     Hypolimnas             FALSE  643831
##   is_synonym flags number_matches
## 1      FALSE                    3
## 2      FALSE                    3
## 3       TRUE                    3

In our case, we want the second row in this data frame to replace the information that initially matched for Diadema. We can now use the update() function to change to the correct taxon (the fungus, not the sea urchin):

resolved_names <- update(resolved_names, taxon_name = "diadema",
                         new_row_number = 2)

## we could also have used the ott_id to replace this taxon:
## resolved_names <- update(resolved_names, taxon_name = "diadema",
##                          new_ott_id = 4930522)

And now our resolved_names data frame includes the taxon we want:

search_string unique_name approximate_match ott_id is_synonym flags number_matches
hyla Hyla FALSE 1062216 FALSE 1
salmo Salmo FALSE 982359 FALSE 1
diadema Diadema (genus in Holozoa) FALSE 631176 FALSE 3
nautilus Nautilus FALSE 616358 FALSE 1

How do I know that the taxon I’m asking for is the correct one?

The function taxonomy_taxon_info() takes ott_ids as arguments and returns taxonomic information about the taxa. This output can be passed to some helper functions to extract the relevant information. Let’s illustrate this with our Diadema example:

diadema_info <- taxonomy_taxon_info(631176)
tax_rank(diadema_info)
## $`Diadema (genus in Holozoa)`
## [1] "genus"
## 
## attr(,"class")
## [1] "otl_rank" "list"
synonyms(diadema_info)
## $`Diadema (genus in Holozoa)`
## [1] "Diamema"                "Centrechinus (Diadema)"
## [3] "Cidaris (Diadema)"      "Centrechinus"          
## 
## attr(,"class")
## [1] "otl_synonyms" "list"
tax_name(diadema_info)
## $`Diadema (genus in Holozoa)`
## [1] "Diadema"
## 
## attr(,"class")
## [1] "otl_name" "list"

In some cases, it might also be useful to investigate the taxonomic tree descending from an ott_id to check that it’s the correct taxon and to determine the species included in the Open Tree Taxonomy:

diadema_tax_tree <- taxonomy_subtree(631176)
diadema_tax_tree
## $tip_label
##  [1] "Diadema_principeana_ott5725746"          
##  [2] "Diadema_vetus_ott5725747"                
##  [3] "Diadema_sp._CS-2014_ott5502179"          
##  [4] "Diadema_ascensionis_ott4950423"          
##  [5] "Diadema_africanum_ott4147369"            
##  [6] "Diadema_antillarum_antillarum_ott4147370"
##  [7] "Diadema_antillarum_scensionis_ott220009" 
##  [8] "Diadema_palmeri_ott836860"               
##  [9] "Diadema_sp._DSM6_ott771059"              
## [10] "Diadema_mexicanum_ott639130"             
## [11] "Diadema_setosum_ott631175"               
## [12] "Diadema_sp._SETO15_ott587479"            
## [13] "Diadema_sp._seto17_ott587478"            
## [14] "Diadema_sp._DSM7_ott587487"              
## [15] "Diadema_sp._DSM8_ott587486"              
## [16] "Diadema_sp._seto9_ott587485"             
## [17] "Diadema_sp._seto10_ott587484"            
## [18] "Diadema_sp._DSM2_ott587483"              
## [19] "Diadema_sp._DSM3_ott587482"              
## [20] "Diadema_sp._DSM4_ott587481"              
## [21] "Diadema_sp._dsm5_ott587480"              
## [22] "Diadema_savignyi_ott395692"              
## [23] "Diadema_paucispinum_ott312263"           
## [24] "Diadema_sp._seto16_ott312262"            
## [25] "Diadema_sp._DSM1_ott219999"              
## [26] "Diadema_sp._DJN9_ott66626"               
## [27] "Diadema_sp._seto19_ott66624"             
## [28] "Diadema_sp._seto38_ott66625"             
## [29] "Diadema_sp._seto18_ott66623"             
## [30] "Diadema_sp._seto35_ott66618"             
## 
## $edge_label
## [1] "Diadema_antillarum_ott1022356" "Diadema_ott631176"

By default, this function returns all taxa (including self, and internal) descending from this ott_id, but it is also possible to return a phylo object.

How do I get the tree for a particular taxonomic group?

If you are looking to get the tree for a particular taxonomic group, you need to first identify it by its node id or ott id, and then use the tol_subtree() function:

mono_id <- tnrs_match_names("Monotremata")
mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
## Warning in collapse_singles(tr): Dropping singleton nodes with labels:
## Ornithorhynchidae ott344066, Ornithorhynchus ott962391, Tachyglossus
## ott16047, Tachyglossus aculeatus ott16038
plot(mono_tree)

How do I find trees from studies focused on my favourite taxa?

The function studies_find_studies() allows the user to search for studies matching specific criteria. The function studies_properties() returns the list of properties that can be used in the search.

furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
furry_ids <- furry_studies$study_ids

Now that we know the study_id, we can ask for the meta data information associated with this study:

furry_meta <- get_study_meta("pg_2550")
get_publication(furry_meta)     ## The citation for the source of the study
## [1] "O'Leary, Maureen A., Marc Allard, Michael J. Novacek, Jin Meng, and John Gatesy. 2004. \"Building the mammalian sector of the tree of life: Combining different data and a discussion of divergence times for placental mammals.\" In: Cracraft J., & Donoghue M., eds. Assembling the Tree of Life. pp. 490-516. Oxford, United Kingdom, Oxford University Press."
## attr(,"DOI")
## [1] ""
get_tree_ids(furry_meta)        ## This study has 10 trees associated with it
##  [1] "tree5513" "tree5515" "tree5516" "tree5517" "tree5518" "tree5519"
##  [7] "tree5520" "tree5521" "tree5522" "tree5523"
candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
## NULL

Using get_study("pg_2550") would return a multiPhylo object (default) with all the trees associated with this particular study, while get_study_tree("pg_2550", "tree5513") would return one of these trees.

The tree returned by the API has duplicated tip labels, how can I work around it?

You may encounter the following error message:

Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
angulata) has already been encountered in this tree. Duplication of taxa in a
tree is prohibited.

This message occurs because duplicate labels are not allowed in the NEXUS format, and this is strictly enforced by the part of the code used by rotl to import the trees in memory.

If you use a version of rotl more recent than 0.4.1, this should not happen by default for the function get_study_tree. If it happens with another function, please let us know.

The easiest way to work around this is to save the tree in a file, and use APE to read it in memory:

get_study_tree(study_id="pg_710", tree_id="tree1277",
               tip_label='ott_taxon_name', file = "/tmp/tree.tre",
               file_format = "newick")
tr <- ape::read.tree(file = "/tmp/tree.tre")

How do I get the higher taxonomy for a given taxon?

If you encounter a taxon name you are not familiar with, it might be useful to obtain its higher taxonomy to see where it fits in the tree of life. We can combine several taxonomy methods to extract this information easily.

giant_squid <- tnrs_match_names("Architeuthis")
tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
## $`5295401`
##          rank               name        unique_name  ott_id
## 1      family     Architeuthidae     Architeuthidae  564393
## 2    suborder          Oegopsina          Oegopsina   43352
## 3       order           Teuthida           Teuthida  380472
## 4  superorder     Decapodiformes     Decapodiformes  854107
## 5  infraclass       Neocoleoidea       Neocoleoidea  329546
## 6    subclass          Coleoidea          Coleoidea    7371
## 7       class        Cephalopoda        Cephalopoda    7368
## 8      phylum           Mollusca           Mollusca  802117
## 9     no rank     Lophotrochozoa     Lophotrochozoa  155737
## 10    no rank        Protostomia        Protostomia  189832
## 11    no rank          Bilateria          Bilateria  117569
## 12    no rank          Eumetazoa          Eumetazoa  641038
## 13    kingdom            Metazoa            Metazoa  691846
## 14    no rank            Holozoa            Holozoa 5246131
## 15    no rank       Opisthokonta       Opisthokonta  332573
## 16     domain          Eukaryota          Eukaryota  304358
## 17    no rank cellular organisms cellular organisms   93302
## 18    no rank               life               life  805080

Why are OTT IDs discovered with rotl missing from an induced subtree?

Some taxonomic names that can be retrieved through the taxonomic name resolution service are not part of the Open Tree’s synthesis tree. These are usually traditional higher-level taxa that have been found to be paraphyletic.

For instance, if you wanted to fetch a tree relating the three birds that go into a Turducken as well as the pork used for stuffing, you might search for the turkey, duck, chicken, and pork genera:

turducken <- c("Meleagris", "Anas", "Gallus", "Sus")
taxa <- tnrs_match_names(turducken, context="Animals")
taxa
##   search_string unique_name approximate_match ott_id is_synonym flags
## 1     meleagris   Meleagris             FALSE 446481      FALSE      
## 2          anas        Anas             FALSE 765185      FALSE      
## 3        gallus      Gallus             FALSE 153562      FALSE      
## 4           sus         Sus             FALSE 730021      FALSE      
##   number_matches
## 1              2
## 2              1
## 3              3
## 4              1

We have the OTT ids for each genus, however, if we tried to get the induced subtree from these results, we would get an error:

tr <- tol_induced_subtree(ott_id(taxa))
## Error: HTTP failure: 400
## The following OTT ids were not found: [765185]. BadIdsExceptionopentree.plugins.BadIdsExceptionlist("opentree.plugins.tree_of_life_v3.doInducedSubtree(tree_of_life_v3.java:516)", "opentree.plugins.tree_of_life_v3.induced_subtree(tree_of_life_v3.java:400)", "java.lang.reflect.Method.invoke(Method.java:498)", "org.neo4j.server.plugins.PluginMethod.invoke(PluginMethod.java:57)", "org.neo4j.server.plugins.PluginManager.invoke(PluginManager.java:168)", "org.neo4j.server.rest.web.ExtensionService.invokeGraphDatabaseExtension(ExtensionService.java:300)", "org.neo4j.server.rest.web.ExtensionService.invokeGraphDatabaseExtension(ExtensionService.java:122)", 
##     "java.lang.reflect.Method.invoke(Method.java:498)", "org.neo4j.server.rest.security.SecurityFilter.doFilter(SecurityFilter.java:112)")

As the error message suggests, some of the taxa are not found in the synthetic tree. This occurs for 2 main reasons: either the taxon is invalid, or it is part of a group that is not monophyletic in the synthetic tree. There are two ways to get around this issue: (1) removing the taxa that are not part of the Open Tree; (2) using the complete species name.

Removing the taxa missing from the synthetic tree

To help with this situation, rotl provides a way to identify the OTT ids that are not part of the synthetic tree. The function is_in_tree() takes the output of the ott_id() function and returns a logical vector indicating whether the taxa are part of the synthetic tree. We can then use it to keep only the taxa that appear in the synthetic tree:

in_tree <- is_in_tree(ott_id(taxa))
in_tree
## Meleagris      Anas    Gallus       Sus 
##      TRUE     FALSE      TRUE      TRUE
tr <- tol_induced_subtree(ott_id(taxa)[in_tree])

Using the full taxonomic names

The best way to avoid these problems is to specify complete species names (species being the lowest level of classification in the Open Tree taxonomy, they are guaranteed to be monophyletic):

# Query by complete species names rather than by genus, so that every match
# is a species-level taxon.
turducken_spp <- c(
  "Meleagris gallopavo", "Anas platyrhynchos",
  "Gallus gallus", "Sus scrofa"
)
# `context_name` is spelled out in full: the abbreviated `context` only works
# through R's partial argument matching.
taxa <- tnrs_match_names(turducken_spp, context_name = "Animals")
# Fetch the induced subtree for the matched ids and plot it.
tr <- tol_induced_subtree(ott_id(taxa))
plot(tr)

rotl/inst/doc/how-to-use-rotl.Rmd0000644000177500001440000002553113056407503016736 0ustar deepayanusers--- title: "How to use rotl?" author: "François Michonneau" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: css: vignette.css vignette: > %\VignetteIndexEntry{How to use rotl?} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- `rotl` provides an interface to the Open Tree of Life (OTL) API and allows users to query the API, retrieve parts of the Tree of Life and integrate these parts with other R packages. The OTL API provides services to access: * the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is a combination of **the OTL taxonomy** and the **source trees** (studies) * the **Taxonomic name resolution services** a.k.a. TNRS: the methods for resolving taxonomic names to the internal identifiers used by the TOL and the GOL (the `ott ids`). * the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the synthesis of the different taxonomies used as a backbone of the TOL when no studies are available. * the **Studies** containing the source trees used to build the TOL, and extracted from the scientific literature. In `rotl`, each of these services correspond to functions with different prefixes: | Service | `rotl` prefix | |---------------|---------------| | Tree of Life | `tol_` | | TNRS | `tnrs_` | | Taxonomy | `taxonomy_` | | Studies | `studies_` | `rotl` also provides a few other functions and methods that can be used to extract relevant information from the objects returned by these functions. ## Demonstration of a basic workflow The most common use for `rotl` is probably to start from a list of species and get the relevant parts of the tree for these species. This is a two step process: 1. the species names need to be matched to their `ott_id` (the Open Tree Taxonomy identifiers) using the Taxonomic name resolution services (TNRS) 1. 
these `ott_id` will then be used to retrieve the relevant parts of the Tree of Life. ### Step 1: Matching taxonomy to the `ott_id` Let's start by doing a search on a diverse group of taxa: a tree frog (genus _Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus (genus _Nautilus_). ```{r} library(rotl) taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) ``` It's always a good idea to check that the resolved names match what you intended: `r knitr::kable(resolved_names)` The column `unique_name` sometimes indicates the higher taxonomic level associated with the name. The column `number_matches` indicates the number of `ott_id` that corresponds to a given name. In this example, our search on _Diadema_ returns 2 matches, and the one returned by default is indeed the sea urchin that we want for our query. The argument `context_name` allows you to limit the taxonomic scope of your search. _Diadema_ is also the genus name of a fungus. To ensure that our search is limited to animal names, we could do: ```{r} resolved_names <- tnrs_match_names(taxa, context_name = "Animals") ``` If you are trying to build a tree with deeply divergent taxa that the argument `context_name` cannot fix, see "How to change the ott ids assigned to my taxa?" in the FAQ below. ### Step 2: Getting the tree corresponding to our taxa Now that we have the correct `ott_id` for our taxa, we can ask for the tree using the `tol_induced_subtree()` function. By default, the object returned by `tol_induced_subtree` is a phylo object (from the [ape](https://cran.r-project.org/package=ape) package), so we can plot it directly. ```{r, fig.width=7, fig.height=4} my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id) plot(my_tree, no.margin=TRUE) ``` ## FAQ ### How to change the ott ids assigned to my taxa? 
If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to your name (e.g., because of synonymy) and changing the `context_name` does not help, you can use the function `inspect`. This function takes the object resulting from `tnrs_match_names()`, and either the row number, the taxon name (you used in your search in lowercase), or the `ott_id` returned by the initial query. To illustrate this, let's re-use the previous query but this time pretending that we are interested in the fungus _Diadema_ and not the sea urchin: ```{r} taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) resolved_names inspect(resolved_names, taxon_name = "diadema") ``` In our case, we want the second row in this data frame to replace the information that initially matched for _Diadema_. We can now use the `update()` function, to change to the correct taxa (the fungus not the sea urchin): ```{r} resolved_names <- update(resolved_names, taxon_name = "diadema", new_row_number = 2) ## we could also have used the ott_id to replace this taxon: ## resolved_names <- update(resolved_names, taxon_name = "diadema", ## new_ott_id = 4930522) ``` And now our `resolved_names` data frame includes the taxon we want: `r knitr::kable(resolved_names)` ### How do I know that the taxa I'm asking for is the correct one? The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns taxonomic information about the taxa. This output can be passed to some helpers functions to extract the relevant information. 
Let's illustrate this with our _Diadema_ example: ```{r} diadema_info <- taxonomy_taxon_info(631176) tax_rank(diadema_info) synonyms(diadema_info) tax_name(diadema_info) ``` In some cases, it might also be useful to investigate the taxonomic tree descending from an `ott_id` to check that it's the correct taxon and to determine the species included in the Open Tree Taxonomy: ```{r} diadema_tax_tree <- taxonomy_subtree(631176) diadema_tax_tree ``` By default, this function returns all taxa (including self, and internal) descending from this `ott_id` but it is also possible to return a `phylo` object. ### How do I get the tree for a particular taxonomic group? If you are looking to get the tree for a particular taxonomic group, you need to first identify it by its node id or ott id, and then use the `tol_subtree()` function: ```{r, fig.width=7, fig.height=4} mono_id <- tnrs_match_names("Monotremata") mono_tree <- tol_subtree(ott_id = ott_id(mono_id)) plot(mono_tree) ``` ### How do I find trees from studies focused on my favourite taxa? The functions `studies_find_studies()` and `studies_find_trees()` allow the user to search for studies and trees matching specific criteria. The function `studies_properties()` returns the list of properties that can be used in the search. ```{r} furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia") furry_ids <- furry_studies$study_ids ``` Now that we know the `study_id`, we can ask for the meta data information associated with this study: ```{r} furry_meta <- get_study_meta("pg_2550") get_publication(furry_meta) ## The citation for the source of the study get_tree_ids(furry_meta) ## This study has 10 trees associated with it candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL ``` Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with all the trees associated with this particular study, while `get_study_tree("pg_2550", "tree5513")` would return one of these trees. 
### The tree returned by the API has duplicated tip labels, how can I work around it? You may encounter the following error message: ``` Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia angulata) has already been encountered in this tree. Duplication of taxa in a tree is prohibited. ``` This message occurs as duplicate labels are not allowed in the NEXUS format and this is strictly enforced by the part of the code used by `rotl` to import the trees in memory. If you use a version of `rotl` more recent than 0.4.1, this should not happen by default for the function `get_study_tree`. If it happens with another function, please [let us know](https://github.com/ropensci/rotl/issues). The easiest way to work around this is to save the tree in a file, and use APE to read it in memory: ```{r, eval=FALSE} get_study_tree(study_id="pg_710", tree_id="tree1277", tip_label='ott_taxon_name', file = "/tmp/tree.tre", file_format = "newick") tr <- ape::read.tree(file = "/tmp/tree.tre") ``` ### How do I get the higher taxonomy for a given taxa? If you encounter a taxon name you are not familiar with, it might be useful to obtain its higher taxonomy to see where it fits in the tree of life. We can combine several taxonomy methods to extract this information easily. ```{r} giant_squid <- tnrs_match_names("Architeuthis") tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE)) ``` ### Why are OTT IDs discovered with `rotl` missing from an induced subtree? Some taxonomic names that can be retrieved through the taxonomic name resolution service are not part of the Open Tree's synthesis tree. These are usually traditional higher-level taxa that have been found to be paraphyletic. 
For instance, if you wanted to fetch a tree relating the three birds that go into a [Turducken](https://en.wikipedia.org/wiki/Turducken) as well as the pork used for stuffing, you might search for the turkey, duck, chicken, and pork genera: ```{r} turducken <- c("Meleagris", "Anas", "Gallus", "Sus") taxa <- tnrs_match_names(turducken, context="Animals") taxa ``` We have the OTT ids for each genus; however, if we tried to get the induced subtree from these results, we would get an error: ```{r, error=TRUE} tr <- tol_induced_subtree(ott_id(taxa)) ``` As the error message suggests, some of the taxa are not found in the synthetic tree. This occurs for 2 main reasons: either the taxon is invalid, or it is part of a group that is not monophyletic in the synthetic tree. There are two ways to get around this issue: (1) removing the taxa that are not part of the Open Tree; (2) using the complete species name. #### Removing the taxa missing from the synthetic tree To help with this situation, `rotl` provides a way to identify the OTT ids that are not part of the synthetic tree. The function `is_in_tree()` takes the output of the `ott_id()` function and returns a vector of logical indicating whether the taxa are part of the synthetic tree. 
We can then use to only keep the taxa that appear in the synthetic tree: ```{r} in_tree <- is_in_tree(ott_id(taxa)) in_tree tr <- tol_induced_subtree(ott_id(taxa)[in_tree]) ``` #### Using the full taxonomic names The best way to avoid these problems is to specify complete species names (species being the lowest level of classification in the Open Tree taxonomy they are guaranteed to be monophyletic): ```{r, fig.width=7, fig.height=4} turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus", "Sus scrofa") taxa <- tnrs_match_names(turducken_spp, context="Animals") tr <- tol_induced_subtree(ott_id(taxa)) plot(tr) ``` rotl/inst/doc/data_mashups.html0000644000177500001440000016503513056407503016650 0ustar deepayanusers Connecting data to Open Tree trees

Connecting data to Open Tree trees

David Winter

2017-03-03

Combining data from OToL and other sources.

One of the major goals of rotl is to help users combine data from other sources with the phylogenetic trees in the Open Tree database. This example document describes some of the ways in which a user might connect data to trees from Open Tree.

Get Open Tree IDs to match your data.

Let’s say you have a dataset where each row represents a measurement taken from one species, and your goal is to put these measurements in some phylogenetic context. Here’s a small example: the best estimate of the mutation rate for a set of unicellular Eukaryotes along with some other property of those species which might explain the mutation rate:

# Locate the example CSV under rotl's "extdata" directory, read it with
# strings kept as character vectors, and display the resulting data frame.
csv_path <- system.file(
  "extdata", "protist_mutation_rates.csv",
  package = "rotl"
)
mu <- read.csv(file = csv_path, stringsAsFactors = FALSE)
mu
##                     species       mu pop.size genome.size
## 1   Tetrahymena thermophila 7.61e-12 1.12e+08    1.04e+08
## 2    Paramecium tetraurelia 1.94e-11 1.24e+08    7.20e+07
## 3 Chlamydomonas reinhardtii 2.08e-10 1.00e+08    1.12e+08
## 4  Dictyostelium discoideum 2.90e-11 7.40e+06    3.40e+07
## 5  Saccharomyces cerevisiae 3.30e-10 1.00e+08    1.25e+08
## 6       Saccharomyces pombe 2.00e-10 1.00e+07    1.25e+08

If we want to get a tree for these species we need to start by finding the unique ID for each of these species in the Open Tree database. We can use the Taxonomic Name Resolution Service (tnrs) functions to do this. Before we do that we should see if any of the taxonomic contexts, which can be used to narrow a search and avoid conflicts between different codes, apply to our group of species:

# Load rotl and list the taxonomic contexts that can be used to narrow a
# name search.
library(rotl)
tnrs_contexts()
## Possible contexts:
##    Animals 
##       Birds, Tetrapods, Mammals, Amphibians, Vertebrates 
##       Arthropods, Molluscs, Nematodes, Platyhelminthes, Annelids 
##       Cnidarians, Arachnides, Insects 
##    Bacteria 
##       SAR group, Archaea, Excavata, Amoebae, Centrohelida 
##       Haptophyta, Apusozoa, Diatoms, Ciliates, Forams 
##    Fungi 
##       Basidiomycetes, Ascomycetes 
##    Land plants 
##       Hornworts, Mosses, Liverworts, Vascular plants, Club mosses 
##       Ferns, Seed plants, Flowering plants, Monocots, Eudicots 
##       Rosids, Asterids, Asterales, Asteraceae, Aster 
##       Symphyotrichum, Campanulaceae, Lobelia 
##    All life

Hmm, none of those groups contain all of our species. In this case we can search using the All life context and the function tnrs_match_names:

# None of the narrower contexts covers every species, so match the names
# within the "All life" context.
taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
# Render the matches as a table so they can be checked by eye.
knitr::kable(taxon_search)
search_string unique_name approximate_match ott_id is_synonym flags number_matches
tetrahymena thermophila Tetrahymena thermophila FALSE 180195 FALSE SIBLING_HIGHER 1
paramecium tetraurelia Paramecium tetraurelia FALSE 568130 FALSE 1
chlamydomonas reinhardtii Chlamydomonas reinhardtii FALSE 33153 FALSE 1
dictyostelium discoideum Dictyostelium discoideum FALSE 160850 FALSE 1
saccharomyces cerevisiae Saccharomyces cerevisiae FALSE 908549 FALSE 1
saccharomyces pombe Schizosaccharomyces pombe FALSE 990004 TRUE 1

Good, all of our species are known to Open Tree. Note, though, that one of the names is a synonym. Saccharomyces pombe is an older name for what is now called Schizosaccharomyces pombe. As the name suggests, the Taxonomic Name Resolution Service is designed to deal with these problems (and similar ones like misspellings), but it is always a good idea to check the results of tnrs_match_names closely to ensure the results are what you expect.

In this case we have a good ID for each of our species so we can move on. Before we do that, let’s ensure we can match up our original data to the Open Tree names and IDs by adding them to our data.frame:

# Store the matched Open Tree names and ids alongside the original data.
# Assumes the rows of `taxon_search` are in the same order as the rows of
# `mu` (the table printed above shows them in matching order).
mu$ott_name <- taxon_search$unique_name
mu$ott_id <- taxon_search$ott_id

Find a tree with your taxa

Now let’s find a tree. There are two possible options here: we can search for published studies that include our taxa or we can use the ‘synthetic tree’ from Open Tree. We can try both approaches.

Published trees

Before we can search for published studies or trees, we should check out the list of properties we can use to perform such searches:

# List the tree-level and study-level properties available for searches.
studies_properties()
## $tree_properties
##  [1] "ot:treebaseOTUId"           "ot:nodeLabelMode"          
##  [3] "ot:originalLabel"           "oti_tree_id"               
##  [5] "ot:ottTaxonName"            "ot:inferenceMethod"        
##  [7] "ot:tag"                     "ot:treebaseTreeId"         
##  [9] "ot:comment"                 "ot:branchLengthDescription"
## [11] "ot:treeModified"            "ot:studyId"                
## [13] "ot:branchLengthTimeUnits"   "ot:ottId"                  
## [15] "is_deprecated"              "ot:branchLengthMode"       
## [17] "ot:treeLastEdited"          "ot:nodeLabelDescription"   
## 
## $study_properties
##  [1] "ot:studyModified"             "ot:focalClade"               
##  [3] "ot:focalCladeOTTTaxonName"    "ot:focalCladeOTTId"          
##  [5] "ot:studyPublication"          "ot:studyLastEditor"          
##  [7] "ot:focalCladeTaxonName"       "ot:tag"                      
##  [9] "ot:comment"                   "ot:studyLabel"               
## [11] "ot:authorContributed"         "ot:studyPublicationReference"
## [13] "ot:curatorName"               "ot:studyId"                  
## [15] "ot:studyYear"                 "ot:studyUploaded"            
## [17] "is_deprecated"                "ot:dataDeposit"              
## [19] "ot:candidateTreeForSynthesis"

We have ottIds for our taxa, so let’s use those IDs to search for trees that contain them. Starting with our first species Tetrahymena thermophila we can use studies_find_trees to do this search.

# Search for trees that contain Tetrahymena thermophila (OTT id 180195).
studies_find_trees(property="ot:ottId", value="180195")
## [1] study_ids       dat             n_matched_trees
## <0 rows> (or 0-length row.names)

Well… that’s not very promising. We can repeat that process for all of the IDs to see if the other species are better represented.

# Repeat the tree search once per OTT id. `property` and `detailed` are
# supplied by name, so each id is passed positionally (presumably filling
# the `value` argument used in the single-id search above).
hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE)
# Total number of matched trees per species. vapply() fixes the result to
# exactly one number per species, whereas sapply()'s return type depends on
# its input.
vapply(hits, function(x) sum(x[["n_matched_trees"]]), numeric(1))
## [1]  0  0  2  0 32  3

OK, most of our species are not in any of the published trees available. You can help fix this sort of problem by making sure you submit your published trees to Open Tree.

A part of the synthesis tree

Thankfully, we can still use the complete Tree of Life made from the combined results of all of the published trees and taxonomies that go into Open Tree. The function tol_induced_subtree will fetch a tree relating a set of IDs.

Using the default arguments you can get a tree object into your R session:

# Fetch the part of the synthetic tree relating our OTT ids, then plot it.
tr <- tol_induced_subtree(ott_ids=mu$ott_id)
plot(tr)

Connect your data to the tips of your tree

Now that we have a tree for our species, how can we use the tree and the data together?

The package phylobase provides an object class called phylo4d, which is designed to represent a phylogeny and data associated with its tips. In order to get our tree and data into one of these objects we have to make sure the labels in the tree and in our data match exactly. That’s not quite the case at the moment (tree labels have underscores and IDs appended):

# A name as stored in our data ...
mu$ott_name[1]
## [1] "Tetrahymena thermophila"
# ... compared with a tip label as stored in the tree.
tr$tip.label[4]
## [1] "Dictyostelium_discoideum_ott160850"

rotl provides a convenience function strip_ott_ids to deal with these.

# Clean the tip labels (drop the appended ott ids and the underscores) so
# they can be compared with the names in our data.
tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
# Check that every tip label now has a match among the Open Tree names.
tr$tip.label %in% mu$ott_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE

Ok, now that the tip labels match we can make a new dataset. The phylo4d() function matches tip labels to the row names of a data.frame, so let’s make a new dataset that contains just the relevant data and has row names to match the tree:

# Combine the tree with the numeric traits: keep only the three measurement
# columns, label the rows with the Open Tree names, and hand both to
# phylo4d().
library(phylobase)
mu_numeric <- mu[c("mu", "pop.size", "genome.size")]
row.names(mu_numeric) <- mu[["ott_name"]]
tree_data <- phylo4d(tr, mu_numeric)

And now we can plot the data and the tree together

# Plot the combined tree-and-data object.
plot(tree_data)

Find external data associated with studies, trees and taxa from Open Tree

In the above example we looked for a tree that related species in another dataset. Now we will go the other way, and try to find data associated with Open Tree records in other databases.

Get external data from a study

Let’s imagine you were interested in extending or reproducing the results of a published study. If that study is included in Open Tree you can find it via studies_find_studies or studies_find_trees and retrieve the published trees with get_study. rotl will also help you find external data. The function study_external_IDs retrieves the DOI for a given study, and uses that to gather some more data:

# Gather the external identifiers recorded for study pg_1980 and show them.
extra_data <- study_external_IDs("pg_1980")
extra_data
## External data identifiers for study 
##  $doi:  10.1016/j.ympev.2006.04.016 
##  $pubmed_id:  16762568 
##  $nucleotide_ids: vector of 58 IDs
##  $external_data_url http://purl.org/phylo/treebase/phylows/study/TB2:S1575

Here the returned object contains an external_data_url (in this case a link to the study in Treebase), a pubmed ID for the paper and a vector of IDs for the NCBI’s nucleotide database. The packages treebase and rentrez provide functions to make use of these IDs within R.

As an example, let’s use rentrez to download the first two DNA sequences and print them.

library(rentrez)
# Fetch the records for the first two nucleotide ids in FASTA format ...
seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
# ... and print the returned text as-is.
cat(seqs)
## >AM181011.1 Plectroninia neocaledoniense partial 28S rRNA gene, specimen voucher G316300 (Queensland Museum)
## GCTAGCAAGCGCGTCGGTGGTTCAGCCGGCTGGTCTCGTCGAGTTGTCGGTGTGCGGATCCGAACGGACC
## GCGGCCGATGGCGTCGGCGGGCAAGCTGTGGTGCACTCTGTCGGCGTGCGCGTCAGCGTCGGTTTCGGCC
## GGACGACGAGGCGCTCGGGGAAGGTAGCTGGACCGGTCTTCGGTGCAGTGTTATAGCCCTGGGCCGCTGG
## GTTCGGCGTTTGGGACCGAGGAGAGAGATGATCGCTGCAGCGCCTGTCTCCCTCTCGAGGGGGGCTAGCC
## AGCCGCTGTTTGGGTGGCGTCACTGGCGGAGGACTGCACGCAGTGCTTCGCCGGTGGTCGTGTCCAGGCG
## GGCGGTGTGGGTATAGAGGCGCTTAGGACGCTGGCGTCCAAATGGCCGTGCGCGACCCGTCTTGAAACAC
## GGACCAAGGAGTCTAGCATGTGCGCGAGTCTTAGGGTGTGGAAGCCCTCGGGCGCAATGAAAGTGAAGGG
## CCGTCGTCTCTCGGGGCTGCGGTGTGAGGTGAGAGCCGTCGCCGTCGGGTGGCGGTGCATCATCGGCCGG
## TCCATCCTGCTCTCAGGAGGATCTGCGCAAGAGCGTGTTTGCTGGGACCCGAAAGATGGTGAACTATGCC
## TGAATAGGGTGAAGCCAGAGGAAACTCTGGTGGAGGCTCGTAGCGGTTCTGACGTGCAAATCGATCGTCA
## AATTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGG
## ATAGCTGGAACTCGTCTTGACACAGTTTTATCAGGTAAAGCGAATGATTAGAGGTCTTGGGGGTGAAACA
## CCCTCAACCTATTCTCAAACTTTAAATAGGTAAGAAGCGCGACTTGCTCAATTGAAGTGGCGCGCAGTGA
## ATGTGAGTTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGAACGCTCGGTTA
## AGGTGCCCAAGTCGACGCTCATCAGACCCCAGAAAAGGTGTTGGTCGATATAGACAGCAGGACGGTGGCC
## ATGGAAGTCGGAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCTGAAAATGGAT
## GGCGCTCAAGCGTCGCACCTATACCGAGCCGTCGTGGTAAATGCCAGGCCACGACGAGTAGGAGGGCGCG
## GTGGTCGTGACGCAGCCCTTGGCGCGAGCCTGGGCGAAACGGCCTCCGGTGCAGATCTTGGTGGTAGTAG
## CAAATATTCAAATGAGAGCTTTGAAGACCGAAGTGGAGAAAGGTTCCATGTGAACAGCAGTTGGACATGG
## GTTAGTCGATCCTAAGAGATAGGGAAGTTCCGTGTGAAAGTGCGCAATGCGCTTCTGTGCTGCGCGCCTC
## CTATCGAAAGGGAATCGGGTTAATATTCCCGAACCGGAAGGCGGATATCTCTGGCTCTCGGGTCAGGAGC
## GGCAACGCAAGCGTACTGCGAGACGTCGGCGGGGGCTCCGGGAAGAGTTGTCTTTTCTTTTTAACGCAGT
## CGCCATCCCTGGAATCGGTTTGCCCGGAGATAGGGTTGGCTGGCTCGGTAAAGCAGCACACTTCATGTGC
## TGTCCGGTGCGCTCTCGACGGCCCTTGAAAATCGCAGGTGTGCATCGATTCTCGCATCCGGTCGTACTCA
## TAACCGCATCAGGTCTCCAAGGT
## 
## >AM181010.1 Eilhardia schulzei partial 28S rRNA gene, specimen voucher G316071 (Queensland Museum)
## GCTAGTAATGTACGTTGGTGGTTCAGCCGGCTAGTCTTGTCGAGTCGTCGTGTGGTGGATCCGACTGGAC
## CGTCCGCGGTGGTGTCGGCGGGCGAGCTGTGGTGCACTCTACGGACGTGCGCGTCAGCGTCGGTTCTCGA
## TGGGCGATAAGGTGCGTGGGGGAAGGTGGCTCGGTCCTTGGGAACTGAGTGTTACAGACCCTGGTGCTGG
## GCTCGTCGTGGGACCGAGGAGAGAGAGAGATGATCGCTGCGGCACCTGCCCCGTTGTCATTTTTCGGGGC
## TAGCCAGCCGTTTGTCAGGTGTGCGTCGGACGTTGAGGACTGCACGCAGTGCTGGACGTGGAGGCGTGAT
## CTGATGGCGGTGTGGGCATTAGAGGTGCCTAGGACGCTGGCGTCCAAATGGCCGTGCGCGACCCGTCTTG
## AAACACGGACCAAGGAGTCTAACATGTGCGCGAGTCTTAGGGTGTGCAAGCCCTCGGGCGCAATGAAAGT
## GAAGGCTCGGCGGCGCTAGTCGAGCTGAGGTGAGAGCCGTGGCCGTTGCATGTGGCGGCGGCGGCGCATC
## ATCGGCCGGTCCATCCTGCTCTCAGGGGGATCCGAGCAAGAGCGTATTTGTTGGGACCCGAAAGATGGTG
## AACTATGCCTGAATAGGGTGAAGCCAGAGGAAACTCTGGTGGAGGCTCGTAGCGATTCTGACGTGCAAAT
## CGATCGTCAAATTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTT
## TCCCTCAGGATAGCTGGAGCTCTTGGACACAGTTTTATCAGGTAAAGCGAATGATCAGAGGTCTTGGGGG
## TGAAACACCCTCAACCTATTCTCAAACTTTAAATCGGTAAGAAGCGCGACTTGCTGAATTGAAGCCGCGC
## GCAAGCAATGTGAGTTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGAACGC
## TGGGTTAAGGTGCCAAAGTCGACGCTCATCAGACCCCAGAAAAGGTGTTGGTTGATATAGACAGCAGGAC
## GATGGCCATGGAAGTCGGAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCTGAA
## AATGGATGGCGCTCAAGCGTCGCACCTATACCGGGCCGTCGTCGCAAATGCCAGGCGACGACGAGTAGGA
## GGGCGCAGTGGTCGTCATGCAGCCCTTGGCGTGAGCCTGGGTCAAACGGCCTCTGGTGCAGATCTTGGTG
## GTAGTAGCAAATATTCAAATGAGAGCTTTGAAGACCGAAGTGGAGAAAGGTTCCATGTGAACAGCAGTTG
## GACATGGGTTAGTCGATCCTAAGTGATAGGGGAGCTCCGTATGAAAGTGCGCAATCGGCCCTGCTTGTGT
## CGCCTTGCGCCACCTATCGAAAGGGAATCGGGTTAATATTCCCGAACCGGAAGGCGGATTTTCTCTGGCT
## CTCGGGTCAGGAGCGGCAACGCTAGCGAACCGCGAGACGTCGGCGGGGGCTCCGGGAAGAGTTGTCTTTT
## CTTTTTAACGCAGTCGCCATCCCTGGAATCGGTTTGCCCGGAGATAGGGTTGGCTGGCTCGGTAAAGCAG
## CACACTTCATGTGCTGTCCGGTGCGCTCTCGACGGCCCTTGAAAATCGCGGCGAGTGTAGTCTGATTTTC
## GCATCCGTTCGTACTCATAACCGCATCAGGTCTCCAAGGT

You could further process these sequences in R with the function read.dna from ape or save them to disk by specifying a file name with cat.

Find an OTT taxon in another taxonomic database

It is also possible to map an Open Tree taxon to a record in another taxonomic database. For instance, if we wanted to search for data about one of the tips of the sub-tree we fetched in the example above we could do so using taxon_external_IDs:

# Look up the identifiers used by other taxonomic databases for one taxon.
# NOTE(review): mu$ott_id[2] is the second row of `mu` (Paramecium
# tetraurelia in the table printed earlier), although the name `Tt_ids`
# suggests Tetrahymena thermophila (row 1) -- confirm which taxon is intended.
Tt_ids <- taxon_external_IDs(mu$ott_id[2])
Tt_ids
##   source       id
## 1  silva AY102613
## 2   ncbi     5888
## 3   gbif  5839866

A user could then use rgbif to find locality records using the gbif ID or rentrez to get genetic or bibliometric data about the taxon from the NCBI’s databases.

What next

The demonstration gets you to the point of visualizing your data in a phylogenetic context. But there’s a lot more you can do with this sort of data in R. For instance, you could use packages like ape, caper, phytools and MCMCglmm to perform phylogenetic comparative analyses of your data. You could gather more data on your species using packages that connect to trait databases like rfishbase, AntWeb or rnpn which provides data from the US National Phenology Network. You could also use rentrez to find genetic data for each of your species, and use that data to generate branch lengths for the phylogeny.

rotl/tests/0000755000177500001440000000000012705157664012731 5ustar deepayanusersrotl/tests/testthat/0000755000177500001440000000000013056407503014557 5ustar deepayanusersrotl/tests/testthat/test-api-taxonomy.R0000644000177500001440000000554312707503302020307 0ustar deepayanuserscontext("taxonomy API") ############################################################################ ## .taxonomy_taxon_info ## ############################################################################ test_that("ott_id is not null for .taxonomy_taxon_info", { skip_on_cran() expect_error(.taxonomy_taxon_info(NULL), "must supply") }) test_that("ott_id is of length 1 for .taxonomy_taxon_info", { skip_on_cran() expect_error(.taxonomy_taxon_info(c(123, 456, 789)), "Must only supply") }) test_that("ott_id is a numeric for .taxonomy_taxon_info", { skip_on_cran() expect_error(.taxonomy_taxon_info(TRUE), "look like numbers") }) test_that("include_lineage is a flag", { skip_on_cran() expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c(TRUE, FALSE)), "is not a flag") expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c("na")), "is not a flag") expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c(1235)), "is not a flag") }) test_that("list_terminal_descendants is a flag", { skip_on_cran() expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c(TRUE, FALSE)), "is not a flag") expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c("na")), "is not a flag") expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c(1235)), "is not a flag") }) ############################################################################ ## .taxonomy_subtree ## ############################################################################ test_that("ott_id is not null for .taxonomy_subtree", { skip_on_cran() expect_error(.taxonomy_subtree(NULL), "must supply") }) test_that("ott_id is of 
length 1 for .taxonomy_subtree", { skip_on_cran() expect_error(.taxonomy_subtree(c(123, 456, 789)), "Must only supply") }) test_that("ott_id is a numeric for .taxonomy_subtree", { skip_on_cran() expect_error(.taxonomy_subtree(TRUE), "look like numbers") }) ############################################################################ ## .taxonomy_mrca ## ############################################################################ test_that("ott_id is not null for .taxonomy_lica", { skip_on_cran() expect_error(.taxonomy_mrca(NULL), "must supply") }) test_that("ott_id is a numeric for .taxonomy_lica", { skip_on_cran() expect_error(.taxonomy_mrca(TRUE), "look like numbers") }) rotl/tests/testthat/test-taxonomy.R0000644000177500001440000003024713056114755017547 0ustar deepayanuserscontext("taxonomy") ############################################################################ ## taxonomy about ## ############################################################################ test_that("taxonomy_about is a list", { skip_on_cran() tt <- taxonomy_about() expect_true(inherits(tt, "list")) }) test_that("taxonomy_about has the names listed in documentation (if it breaks update documentation)", { skip_on_cran() tt <- taxonomy_about() expect_true(all(names(tt) %in% c("weburl", "author", "name", "source", "version"))) }) ############################################################################ ## taxon Info ## ############################################################################ test_that("taxonomy taxon info", { skip_on_cran() tid <- 515698 tt <- taxonomy_taxon_info(tid) expect_equal(tt[[1]][["ott_id"]], tid) expect_true(inherits(tt, "taxon_info")) }) test_that("taxonomy with include_lineage=TRUE", { skip_on_cran() tt <- taxonomy_taxon_info(515698, include_lineage = TRUE) expect_true(exists("lineage", tt[[1]])) expect_true(length(tt[[1]]$lineage) > 1) }) test_that("taxonomy with include_lineage=FALSE", { skip_on_cran() tt <- taxonomy_taxon_info(515698, include_lineage = 
FALSE) expect_false(exists("lineage", tt[[1]])) }) test_that("taxonomy with include_terminal_descendants=TRUE", { skip_on_cran() tt <- taxonomy_taxon_info(515698, include_terminal_descendants = TRUE) expect_true(exists("terminal_descendants", tt[[1]])) expect_true(length(tt[[1]][["terminal_descendants"]]) > 1) }) test_that("taxonomy with include_terminal_descendants=FALSE", { skip_on_cran() tt <- taxonomy_taxon_info(515698, include_terminal_descendants = FALSE) expect_false(exists("terminal_descendants", tt[[1]])) }) if (identical(Sys.getenv("NOT_CRAN"), "true")) { tid <- c(5004030, 337928, 631176) tax_info <- taxonomy_taxon_info(tid) } test_that("taxonomy_taxon tax_rank method", { skip_on_cran() expect_true(inherits(tax_rank(tax_info), c("otl_tax_rank", "list"))) expect_equal(names(tax_rank(tax_info)), c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")) expect_equal(unlist(unname(tax_rank(tax_info))), rep("genus", 3)) }) test_that("taxonomy_taxon ott_taxon_name method", { skip_on_cran() expect_true(inherits(tax_name(tax_info), c("otl_tax_info", "list"))) expect_equal(names(tax_name(tax_info)), c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")) expect_equal(unlist(unname(tax_name(tax_info))), c("Holothuria", "Acanthaster", "Diadema")) }) test_that("taxonomy_taxon synonyms method", { skip_on_cran() expect_true(inherits(synonyms(tax_info), c("otl_synonyms", "list"))) expect_equal(names(synonyms(tax_info)), c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")) expect_true(all(c("Diamema", "Centrechinus") %in% synonyms(tax_info)[[3]])) }) test_that("taxonomy_taxon is_suppressed method", { skip_on_cran() expect_true(inherits(is_suppressed(tax_info), c("otl_is_suppressed", "list"))) expect_equal(names(is_suppressed(tax_info)), c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")) expect_equal(unlist(unname(is_suppressed(tax_info))), c(FALSE, FALSE, FALSE)) }) test_that("taxonomy_taxon flags method", { skip_on_cran() 
expect_true(inherits(flags(tax_info), c("otl_flags", "list"))) expect_equal(names(flags(tax_info)), c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")) expect_equal(unlist(unname(flags(tax_info))), NULL) }) test_that("higher taxonomy method", { skip_on_cran() expect_error(tax_lineage(tax_info), "needs to be created") lg <- tax_lineage(taxonomy_taxon_info(tid, include_lineage = TRUE)) expect_true(inherits(lg, "list")) expect_true(inherits(lg[[1]], "data.frame")) expect_true(all(names(lg[[1]]) %in% c("rank", "name", "unique_name", "ott_id"))) expect_true(any(grepl("no rank", lg[[1]][["rank"]]))) expect_true(any(grep("life", lg[[1]][["name"]]))) }) ### ott_id() -------------------------------------------------------------------- test_that("taxonomy_taxon_info with ott_id for tax_info", { skip_on_cran() expect_equivalent(ott_id(tax_info), ott_id(taxonomy_taxon_info(ott_id(tax_info)))) }) test_that("taxonomy_subtree with ott_id for tax_info", { skip_on_cran() expect_error(taxonomy_subtree(ott_id = ott_id(tax_info)), "supply one") }) test_that("tol_node_info with ott_id for tax_info", { skip_on_cran() expect_error(tol_node_info(ott_id(tax_info)), "provide a single") }) test_that("tol_subtree with ott_id for tax_info", { skip_on_cran() expect_error(tol_subtree(ott_id = ott_id(tax_info)), "provide a single") }) test_that("tol_mrca with ott_id for tax_info", { skip_on_cran() expect_equivalent(list("Euleutheroza" = 317277), ott_id(tol_mrca(ott_id(tax_info)))) }) test_that("tol_induced_subtree with ott_id for tax_info", { skip_on_cran() expect_true(inherits(tol_induced_subtree(ott_id(tax_info)), "phylo")) }) test_that("taxonomy_mrca with ott_id for tax_info", { skip_on_cran() expect_equivalent(list("Euleutheroza" = 317277), ott_id(taxonomy_mrca(ott_id(tax_info)))) }) test_that("ott_id subset works", { skip_on_cran() expect_true(inherits(ott_id(tax_info), "otl_ott_id")) expect_true(inherits(ott_id(tax_info)[1], "otl_ott_id")) 
expect_true(!is.null(names(ott_id(tax_info)))) }) ############################################################################ ## taxon subtree ## ############################################################################ test_that("taxonomy subtree raw output", { skip_on_cran() tt <- taxonomy_subtree(515698, output_format = "raw") expect_true(inherits(tt, "list")) expect_identical(names(tt), "newick") }) test_that("taxonomy subtree returns warning if file is provided with something else than newick output", { skip_on_cran() expect_warning(taxonomy_subtree(515698, output_format = "raw", file = "/foo/bar"), "ignored") }) test_that("taxonomy subtree writes a 'valid' newick file", { skip_on_cran() ff <- tempfile(fileext = ".tre") tt <- taxonomy_subtree(515698, output_format = "newick", file = ff) expect_true(tt) expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE))) }) test_that("taxonomy subtree returns a valid newick string", { skip_on_cran() tt <- taxonomy_subtree(515698, output_format = "newick") expect_true(inherits(ape::read.tree(text = tt), "phylo")) }) test_that("taxonomy subtree returns a valid phylo object", { skip_on_cran() tt <- taxonomy_subtree(515698, output_format = "phylo") expect_true(inherits(tt, "phylo")) }) test_that("taxonomy subtree returns valid internal node names", { skip_on_cran() tt <- taxonomy_subtree(515698, output_format = "taxa") expect_true(inherits(tt, "list")) expect_equal(length(tt), 2) expect_equal(length(tt$tip_label), 14) expect_equal(length(tt$edge_label), 2) }) test_that("taxonomy subtree works if taxa has only 1 descendant", { skip_on_cran() tt <- taxonomy_subtree(ott_id = 3658331, output_format = "taxa") expect_true(inherits(tt, "list")) expect_equal(length(tt), 2) expect_true(inherits(tt$tip_label, "character")) }) ############################################################################ ## taxonomic MRCA ## ############################################################################ if 
(identical(Sys.getenv("NOT_CRAN"), "true")) { tax_mrca <- taxonomy_mrca(ott_id = c(515698, 590452, 643717)) tax_mrca_mono <- taxonomy_mrca(ott_id = c(79623, 962377)) } test_that("taxonomic most recent common ancestor", { skip_on_cran() expect_true(inherits(tax_mrca, "taxon_mrca")) expect_true(inherits(tax_mrca, "list")) }) test_that("mrca tax_rank method", { skip_on_cran() expect_equal(tax_rank(tax_mrca)[1], list("Asterales" = "order")) }) test_that("mrca tax_name method", { skip_on_cran() expect_equal(tax_name(tax_mrca)[1], list("Asterales" = "Asterales")) }) test_that("mrca ott_id method", { skip_on_cran() expect_equivalent(ott_id(tax_mrca)[1], list("Asterales" = 1042120)) expect_true(inherits(ott_id(tax_mrca), "otl_ott_id")) }) test_that("mrca unique_name method", { skip_on_cran() expect_equal(unique_name(tax_mrca)[1], list("Asterales" = "Asterales")) expect_true(inherits(unique_name(tax_mrca), "otl_unique_name")) }) test_that("mrca tax_sources method", { skip_on_cran() expect_equal(tax_sources(tax_mrca)[1], list("Asterales" = c("ncbi:4209", "worms:234044", "gbif:414", "irmng:10011"))) expect_true(inherits(tax_sources(tax_mrca), "otl_tax_sources")) }) test_that("mrca is_suppressed method", { skip_on_cran() expect_true(inherits(is_suppressed(tax_mrca), c("otl_is_suppressed", "list"))) expect_equal(is_suppressed(tax_mrca)[1], list("Asterales" = FALSE)) }) test_that("mrca flags method", { skip_on_cran() expect_true(inherits(flags(tax_mrca), c("otl_flags", "list"))) expect_equal(flags(tax_mrca)[1], list("Asterales" = NULL)) }) ### ott_id() -------------------------------------------------------------------- test_that("taxonomy_taxon_info with ott_id for tax_mrca", { skip_on_cran() expect_equivalent(ott_id(tax_mrca_mono), ott_id(taxonomy_taxon_info(ott_id(tax_mrca_mono)))) }) test_that("taxonomy_subtree with ott_id for tax_mrca", { skip_on_cran() tt <- taxonomy_subtree(ott_id = ott_id(tax_mrca_mono)) expect_true(length(tt[["tip_label"]]) > 10) 
expect_true(length(tt[["edge_label"]]) > 1) }) test_that("tol_node_info with ott_id for tax_mrca", { skip_on_cran() expect_equivalent(ott_id(tax_mrca_mono), ott_id(tol_node_info(ott_id(tax_mrca_mono)))) }) test_that("tol_subtree with ott_id for tax_mrca", { skip_on_cran() tt <- tol_subtree(ott_id = ott_id(tax_mrca_mono)) expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_true(length(tt$node.label) > 1) }) test_that("tol_mrca with ott_id for tax_mrca", { skip_on_cran() expect_equivalent(ott_id(tax_mrca_mono), ott_id(tol_mrca(ott_id(tax_mrca_mono)))) }) test_that("tol_induced_subtree with ott_id for tax_mrca", { skip_on_cran() expect_error(tol_induced_subtree(ott_id(tax_mrca_mono)), "least two valid") }) test_that("taxonomy_mrca with ott_id for tax_mrca", { skip_on_cran() expect_equivalent(ott_id(tax_mrca_mono), ott_id(taxonomy_mrca(ott_id(tax_mrca_mono)))) }) test_that("ott_id subset works", { skip_on_cran() expect_true(inherits(ott_id(tax_mrca_mono), "otl_ott_id")) expect_true(inherits(ott_id(tax_mrca_mono)[1], "otl_ott_id")) expect_true(!is.null(names(ott_id(tax_mrca_mono)))) }) ### is_in_tree() --------------------------------------------------------------- if (identical(Sys.getenv("NOT_CRAN"), "true")) { spp <- c("Tyrannosaurus rex", "Velociraptor", "Fabaceae", "Solanaceae") ot_names <- tnrs_match_names(spp) ot_ids <- ott_id(ot_names) } test_that("test is_in_tree", { skip_on_cran() in_tree <- is_in_tree(ot_ids) expect_equal(sum(in_tree), 1) expect_true(all(names(in_tree) %in% spp)) }) rotl/tests/testthat/test-tnrs.R0000644000177500001440000000541212705157664016661 0ustar deepayanuserscontext("tnrs") ############################################################################ ## tnrs_match_names ## ############################################################################ test_that("tnrs_match_names fails if incorrect context is provided", { skip_on_cran() expect_error(tnrs_match_names("felis", context_name = "Cats"), "Check 
possible values using tnrs_contexts") }) test_that("tnrs_match_names fails if invalid name provided (nothing returned)", { skip_on_cran() expect_error(tnrs_match_names("fluffy", do_approximate_matching = FALSE), "No matches for any of the provided taxa") }) test_that("tnrs_match_names warns if a name is not matched", { skip_on_cran() expect_warning(tnrs_match_names(c("fluffy", "felis"), do_approximate_matching = FALSE), "are not matched") }) test_that("object returned by tnrs_match_names have the correct data type", { skip_on_cran() birds <- c("stercorarius parasiticus", "ficedula albicollis", "sterna dougallii") taxa <- tnrs_match_names(birds, do_approximate_matching = FALSE) expect_true(is.logical(taxa[["approximate_match"]])) expect_true(is.logical(taxa[["is_synonym"]])) }) test_that("tnrs_match_names deals correctly with non-exact matches", { skip_on_cran() birds <- c("stercorarius parasiticus", "ficedula albicollis", "sternadougallii") expect_warning(taxa <- tnrs_match_names(birds, do_approximate_matching = FALSE), "are not matched") expect_equal(nrow(taxa), 3L) expect_equivalent(taxa[match("sternadougallii", taxa[["search_string"]]), ], list("sternadougallii", NA_character_, NA, NA_character_, NA, NA_character_, NA_character_)) }) ## everything else is covered by the match_names + the API tests ############################################################################ ## tnrs_contexts ## ############################################################################ test_that("tnrs_contexts", { skip_on_cran() tc <- tnrs_contexts() expect_true(inherits(tc, "tnrs_contexts")) expect_true(all(names(tc) %in% c("ANIMALS", "MICROBES", "FUNGI", "PLANTS", "LIFE"))) }) ############################################################################ ## tnrs_infer_context ## ############################################################################ test_that("tnrs_infer_context", { skip_on_cran() tic <- tnrs_infer_context(c("Felis", "Leo")) 
expect_equal(tic[["context_name"]], "Mammals") expect_equal(tic[["context_ott_id"]], 244265) expect_equal(tic[["ambiguous_names"]][[1]], "leo") }) rotl/tests/testthat/test-deduplicate_labels.R0000644000177500001440000000310312567651142021467 0ustar deepayanuserstr_string <- " ((A,A),A 1); ((B.1,B,C),B); ((D,D_1),D.1); ((('A 1','A 1'),A.1),'A 1'); ((('A A A','A A A'),A.1),'A 1'); ((((A_1:0.1,B__2:0.1)cats:0.1,(A_1:0.1,A_1:0.1)dogs:0.1)mammals:0.1):0.1)fur:0.1; " file_dup <- tempfile() cat(tr_string, file = file_dup, sep = "\n") ############################################################################ ## parse_newick ## ############################################################################ context("parse_newick") test_that("parse newick works correctly", { prsed_str <- parse_newick(file_dup) expect_true(is.character(prsed_str)) expect_equal(length(prsed_str), 6L) }) ############################################################################ ## deduplicate_labels ## ############################################################################ context("deduplicate_labels") test_that("deduplicate labels works on made up example", { expect_warning(dedup_tr <- deduplicate_labels(file_dup), "Some tip labels were duplicated") expect_true(file.exists(dedup_tr)) phylo_tr <- rncl::read_newick_phylo(file = dedup_tr) expect_true(inherits(phylo_tr, "multiPhylo")) expect_equal(phylo_tr[[6]]$tip.label, c("A_1_1", "B__2", "A_1_2", "A_1")) }) test_that("deduplicate labels works on a OTL study", { skip_on_cran() expect_warning(get_study_tree(study_id="pg_710", tree_id="tree1277", tip_label='ott_taxon_name'), "Some tip labels were duplicated") }) unlink(file_dup) rotl/tests/testthat/test-tol.R0000644000177500001440000003777512770013470016476 0ustar deepayanusers############################################################################ ## tol_about ## ############################################################################ context("test tol_about (and in turn 
print.tol_summary)") if (identical(Sys.getenv("NOT_CRAN"), "true")) { req <- tol_about(include_source_list = TRUE) } test_that("Names in object returned are correct/match the docs", { skip_on_cran() expect_true(all(names(req) %in% c("source_list", "date_created", "root", "num_source_trees", "taxonomy_version", "num_source_studies", "filtered_flags", "synth_id", "source_id_map"))) expect_true(all(names(req$root) %in% c("taxon", "num_tips", "node_id"))) expect_true(all(names(req$root$taxon) %in% c("tax_sources", "name", "unique_name", "rank", "ott_id"))) expect_true(all(names(source_list(req)) %in% c("study_id", "tree_id", "git_sha"))) expect_error(source_list(tol_about(include_source_list = FALSE)), "has been created using") expect_true(nrow(source_list(req)) > 1) expect_true(all(grepl("^(ot|pg)", source_list(req)[["study_id"]]))) expect_true(all(grepl("^tr", source_list(req)[["tree_id"]], ignore.case = TRUE))) }) test_that("tol_node tax_rank method", { skip_on_cran() expect_true(inherits(tax_rank(req), c("otl_rank", "list"))) expect_equal(tax_rank(req)[[1]], "no rank") }) test_that("tol_node ott_id method", { skip_on_cran() expect_true(inherits(ott_id(req), c("otl_ott_id", "list"))) expect_equal(ott_id(req)[[1]], 93302) expect_equal(names(ott_id(req)), "cellular organisms") }) test_that("tol_node tax_sources", { skip_on_cran() expect_true(inherits(tax_sources(req), c("otl_tax_sources", "list"))) expect_true(any(grepl("ncbi", tax_sources(req)[[1]]))) expect_equal(names(tax_sources(req)), "cellular organisms") }) test_that("tol_node unique_name", { skip_on_cran() expect_true(inherits(unique_name(req), c("otl_unique_name", "list"))) expect_equal(unique_name(req)[[1]], "cellular organisms") expect_equal(names(unique_name(req)), "cellular organisms") }) test_that("tol_node tax_name", { skip_on_cran() expect_true(inherits(tax_name(req), c("otl_name", "list"))) expect_equal(tax_name(req)[[1]], "cellular organisms") expect_equal(names(tax_name(req)), "cellular organisms") 
}) ### ott_id() -------------------------------------------------------------------- test_that("taxonomy_taxon_info with ott_id for tol_about", { skip_on_cran() expect_equal(ott_id(req), ott_id(taxonomy_taxon_info(ott_id(req)))) }) ## can't do that, it's pulling the whole tree ## test_that("taxonomy_subtree with ott_id for tol_about", { ## taxonomy_subtree(ott_id = ott_id(req)) ## }) test_that("tol_node_info with ott_id for tol_about", { skip_on_cran() expect_equal(ott_id(req), ott_id(tol_node_info(ott_id(req)))) }) ## can't do that, it's pulling the whole tree ## test_that("tol_subtree with ott_id for tol_about", { ## tol_subtree(ott_id = ott_id(req)) ## }) test_that("tol_mrca with ott_id for tol_about", { skip_on_cran() expect_equal(ott_id(req)[1], ott_id(tol_mrca(ott_id(req)))[1]) }) test_that("tol_induced_subtree with ott_id for tol_about", { skip_on_cran() expect_error(tol_induced_subtree(ott_id(req)), "least two valid") }) test_that("taxonomy_mrca with ott_id for tol_about", { skip_on_cran() expect_equal(ott_id(req), ott_id(taxonomy_mrca(ott_id(req)))) }) ############################################################################ ## tol_subtree ## ############################################################################ context("test tol_subtree") test_that("tol_subtree fails if ott_id is invalid", { skip_on_cran() expect_error(tol_subtree(ott_id = 6666666)) }) test_that("tol_subtree fails if more than one ott_id is provided", { skip_on_cran() expect_error(tol_subtree(ott_id = c(666666, 6666667)), "Please provide a single") }) test_that("tol_subtree fails if ott_id doesn't look like a number", { skip_on_cran() expect_error(tol_subtree(ott_id = "111A1111"), "must look like numbers") }) test_that("tol_subtree returns a phylo object by default", { skip_on_cran() expect_true(inherits(tol_subtree(ott_id = 81461), "phylo")) }) test_that("tol_subtree returns a newick file when providing a file argument", { skip_on_cran() ff <- tempfile(fileext = ".tre") tr <- 
tol_subtree(ott_id = 81461, file = ff) expect_true(tr) expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE))) }) ############################################################################ ## tol_induced_subtree ## ############################################################################ context("test tol_induced_subtree") test_that("warning for node ids that are not in TOL graph", { skip_on_cran() expect_error(tol_induced_subtree(ott_ids = c(357968, 867416, 939325, 9999999)), "not found") }) test_that("error if ott_ids provided don't look like numbers", { skip_on_cran() expect_error(tol_induced_subtree(ott_ids = c("13242", "kitten")), "must look like numbers") }) ## test_that("warning for ott ids not in tree", ## ???) test_that("tol_induced_subtree generates a newick file when providing a file argument", { skip_on_cran() ff <- tempfile(fileext = ".tre") tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104), file = ff) expect_true(tr) expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE))) }) ############################################################################ ## tol_mrca ## ############################################################################ if (identical(Sys.getenv("NOT_CRAN"), "true")) { birds <- tol_mrca(ott_ids = c(412129, 536234)) hol <- tol_mrca(c(431586, 957434)) mono <- tol_mrca(ott_ids = c(962377, 79623)) } test_that("tol_mrca fails if ott_ids are not numbers", { skip_on_cran() expect_error(tol_mrca(ott_ids = c(13243, "a13415")), "must look like numbers") }) test_that("tol_mrca returns a list", { skip_on_cran() expect_true(inherits(birds, "list")) expect_true(inherits(birds, "tol_mrca")) expect_true(all(names(birds) %in% c("mrca", "source_id_map", "nearest_taxon"))) }) test_that("methods for tol_mrca where the node is a taxon", { skip_on_cran() expect_true(inherits(tax_sources(hol), c("otl_tax_sources", "list"))) expect_true(inherits(unique_name(hol), c("otl_unique_name", "list"))) 
expect_true(inherits(tax_name(hol), c("otl_name", "list"))) expect_true(inherits(tax_rank(hol), c("otl_rank", "list"))) expect_true(inherits(ott_id(hol), c("otl_ott_id", "list"))) expect_true(length(tax_sources(hol)[[1]]) > 1) expect_true(any(grepl("worms", tax_sources(hol)[[1]]))) expect_equal(unique_name(hol)[[1]], "Holothuria") expect_equal(tax_name(hol)[[1]], "Holothuria") expect_equal(tax_rank(hol)[[1]], "genus") expect_equal(ott_id(hol)[[1]], 5004030) expect_equal(names(tax_sources(hol)), "Holothuria") expect_true(all(names(source_list(hol)) %in% c("tree_id", "study_id", "git_sha"))) expect_equal(attr(tax_sources(hol), "taxon_type"), "mrca") }) test_that("methods for tol_mrca where the node is not a taxon", { skip_on_cran() expect_true(inherits(birds, "list")) expect_true(inherits(tax_sources(birds), c("otl_tax_sources", "list"))) expect_true(inherits(unique_name(birds), c("otl_unique_name", "list"))) expect_true(inherits(tax_name(birds), c("otl_name", "list"))) expect_true(inherits(tax_rank(birds), c("otl_rank", "list"))) expect_true(inherits(ott_id(birds), c("otl_ott_id", "list"))) expect_true(length(tax_sources(birds)[[1]]) >= 1) expect_true(any(grepl("ncbi", tax_sources(birds)[[1]]))) expect_equal(unique_name(birds)[[1]], "Neognathae") expect_equal(tax_name(birds)[[1]], "Neognathae") expect_equal(tax_rank(birds)[[1]], "superorder") expect_equal(ott_id(birds)[[1]], 241846) expect_equal(names(ott_id(birds)), "Neognathae") expect_true(all(names(source_list(birds)) %in% c("tree_id", "study_id", "git_sha"))) expect_equal(attr(tax_sources(birds), "taxon_type"), "nearest_taxon") }) ### ott_id() -------------------------------------------------------------------- test_that("taxonomy_taxon_info with ott_id for tol_mrca", { skip_on_cran() expect_equal(ott_id(mono)[1], ott_id(taxonomy_taxon_info(ott_id(mono)))[1]) }) test_that("taxonomy_subtree with ott_id for tol_mrca", { skip_on_cran() tt <- taxonomy_subtree(ott_id = ott_id(mono)) 
expect_true(length(tt[["tip_label"]]) > 10) expect_true(length(tt[["edge_label"]]) > 7) }) test_that("tol_node_info with ott_id for tol_mrca", { skip_on_cran() expect_equal(ott_id(mono)[1], ott_id(tol_node_info(ott_id(mono)))[1]) }) test_that("tol_subtree with ott_id for tol_mrca", { skip_on_cran() tt <- tol_subtree(ott_id = ott_id(mono)) expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_true(length(tt$node.label) > 1) }) test_that("tol_mrca with ott_id for tol_mrca", { skip_on_cran() expect_equal(ott_id(mono)[1], ott_id(tol_mrca(ott_id(mono)))[1]) }) test_that("tol_induced_subtree with ott_id for tol_mrca", { skip_on_cran() expect_error(tol_induced_subtree(ott_id(mono)), "least two valid") }) test_that("taxonomy_mrca with ott_id for tol_mrca", { skip_on_cran() expect_equivalent(ott_id(mono), ott_id(taxonomy_mrca(ott_id(mono)))) }) ############################################################################ ## strip_ott_ids ## ############################################################################ test_that("OTT ids can be striped from tip labels to allow taxon-matching", { skip_on_cran() genera <- c("Setophaga", "Cinclus", "Struthio") tr <- tol_induced_subtree(ott_ids=c(666104, 267845, 292466)) expect_true(all(strip_ott_ids(tr$tip.label) %in% genera)) }) ############################################################################ ## tol_node_info ## ############################################################################ if (identical(Sys.getenv("NOT_CRAN"), "true")) { tol_info <- tol_node_info(ott_id = 81461) tol_lin <- tol_node_info(ott_id = 81461, include_lineage = TRUE) tol_mono <- tol_node_info(ott_id = 962396) } test_that("tol node info.", { skip_on_cran() expect_true(all(names(tol_info) %in% c("partial_path_of", "supported_by", "source_id_map", "taxon", "num_tips", "terminal", "node_id"))) expect_true(inherits(tol_info, "tol_node")) }) ### methods --------------------------------------------------------------------- 
## Accessor-method tests for the `tol_node` object returned by
## tol_node_info(). `tol_info`, `tol_lin` and `tol_mono` are created by the
## NOT_CRAN-guarded setup that precedes these tests; every test is skipped on
## CRAN.

test_that("tol_node tax_rank method", {
  skip_on_cran()
  ## NOTE(review): inherits(x, c("a", "list")) is TRUE as soon as x inherits
  ## from *any* of the listed classes, so the class checks written this way
  ## also pass for an object that is only a "list". Confirm the intended
  ## class names before tightening them.
  expect_true(inherits(tax_rank(tol_info), c("otl_tax_rank", "list")))
  expect_equal(tax_rank(tol_info)[[1]], "class")
})

test_that("tol_node ott_id method", {
  skip_on_cran()
  expect_true(inherits(ott_id(tol_info), c("otl_ott_id", "list")))
  expect_equal(ott_id(tol_info)[[1]], 81461)
  expect_equal(names(ott_id(tol_info)), "Aves")
})

test_that("tol_node tax_sources", {
  skip_on_cran()
  expect_true(inherits(tax_sources(tol_info), c("otl_tax_sources", "list")))
  expect_true(any(grepl("worms", tax_sources(tol_info)[[1]])))
  expect_equal(names(tax_sources(tol_info)), "Aves")
})

test_that("tol_node unique_name", {
  skip_on_cran()
  expect_true(inherits(unique_name(tol_info), c("otl_unique_name", "list")))
  expect_equal(unique_name(tol_info)[[1]], "Aves")
  expect_equal(names(unique_name(tol_info)), "Aves")
})

test_that("tol_node tax_name", {
  skip_on_cran()
  expect_true(inherits(tax_name(tol_info), c("otl_name", "list")))
  expect_equal(tax_name(tol_info)[[1]], "Aves")
  expect_equal(names(tax_name(tol_info)), "Aves")
})

test_that("tol_node source_list method", {
  skip_on_cran()
  expect_true(inherits(source_list(tol_info), "data.frame"))
  expect_true(all(names(source_list(tol_info)) %in%
                    c("study_id", "tree_id", "git_sha")))
})

test_that("tol_node tol_lineage", {
  skip_on_cran()
  ## tol_info was requested without include_lineage, so this must fail
  expect_error(tol_lineage(tol_info), "needs to be created")
  expect_true(inherits(tol_lineage(tol_lin), "data.frame"))
  expect_true(nrow(tol_lineage(tol_lin)) > 1)
  expect_true(all(names(tol_lineage(tol_lin)) %in%
                    c("node_id", "num_tips", "is_taxon")))
  expect_true(all(grepl("^(ott|mrca)", tol_lineage(tol_lin)[["node_id"]])))
})

test_that("tol_node tax_lineage", {
  skip_on_cran()
  ## tol_info was requested without include_lineage, so this must fail
  expect_error(tax_lineage(tol_info), "needs to be created")
  expect_true(inherits(tax_lineage(tol_lin), "data.frame"))
  expect_true(nrow(tax_lineage(tol_lin)) > 1)
  expect_true(all(names(tax_lineage(tol_lin)) %in%
                    c("rank", "name", "unique_name", "ott_id")))
  expect_true(any(grepl("no rank", tax_lineage(tol_lin)[["rank"]])))
  expect_true(any(grepl("cellular organisms",
                        tax_lineage(tol_lin)[["name"]])))
})

### ott_id() --------------------------------------------------------------------
## The ott_id() of a tol_node must be usable as input to the other API
## wrappers.

test_that("taxonomy_taxon_info with ott_id for tol_info", {
  skip_on_cran()
  expect_equivalent(ott_id(tol_mono),
                    ott_id(taxonomy_taxon_info(ott_id(tol_mono))))
})

test_that("taxonomy_subtree with ott_id for tol_info", {
  skip_on_cran()
  tt <- taxonomy_subtree(ott_id = ott_id(tol_mono))
  expect_true(length(tt[["tip_label"]]) > 10)
  expect_true(length(tt[["edge_label"]]) > 7)
})

test_that("tol_node_info with ott_id for tol_info", {
  skip_on_cran()
  expect_equivalent(ott_id(tol_mono),
                    ott_id(tol_node_info(ott_id(tol_mono))))
})

test_that("tol_subtree with ott_id for tol_info", {
  skip_on_cran()
  tt <- tol_subtree(ott_id = ott_id(tol_mono))
  expect_true(inherits(tt, "phylo"))
  expect_true(length(tt$tip.label) > 1)
  expect_true(length(tt$node.label) > 1)
})

test_that("tol_mrca with ott_id for tol_info", {
  skip_on_cran()
  expect_equivalent(ott_id(tol_mono),
                    ott_id(tol_mrca(ott_id(tol_mono))))
})

test_that("tol_induced_subtree with ott_id for tol_info", {
  skip_on_cran()
  ## a single id is not enough to induce a subtree
  expect_error(tol_induced_subtree(ott_id(tol_mono)), "least two valid")
})

test_that("taxonomy_mrca with ott_id for tol_info", {
  skip_on_cran()
  expect_equivalent(ott_id(tol_mono),
                    ott_id(taxonomy_mrca(ott_id(tol_mono))))
})
rotl/tests/testthat/test-api-studies.R0000644000177500001440000001350412674100604020106 0ustar deepayanusers
context("studies API tests")

## Argument-validation tests for the low-level studies API wrappers. Each
## description names the argument that the call actually gets wrong, so a
## failure report points at the right check.

###########################
## .studies_find_studies ##
###########################

test_that("argument verbose needs to be logical for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, NULL, "123", FALSE),
               "logical")
})

test_that("argument exact needs to be logical for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, NULL, TRUE, "123"),
               "logical")
})

test_that("argument property needs to be character for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(123, NULL, TRUE, TRUE),
               "character")
})

test_that("argument value needs to be character for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies("test", 123, TRUE, TRUE),
               "character")
})

test_that("value must be supplied with property for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies("test", NULL, TRUE, TRUE),
               "Must supply")
})

test_that("property must be supplied with value for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, "test", TRUE, TRUE),
               "Must supply")
})

###########################
## .studies_find_trees   ##
###########################

test_that("argument verbose needs to be logical for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, NULL, "123", FALSE),
               "logical")
})

test_that("argument exact needs to be logical for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, NULL, TRUE, "123"),
               "logical")
})

test_that("argument property needs to be character for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(123, NULL, TRUE, TRUE),
               "character")
})

test_that("argument value needs to be character for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees("test", 123, TRUE, TRUE),
               "character")
})

test_that("value must be supplied with property for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees("test", NULL, TRUE, TRUE),
               "Must supply")
})

test_that("property must be supplied with value for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, "test", TRUE, TRUE),
               "Must supply")
})

test_that("exact = TRUE finds no study for the lowercase name 'felidae'", {
  skip_on_cran()
  res <- .studies_find_studies("ot:focalCladeOTTTaxonName", "felidae",
                               exact = TRUE)
  expect_equal(length(res$matched_studies), 0)
})

test_that("exact = TRUE finds at least one study for 'Felidae'", {
  skip_on_cran()
  res <- .studies_find_studies("ot:focalCladeOTTTaxonName", "Felidae",
                               exact = TRUE)
  expect_true(length(res$matched_studies) >= 1)
})

############################################################################
## .get_study                                                             ##
############################################################################

test_that("study_id isn't NULL for .get_study", {
  skip_on_cran()
  expect_error(.get_study(NULL, "test"),
               "Must supply")
})

test_that("study_id is character for .get_study", {
  skip_on_cran()
  expect_error(.get_study(TRUE, "test"),
               "character")
})

############################################################################
## .get_study_tree                                                        ##
############################################################################

## NOTE(review): the descriptions below assume the positional order
## (study_id, tree_id) -- confirm against the definition of .get_study_tree.

test_that("study_id and tree_id aren't both NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(NULL, NULL),
               "Must supply")
})

test_that("tree_id isn't NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree("123", NULL),
               "Must supply")
})

test_that("study_id isn't NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(NULL, "123"),
               "Must supply")
})

test_that("study_id is character for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(TRUE, "test"),
               "character")
})

test_that("tree_id is character for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree("test", TRUE),
               "character")
})

############################################################################
## .get_study_subtree                                                     ##
############################################################################

## Positional order used below: (study_id, tree_id, subtree_id).

test_that("ids aren't all NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(NULL, NULL, NULL),
               "Must supply")
})

test_that("tree_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("123", NULL, "123"),
               "Must supply")
})

test_that("study_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(NULL, "123", "123"),
               "Must supply")
})

test_that("subtree_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("123", "123", NULL),
               "Must supply")
})

test_that("study_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(TRUE, "test", "test"),
               "character")
})

test_that("tree_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("test", TRUE, "test"),
               "character")
})

test_that("subtree_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("test", "test", TRUE),
               "character")
})
rotl/tests/testthat/test-external.R0000644000177500001440000000243412707722370017510 0ustar deepayanusers
context("Study external data")

## Shared fixtures; they need network access, so they are only built when
## NOT_CRAN is "true" (the tests that use them call skip_on_cran()).
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  all_sources <- c("doi", "pubmed_id", "external_data_url",
                   "popset_ids", "nucleotide_ids")
  all_data <- study_external_IDs("pg_1940")
}

test_that("We can recover dois, pmids, NCBI IDs", {
  skip_on_cran()
  expect_true(inherits(all_data, "study_external_data"))
  expect_named(all_data)
})

test_that("We can handle studies with missing external IDs", {
  skip_on_cran()
  expect_warning(
    missing_data <- study_external_IDs("ot_97"),
    "skipping NCBI"
  )
  expect_named(missing_data)
  expect_true(inherits(missing_data, "study_external_data"))
  ## exactly two of the expected sources are absent: we really skipped the
  ## NCBI lookups
  expect_equal(sum(!all_sources %in% names(missing_data)), 2)
})

test_that("The print functions for external data objects work", {
  skip_on_cran()
  missing_data <- study_external_IDs("ot_91")
  expect_output(print(all_data), "External data identifiers for study")
  expect_output(print(missing_data), "External data identifiers for study")
})

context("Taxon external data")

test_that("We can recover external IDs for Open Tree taxa", {
  skip_on_cran()
  gibbon_IDs <- taxon_external_IDs(712902)
  expect_true(inherits(gibbon_IDs, "data.frame"))
  expect_equal(names(gibbon_IDs), c("source", "id"))
})
rotl/tests/testthat/test-api-tnrs.R0000644000177500001440000000413012705157664017424 0ustar deepayanusers
context("tnrs API")
############################################################################ ## .tnrs_match_names ## ############################################################################ test_that("names argument is provided for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names(NULL, NULL, TRUE, NULL, FALSE), "must supply") }) test_that("names argument is character for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names(TRUE, NULL, TRUE, NULL, FALSE), "character") }) test_that("names and ids have the same lengths for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names("Felis", NULL, TRUE, c("abc", "def"), FALSE), "same length") }) test_that("ids must be character for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names("Felis", NULL, TRUE, TRUE, FALSE), "character") }) test_that("do_approximate_matching is logical for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names("Felis", NULL, "true", NULL, FALSE), "logical") }) test_that("include_suppressed is logical for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names("Felis", NULL, TRUE, NULL, "true"), "logical") }) test_that("context_name is character for .tnrs_match_names", { skip_on_cran() expect_error(.tnrs_match_names("Felis", TRUE, TRUE, NULL, FALSE, TRUE), "character") }) ############################################################################ ## .tnrs_infer_context ## ############################################################################ test_that("names is not NULL for .tnrs_infer_context", { skip_on_cran() expect_error(.tnrs_infer_context(NULL), "Must supply") }) test_that("names is character for .tnrs_infer_context", { skip_on_cran() expect_error(.tnrs_infer_context(TRUE), "character") }) rotl/tests/testthat/test-match_names.R0000644000177500001440000003347612774536352020166 0ustar deepayanuserscontext("match names") ############################################################################ ## 
check_args_match_names ## ############################################################################ context("check_args_match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia")) } test_that("error generated if object provided isn't created by tnrs_match_names", expect_error(rotl:::check_args_match_names(letters), "was not created using")) test_that("error generated if no argument is provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp), "You must specify") }) test_that("error generated if row_number and taxon_name are provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, row_number = 1, taxon_name = "holothuria"), "must use only one of ") }) test_that("error generated if row_number and ott_id are provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, row_number = 1, ott_id = 5004030), "must use only one of") }) test_that("error generated if ott_id and taxon_name are provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, taxon_name = "holothuria", ott_id = 5004030), "must use only one of") }) test_that("error generated if row_number is not numeric", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, row_number = TRUE), "must be a numeric") }) test_that("error generated if ott_id is not numeric", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, ott_id = TRUE), "must look like a number") }) test_that("error generated if taxon_name is not character", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, taxon_name = TRUE), "must be a character") }) test_that("error generated if row_number if not one of the row", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, row_number = 10), "is not a valid row number") expect_error(rotl:::check_args_match_names(rsp, row_number = 1.5), "is not a valid row number") expect_error(rotl:::check_args_match_names(rsp, 
row_number = 0), "is not a valid row number") }) test_that("error generated if invalid taxon_name", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, taxon_name = "echinodermata"), "Can't find") expect_error(rotl:::check_args_match_names(rsp, taxon_name = NA_character_), "Can't find") }) test_that("error generated if invalid ott id", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, ott_id = 66666), "Can't find") }) test_that("error generated if more than 1 value for row_number is provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, row_number = c(1, 2, 3)), "You must supply a single element") }) test_that("error generated if more than 1 value for taxon_name is provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, taxon_name = c("holothuria", "diadema")), "You must supply a single element") }) test_that("error generated if more than 1 value for ott_id is provided", { skip_on_cran() expect_error(rotl:::check_args_match_names(rsp, ott_id = c(5004030, 4930522, 240396)), "only 1 element should be provided") }) ############################################################################ ## inspect.match_names ## ############################################################################ context("inspect.match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia")) expect_warning(rsp_na <- tnrs_match_names(names = c("diadema", "fluffy", "hemichordata", "escherichia"))) diadema_ids <- c(4930522, 631176, 643831) } test_that("correct data is being returned when asked to lookup by taxon name", { skip_on_cran() tt <- inspect(rsp, taxon_name = "diadema")[["ott_id"]] expect_true(all(tt %in% diadema_ids)) }) test_that("correct data is being returned when asked to lookup by ott_id", { skip_on_cran() tt <- inspect(rsp, ott_id = ott_id(rsp)[2])[["ott_id"]] expect_true(all(tt %in% diadema_ids)) }) test_that("correct data is being returned 
when asked to lookup by row number", { skip_on_cran() tt <- inspect(rsp, row_number = 2)[["ott_id"]] expect_true(all(tt %in% diadema_ids)) }) ## with missing data test_that("correct data is being returned when asked to lookup by taxon name (with missing data)", { skip_on_cran() tt <- inspect(rsp_na, taxon_name = "diadema")[["ott_id"]] expect_true(all(tt %in% diadema_ids)) expect_true(is.na(inspect(rsp_na, taxon_name = "fluffy")[["ott_id"]])) }) test_that("correct data is being returned when asked to lookup by ott_id (with missing data)", { skip_on_cran() tt <- inspect(rsp_na, ott_id = ott_id(rsp)[2])[["ott_id"]] expect_true(all(tt %in% diadema_ids)) }) test_that("correct data is being returned when asked to lookup by row number (with missing data)", { skip_on_cran() tt <- inspect(rsp_na, row_number = 1)[["ott_id"]] expect_true(all(tt %in% diadema_ids)) expect_true(is.na(inspect(rsp_na, row_number = 2)[["ott_id"]])) }) ############################################################################ ## synonyms.match_names ## ############################################################################ context("list_synonym_match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { tax_rsp <- c("Holothuria", "Diadema", "Fromia") rsp <- tnrs_match_names(names = tax_rsp) tax_rsp_na <- c("Holothuria", "Diadema", "fluffy", "Fromia") expect_warning(rsp_na <- tnrs_match_names(names = tax_rsp_na)) } test_that("synonyms", { skip_on_cran() tt <- synonyms(rsp) expect_true(inherits(tt, "list")) expect_equal(names(tt), c("Holothuria", "Diadema (genus in Nucletmycea)", "Fromia")) }) test_that("correct synonyms are being returned when asked to look up by taxon name", { skip_on_cran() tt <- synonyms(rsp, taxon_name = "holothuria") expect_true(any(grepl("^Holothuria", names(tt)))) }) test_that("holothuria is present in each element of the list", { skip_on_cran() tt <- synonyms(rsp, taxon_name = "holothuria") expect_true(all(sapply(tt, function(x) any(grepl("holothuria", x, 
ignore.case = TRUE))))) expect_true(any(grepl("Halodeima", tt[["Holothuria"]]))) }) test_that("correct synonyms are being returned when asked to look up by row number", { skip_on_cran() tt <- synonyms(rsp, row_number = 1) expect_true(any(grepl("^Holothuria", names(tt)))) expect_true(any(grepl("Halodeima", tt[["Holothuria"]]))) }) test_that("correct synonyms are being returned when asked to look up by ott id", { skip_on_cran() tt <- synonyms(rsp, ott_id = 5004030) expect_true(any(grepl("^Holothuria", names(tt)))) expect_true(any(grepl("Halodeima", tt[["Holothuria"]]))) }) ## with missing data test_that("synonyms", { skip_on_cran() tt <- synonyms(rsp_na) expect_true(inherits(tt, "list")) expect_equal(names(tt), c("Holothuria", "Diadema (genus in Nucletmycea)", "Fromia")) }) test_that("correct synonyms are being returned when asked to look up by taxon name", { skip_on_cran() tt <- synonyms(rsp_na, taxon_name = "holothuria") expect_true(any(grepl("^Holothuria", names(tt)))) expect_true(is.na(synonyms(rsp_na, taxon_name = "fluffy")[[1]])) }) test_that("correct synonyms are being returned when asked to look up by row number", { skip_on_cran() tt <- synonyms(rsp_na, row_number = 1) expect_true(any(grepl("^Holothuria", names(tt)))) expect_true(any(grepl("Halodeima", tt[["Holothuria"]]))) expect_true(is.na(synonyms(rsp_na, row_number = 3)[[1]])) }) test_that("correct synonyms are being returned when asked to look up by ott id", { skip_on_cran() tt <- synonyms(rsp_na, ott_id = 5004030) expect_true(any(grepl("^Holothuria", names(tt)))) expect_true(any(grepl("Halodeima", tt[["Holothuria"]]))) }) ############################################################################ ## update.match_names ## ############################################################################ context("update.match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia")) } test_that("error message if missing both new arguments", { 
skip_on_cran() expect_error(update(rsp, row_number = 1), "You must specify either") }) test_that("error message if both new arguments are provided", { skip_on_cran() expect_error(update(rsp, row_number = 1, new_row_number = 1, new_ott_id = 6666), "You must use only") }) test_that("error message if wrong new row number provided", { skip_on_cran() expect_error(update(rsp, row_number = 1, new_row_number = 10), "is not a valid row number") expect_error(update(rsp, row_number = 1, new_row_number = 1.5), "is not a valid row number") }) test_that("error message if wrong new ott id provided", { skip_on_cran() expect_error(update(rsp, row_number = 1, new_ott_id = 66666), "Can't find") }) test_that("it works correctly when providing a new row number", { skip_on_cran() new_rsp <- update(rsp, row_number = 2, new_row_number = 2) expect_equal(new_rsp[new_rsp$search_string == "diadema", "ott_id"], "631176") }) test_that("it works correctly when providing a new ott id", { skip_on_cran() new_rsp <- update(rsp, row_number = 2, new_ott_id = 631176) expect_equal(new_rsp[new_rsp$search_string == "diadema", "ott_id"], "631176") }) test_that("it produces warning when trying to update with unmatched name", { skip_on_cran() expect_warning(new_rsp <- update(rsp_na, row_number = 3, new_row_number = 1)) expect_identical(new_rsp, rsp_na) }) ############################################################################ ## flags method ## ############################################################################ context("flags method for class match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { tax_rsp <- c("Tyrannosaurus", "Helicoplacus", "Ctenocystis", "Holothuria", "Echinoidea") rsp <- tnrs_match_names(tax_rsp) } test_that("flags with no arguments", { skip_on_cran() flags_rsp <- flags(rsp) expect_equal(length(flags_rsp), 5) expect_equivalent(sapply(flags_rsp, length), c(1, 3, 2, 0, 0)) }) test_that("flags with row number", { skip_on_cran() flags_rsp <- flags(rsp, 1) 
expect_true(inherits(flags_rsp, "list")) expect_equal(length(flags_rsp), 1) expect_equal(length(flags_rsp[[1]]), 1) expect_true(inherits(flags_rsp[[1]], "character")) expect_equal(names(flags_rsp), tax_rsp[1]) }) test_that("flags with taxon name", { skip_on_cran() flags_rsp <- flags(rsp, taxon_name = "Tyrannosaurus") expect_true(inherits(flags_rsp, "list")) expect_equal(length(flags_rsp), 1) expect_equal(length(flags_rsp[[1]]), 1) expect_true(inherits(flags_rsp[[1]], "character")) expect_equal(names(flags_rsp), tax_rsp[1]) }) test_that("flags with ott id", { skip_on_cran() flags_rsp <- flags(rsp, ott_id = 664348) expect_true(inherits(flags_rsp, "list")) expect_equal(length(flags_rsp), 1) expect_equal(length(flags_rsp[[1]]), 1) expect_true(inherits(flags_rsp[[1]], "character")) expect_equal(names(flags_rsp), tax_rsp[1]) }) ############################################################################ ## ott_id method ## ############################################################################ context("ott_id method for class match_names") if (identical(Sys.getenv("NOT_CRAN"), "true")) { tax_rsp <- c("Tyrannosaurus", "Helicoplacus", "Ctenocystis", "Holothuria", "Echinoidea") rsp <- tnrs_match_names(tax_rsp) } test_that("ott_id with no arguments", { skip_on_cran() expect_true(inherits(ott_id(rsp), "list")) expect_true(inherits(ott_id(rsp), "otl_ott_id")) expect_equal(names(ott_id(rsp)), tax_rsp) expect_equal(ott_id(rsp)[["Holothuria"]][[1]], 5004030) }) test_that("ott_id with row number", { skip_on_cran() expect_equal(length(ott_id(rsp, 4)), 1) expect_true(inherits(ott_id(rsp, 4), "list")) expect_equivalent(ott_id(rsp, 4)[[1]], 5004030) }) test_that("ott_id with taxon name", { skip_on_cran() expect_equal(length(ott_id(rsp, taxon_name = "Holothuria")), 1) expect_true(inherits(ott_id(rsp, taxon_name = "Holothuria"), "list")) expect_equivalent(ott_id(rsp, taxon_name = "Holothuria")[[1]], 5004030) }) test_that("ott_id with ott id", { skip_on_cran() 
expect_equal(length(ott_id(rsp, ott_id=5004030)), 1) expect_true(inherits(ott_id(rsp, ott_id=5004030), "list")) expect_equivalent(ott_id(rsp, ott_id=5004030)[[1]], 5004030) }) rotl/tests/testthat/test-api-tol.R0000644000177500001440000000444112707503326017231 0ustar deepayanuserscontext("Tree of Life API") ############################################################################ ## .tol_about ## ############################################################################ test_that("include_source_list is logical for .tol_about", { skip_on_cran() expect_error(.tol_about("true"), "logical") }) ############################################################################ ## .tol_mrca ## ############################################################################ test_that("neither ott_ids nor node_ids are NULL for .tol_mrca", { skip_on_cran() expect_error(.tol_mrca(NULL), "Must provide") }) ############################################################################ ## .tol_subtree ## ############################################################################ test_that("ott_id is not NULL", { skip_on_cran() expect_error(.tol_subtree(ott_id = NULL, node_id = NULL), "Must provide") }) ############################################################################ ## .tol_induced_subtree ## ############################################################################ test_that("ott_ids is not NULL", { skip_on_cran() expect_error(.tol_induced_subtree(ott_ids = NULL), "Must provide") }) test_that("NAs are not accepted for ott_ids", { skip_on_cran() expect_error(.tol_induced_subtree(ott_ids = c(123, NA, 456)), "NAs are not allowed") }) #################### ## .tol_node_info ## #################### test_that("include_lineage must be logical with .tol_node_info", { skip_on_cran() expect_error(.tol_node_info(ott_id = "ott_123", include_lineage = "123"), "logical") }) test_that("ott_id must be a numeric with .tol_node_info", { skip_on_cran() expect_error(.tol_node_info(ott_id = 
"test"), "look like numbers") }) test_that("node_id must be a character with .tol_node_info", { skip_on_cran() expect_error(.tol_node_info(node_id = 123), "must look like") }) rotl/tests/testthat/test-API.R0000644000177500001440000001410312705157664016301 0ustar deepayanusers#### ## Making use of the shared OpenTree testing architecture #### ## The R, Python and Ruby wrappers for the Open Tree share a very similar design, ## allowing them to make use of a single test suite for the low-level functions ## (thus, the tests both check that an individual library works as expected, and that ## the different libraries stay in line with each other). ## ## This file pulls the current version of the test from a github repo ## (https://github.com/OpenTreeOfLife/shared-api-tests) and translates the json ## files into tests that run in testthat. This takes a considerable amount of ## infrastructure so I'll briefly describe the rationale here. ## ## The JSON test specification is defined at the github repo linked above; to ## translate these tests I have created custom testthat expectation-functionals ## (contains(), key_has_value(), ...). Because many of the test blocks in the ## JSON files have multiple expectations (i.e. many key-value pairs for ## test_equals) there are functions starting with `test_` that run an entire ## test block for a given expectation. Since many of these tests require ## translation between R-objects and JSON encoded strings there is a set of ## convenience functions to automate that step and a function "test_map" that ## returns the appropriate test_* function for a given JSON test block. ## ## Finally, testthat_json_test uses the above functions to run an entire test ## from a JSON object, and run_shared_tests() runs every test in a JSON file. 
#functionals that start with a response

## Expectation factory: passes when the response has an element named
## `key_name`.
contains <- function(key_name){
    function(x){
        expectation(key_name %in% names(x),
                    sprintf("Missing key name: %s", key_name))
    }
}

## Expectation factory: compares the response element `key` with `value`.
## An empty `value` means the element must be empty; a single `value` is
## compared with `==`; several values are checked with `%in%`.
key_has_value <- function(key, value){
    function(x){
        if(length(value) == 0){
            expectation(length(x[[key]]) == 0,
                        paste("Key", key, "is not empty"))
        } else if(length(value)==1){
            expectation(x[[key]] == value,
                        paste("Key", key, "doesn't have value", value))
        } else{
            expectation(all(x[[key]] %in% value),
                        paste("Key", key, "doesn't contain all of", value))
        }
    }
}

## Expectation factory: passes when the response element `key` has more than
## `len` elements.
value_is_longer_than <- function(key, len){
    function(x){
        expectation(length(x[[key]]) > len,
                    paste("Value for key", key, "is shorter than", len))
    }
}

## Expectation factory: passes when the response element `key_name` equals
## the string 'error'.
value_is_error <- function(key_name){
    function(x){
        expectation(x[[key_name]] == 'error',
                    sprintf("Key %s is not 'error'",key_name))
    }
}

## Functions to test entire test blocks with the above expectations

## Runs one `contains` expectation per key listed in the first column of
## `test_block`.
test_contains <- function(response, test_block){
    key_names <- test_block[,1]
    for(k in key_names){
        expect_that(response, contains(k))
    }
}

## Runs one `key_has_value` expectation per key/value pair. The first element
## of each entry of `test_block` is taken to be the pair (as the original
## `sapply(test_block, "[[", 1)` did). Every pair is now checked; previously
## the loop index was ignored and only the first pair was ever used.
test_equals <- function(response, test_block){
    kv_pairs <- lapply(test_block, "[[", 1)
    for(pair in kv_pairs){
        expect_that(response, key_has_value(pair[[1]], pair[[2]]))
    }
}

## Checks the class of the response against the JSON type named in the first
## element of `test_block`.
test_of_type <- function(response, test_block){
    rtype <- type_map(test_block[[1]])
    expect_that(response, is_a(rtype))
}

## Placeholder: deep equality is not implemented, the block always passes.
test_deep_equals <- function(response, test_block){
    cat("*")
    expect_true(TRUE)
}

## Runs one `value_is_longer_than` expectation per key/length pair. The length
## is coerced with as.numeric() because a pair that mixes a key name and a
## number can arrive as a character vector, and comparing a length against a
## string would compare them as text.
test_length_greater_than <- function(response, test_block){
    vl_pairs <- lapply(test_block, "[[", 1)
    for(pair in vl_pairs){
        expect_that(response,
                    value_is_longer_than(pair[[1]], as.numeric(pair[[2]])))
    }
}

## NOTE(review): `contains_error` is not defined in this file (only
## `value_is_error` is), and this function is not reachable through
## `test_map`; confirm where it is meant to come from before relying on it.
## The misspelled `reponse` argument has been corrected to `response`.
test_contains_error <- function(response, test_block){
    errs <- test_block[,1]
    sapply(errs, function(e) expect_that(response, contains_error(e)))
}

## convenience functions

## Translates the JSON-style strings "true", "false" and "null" (any case)
## into their R counterparts; any other input is returned unchanged.
obj_map <- function(input){
    ## `&&` because this is a scalar condition inside `if`
    if(is.character(input) && length(input)==1){
        switch(tolower(input),
               "true" = TRUE,
               "false" = FALSE,
               "null" = NULL,
               input)
    }
    else{
        input
    }
}

## Applies obj_map() to every element of the test input; empty input is
## returned as is.
json_to_r <- function(test_input){
    if(length(test_input) == 0){
        return(test_input)
    }
    return(lapply(test_input, obj_map))
}

## Maps a JSON type name used in the test files to the matching R class name.
type_map <- function(json_type){
    switch(json_type,
           "dict" = "list",
           stop(sprintf("unknown json type in testing file: %s", json_type))
           )
}

## Returns the test_* function that handles a given kind of test block.
test_map <- function(test_type){
    switch(test_type,
           "contains" = test_contains,
           "equals" = test_equals,
           "deep_equals" = test_deep_equals,
           "error" = stop("Error tests should be handled first"),
           "length_greater_than" = test_length_greater_than,
           "of_type" = test_of_type,
           stop(sprintf("Unkown error type in JSON test: %s", test_type))
           )
}

## Calls the low-level (dot-prefixed) function named in the JSON test with the
## translated test input as arguments.
make_request <- function(json_test){
    test_fxn <- paste0(".", json_test$test_function)
    do.call(what=test_fxn, args=json_to_r(json_test$test_input))
}

## Runs every test block of the test called `test_name` in `test_obj`.
##
## A test that declares an "error" block is expected to fail at request time,
## so only expect_error() is run for it. Otherwise the request is made once
## and each block is dispatched through test_map(). Previously a test with a
## single non-error block was skipped entirely, and a test combining "error"
## with other blocks ended up in the stop() branch of test_map().
testthat_json_test <- function(test_obj, test_name){
    tests_to_run <- names(test_obj[[test_name]]$tests)
    if(any(grepl("error", tests_to_run))){
        expect_error( make_request(test_obj[[test_name]]) )
        return(invisible(NULL))
    }
    if(length(tests_to_run) == 0){
        return(invisible(NULL))
    }
    response <- make_request(test_obj[[test_name]])
    for(i in seq_along(tests_to_run)){
        test_block <- test_obj[[test_name]]$tests[[ tests_to_run[i] ]]
        test_fxn <- test_map(tests_to_run[i])
        test_fxn(response, test_block)
    }
}

## Wraps every test of a parsed JSON file in its own test_that() block.
run_shared_test <- function(json_obj){
    all_tests <- names(json_obj)
    for(i in seq_along(all_tests)) {
        test_that(all_tests[i], {
            skip_on_cran()
            testthat_json_test(json_obj, all_tests[i])
        })
    }
}

## if (identical(Sys.getenv("NOT_CRAN"), "true")) {
##     base_url <- "https://raw.githubusercontent.com/OpenTreeOfLife/shared-api-tests/master/"
##     apis <- c("graph_of_life",
##               "studies",
##               "taxonomy",
##               "tree_of_life",
##               "tnrs"
##               )
##     for(i in 1:length(apis)){
##         context( paste(apis[i], "API") )
##         test_text <- httr::GET(paste0(base_url, apis[i], ".json"))
##         test_description <- jsonlite::fromJSON(httr::content(test_text))
##         run_shared_test(test_description)
##     }
## }
rotl/tests/testthat/test-base.R0000644000177500001440000000340612705157664016606 0ustar deepayanuserscontext("base functions")

test_that("otl_url returns the correct strings", {
    skip_on_cran()
expect_match(otl_url(dev = TRUE), "^https://devapi.opentreeoflife.org$") expect_match(otl_url(dev = FALSE), "^https://api.opentreeoflife.org$") }) test_that("otl_version", { skip_on_cran() expect_equal(otl_version(), "v3") expect_equal(otl_version("foobar"), "foobar") }) test_that("otl_ottid_from_label", { skip_on_cran() expect_equal(otl_ottid_from_label("flkdjfs_ott314343"), 314343) }) test_that("errors that would otherwise not get caught in phylo_from_otl", { expect_error(phylo_from_otl(list(something = "((A, B), C);")), "Cannot find tree") expect_error(phylo_from_otl(999), "I don't know how to deal with this format") }) ############################################################################ ## check_numeric ## ############################################################################ test_that("check_numeric works on integer", { expect_true(check_numeric("123")) expect_true(check_numeric(123)) expect_true(check_numeric(123L)) expect_true(check_numeric(list(123))) }) test_that("check_numeric fails if there are characters", { expect_false(check_numeric("A123")) expect_false(check_numeric("1A23")) expect_false(check_numeric("123A")) expect_false(check_numeric("12-3")) }) test_that("check_numeric fails with more exotic types", { expect_false(check_numeric(NA)) expect_false(check_numeric(TRUE)) expect_false(check_numeric(1.23)) expect_false(check_numeric(0.9999999999999)) }) test_that("check_numeric fails if more than 1 element provided", expect_error(check_numeric(c(1, 2)))) rotl/tests/testthat/test-tree_to_labels.R0000644000177500001440000000432412541354226020646 0ustar deepayanuserscontext("test tree_to_labels") test_that("basic tree 1", { tree1 <- "((raccon:19.19959,bear:6.80041)InnerNode1:0.84600,((sea_lion:11.99700,seal:12.00300)InnerNode2:7.52973,((monkey:100.85930,cat:47.14069):20.59201,weasel:18.87953):2.09460):3.87382,dog:25.46154);" res_tree1 <- tree_to_labels(tree1) expect_equal(res_tree1$tip_label, c("raccon", "bear", "sea_lion", "seal", "monkey", 
"cat", "weasel", "dog")) expect_equal(res_tree1$edge_label, c("InnerNode1", "InnerNode2")) }) test_that("basic tree 2", { tree2 <- "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460):0.10;" res_tree2 <- tree_to_labels(tree2) expect_equal(res_tree2$tip_label, c("Bovine", "Gibbon", "Orang", "Gorilla", "Chimp", "Human", "Mouse")) expect_equal(res_tree2$edge_label, character(0)) }) test_that("basic tree 3", { tree3 <- "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);" res_tree3 <- tree_to_labels(tree3) expect_equal(res_tree3$tip_label, c("Bovine", "Hylobates", "Pongo", "G._Gorilla", "P._paniscus", "H._sapiens", "Rodent")) expect_equal(res_tree3$edge_label, character(0)) }) test_that("only 1 tip", { tree_tip <- "A;" res_tree_tip <- tree_to_labels(tree_tip) expect_equal(res_tree_tip$tip_label, "A") expect_equal(res_tree_tip$edge_label, character(0)) }) test_that("only 1 tip with parentheses", { tree_tip <- "(A);" res_tree_tip <- tree_to_labels(tree_tip) expect_equal(res_tree_tip$tip_label, "A") expect_equal(res_tree_tip$edge_label, character(0)) }) test_that("only 1 tip and 1 internal", { tree_tip <- "(A)B;" res_tree_tip <- tree_to_labels(tree_tip) expect_equal(res_tree_tip$tip_label, "A") expect_equal(res_tree_tip$edge_label, "B") }) test_that("tree with singletons", { tree_sing <- "(((((A)cats,B)dogs,(C,D)ducks)frogs)animals,E)fungi;" res_tree_sing <- tree_to_labels(tree_sing) expect_equal(res_tree_sing$tip_label, LETTERS[1:5]) expect_equal(res_tree_sing$edge_label, c("cats", "dogs", "ducks", "frogs", "animals", "fungi")) }) rotl/tests/testthat/test-studies.R0000644000177500001440000005136213017051564017345 0ustar deepayanuserscontext("test of studies") ############################################################################ ## studies_properties ## 
############################################################################

test_that("studies_properties is a list with 2 elements (if breaks, need to update documentation)", {
    skip_on_cran()
    ## names() must wrap the call itself: wrapping the result of %in% asked for
    ## the names of an unnamed logical vector (NULL), and all(NULL) is TRUE, so
    ## the expectation could never fail.
    expect_true(all(names(studies_properties()) %in% c("tree_properties", "study_properties")))
})

############################################################################
## get_study ##
############################################################################

test_that("get_study returns an error when asking for a study that doesn't exist", {
    skip_on_cran()
    expect_error(get_study("tt_666666"))
})

test_that("get_study generates a phylo object", {
    skip_on_cran()
    tr <- get_study("pg_719", object_format = "phylo")
    expect_true(inherits(tr, "multiPhylo"))
    expect_equal(length(tr), 3)
    expect_true(length(tr[[1]]$tip.label) > 1)
})

test_that("get_study returns an error if file is specied but file_format is not", {
    skip_on_cran()
    expect_error(get_study("pg_719", file = "test"), "must be specified")
})

test_that("get_study generates a nexml object", {
    skip_on_cran()
    tr <- get_study("pg_719", object_format = "nexml")
    expect_true(inherits(tr, "nexml"))
})

## The file-based tests below write to a temporary file and check the first
## line of the output for the marker of the requested format.
test_that("get_study generates a newick file", {
    skip_on_cran()
    ff <- tempfile()
    tr <- get_study("pg_719", file_format = "newick", file = ff)
    expect_true(tr)
    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
})

test_that("get_study generates a nexus file", {
    skip_on_cran()
    ff <- tempfile()
    tr <- get_study("pg_719", file_format = "nexus", file = ff)
    expect_true(tr)
    expect_true(grepl("^#NEXUS", readLines(ff, n = 1, warn = FALSE)))
})

test_that("get_study generates a nexml file", {
    skip_on_cran()
    ff <- tempfile()
    tr <- get_study("pg_719", file_format = "nexml", file = ff)
    expect_true(tr)
    expect_true(grepl("^<\\?xml", readLines(ff, n = 1, warn = FALSE)))
})

test_that("get_study generates a json file", {
    skip_on_cran()
    ff <- tempfile()
    tr <- get_study("pg_719", file_format = "json", file = ff)
    expect_true(tr)
expect_true(grepl("^\\{", readLines(ff, n = 1, warn = FALSE))) }) ############################################################################ ## get_study_tree ## ############################################################################ test_that("get_study_tree returns error when tree doesn't exist", { skip_on_cran() expect_error(get_study_tree("2655", "tree5555")) }) test_that("get_study_tree returns error when study doesn't exist", { skip_on_cran() expect_error(get_study_tree("5555555", "tree555555")) }) test_that("get_study_tree generates nexus file", { skip_on_cran() ff <- tempfile(fileext = ".nex") tt <- get_study_tree("pg_1144", "tree2324", file_format = "nexus", file = ff) expect_true(tt) expect_true(grepl("^#NEXUS", readLines(ff, n = 1, warn = FALSE))) }) test_that("get_study_tree generates newick file", { skip_on_cran() ff <- tempfile(fileext = ".tre") tt <- get_study_tree("pg_1144", "tree2324", file_format = "newick", file = ff) expect_true(tt) expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE))) }) test_that("get_study_tree generates json file", { skip_on_cran() ff <- tempfile(fileext = ".json") tt <- get_study_tree("pg_1144", "tree2324", file_format = "json", file = ff) expect_true(tt) expect_true(grepl("^\\{", readLines(ff, n = 1, warn = FALSE))) }) test_that("get_study_tree returns a phylo object", { skip_on_cran() tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo") expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) }) ### Test types of labels with phylo objects test_that("get_study_tree returns a phylo object and ott_id for tip labels", { skip_on_cran() tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo", tip_label = "ott_id") expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_true(grepl("^[0-9]+$", tt$tip.label[1])) }) test_that("get_study_tree returns a phylo object and ott_taxon_names for tip labels", { skip_on_cran() tt <- get_study_tree("pg_1144", 
"tree2324", object_format = "phylo", tip_label = "ott_taxon_name") expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_true(sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tt$tip.label)) < 3) }) test_that("get_study_tree returns a phylo object and original labels for tip labels", { skip_on_cran() tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo", tip_label = "original_label") expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_equal(sum(!grepl("^[A-Za-z]+_[a-z]+$", tt$tip.label)), 45) }) ### Test types of labels with files (skipping json for now because there is no good way of doing it) test_that("get_study_tree returns an error if file is given but file format is not", { skip_on_cran() expect_error(get_study_tree(study_id="pg_1144", tree_id="tree2324", file = "test"), "must be specified") }) test_that("get_study_tree returns nexus file and ott_id for tip labels", { skip_on_cran() ff <- tempfile(fileext = ".nex") tt <- get_study_tree("pg_1144", "tree2324", file_format = "nexus", tip_label = "ott_id", file = ff) expect_true(tt) tr <- rncl::read_nexus_phylo(ff) expect_true(length(tr$tip.label) > 1) expect_true(grepl("^[0-9]+$", tr$tip.label[1])) }) test_that("get_study_tree returns a phylo object and ott_taxon_names for tip labels", { skip_on_cran() ff <- tempfile(fileext = ".tre") tt <- get_study_tree("pg_1144", "tree2324", file_format = "newick", tip_label = "ott_taxon_name", file = ff) expect_true(tt) tr <- rncl::read_newick_phylo(ff) expect_true(length(tr$tip.label) > 1) expect_true(sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tr$tip.label)) < 3) }) ############################################################################ ## get_study_subtree ## ############################################################################ test_that("get_study_subtree returns an error when study_id doesn't exist", { skip_on_cran() expect_error(get_study_subtree("pg_55555", "tree55555", subtree_id = "node555555")) }) 
test_that("get_study_subtree returns an error when tree_id doesn't exist", { skip_on_cran() expect_error(get_study_subtree("pg_1144", "tree55555", subtree_id = "node555555")) }) test_that("get_study_subtree returns an error when the subtree_id is invalid", { skip_on_cran() expect_error(get_study_subtree("pg_1144", "tree2324", "foobar")) }) test_that("get_study_subtree returns a phylo object", { skip_on_cran() tt <- get_study_subtree("pg_420", "tree522", subtree_id = "ingroup", object_format = "phylo") sub_tt <- get_study_subtree("pg_420", "tree522", subtree_id = "node208580", object_format = "phylo") expect_true(inherits(tt, "phylo")) expect_true(length(tt$tip.label) > 1) expect_true(inherits(sub_tt, "phylo")) expect_true(length(sub_tt$tip.label) > 1) expect_true(length(tt$tip.label) > length(sub_tt$tip.label)) }) test_that("get_study_subtree fails if file name is given but no file format", { skip_on_cran() expect_error(get_study_subtree("pg_420", "tree522", subtree_id = "ingroup", file = "test"), "must be specified") }) test_that("get_study_subtree returns a nexus file", { skip_on_cran() ff <- tempfile(fileext = ".nex") tt <- get_study_subtree("pg_420", "tree522", subtree_id = "ingroup", file_format = "nexus", file = ff) expect_true(tt) expect_true(grepl("^#NEXUS", readLines(ff, n = 1, warn = FALSE))) }) test_that("get_study_subtree returns a newick file", { skip_on_cran() ff <- tempfile(fileext = ".tre") tt <- get_study_subtree("pg_420", "tree522", subtree_id = "ingroup", file_format = "newick", file = ff) expect_true(tt) expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE))) }) test_that("get_study_subtree can deduplicate labels", { skip_on_cran() expect_warning(get_study_subtree(study_id="pg_710", tree_id="tree1277", tip_label='ott_taxon_name', subtree_id = "ingroup", deduplicate = TRUE), "and have been modified") }) test_that("get_study_subtree fails with duplicate labels", { skip_on_cran() expect_error(get_study_subtree(study_id="pg_710", 
tree_id="tree1277", tip_label='ott_taxon_name', subtree_id = "ingroup", deduplicate = FALSE), "has already been encountered") }) ############################################################################ ## get_study_meta ## ############################################################################ if (identical(Sys.getenv("NOT_CRAN"), "true")) { sm <- get_study_meta("pg_719") } test_that("get_study meta returns a study_meta object", { skip_on_cran() expect_true(inherits(sm, "study_meta")) }) test_that("get_tree_ids method for study_meta", { skip_on_cran() expect_equal(get_tree_ids(sm), c("tree1294", "tree1295", "tree1296")) }) test_that("get_publication method for study_meta", { skip_on_cran() expect_equal(attr(get_publication(sm), "DOI"), "http://dx.doi.org/10.1600/036364411X605092") }) test_that("candidate_for_synth method for study_meta", { skip_on_cran() expect_true(candidate_for_synth(sm) %in% get_tree_ids(sm)) }) test_that("get_study_year method for study_meta", { skip_on_cran() expect_equal(get_study_year(sm), 2011) }) ############################################################################ ## tol_about ## ############################################################################ test_that("tol_about returns class tol_summary", { skip_on_cran() expect_true(inherits(tol_about(), "tol_summary")) }) test_that("study_about", { skip_on_cran() ta <- source_list(tol_about(TRUE)) expect_true(inherits(ta, "data.frame")) expect_true(nrow(ta) > 100) expect_equal(names(ta), c("study_id","tree_id", "git_sha")) }) ############################################################################ ## studies_find_studies ## ############################################################################ test_that("single study detailed=TRUE", { skip_on_cran() res <- studies_find_studies(property = "ot:studyId", value = "ot_248", detailed = TRUE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_true(all(names(res) %in% 
c("study_ids", "n_trees", "tree_ids", "candidate", "study_year", "title", "study_doi"))) expect_true(nrow(res) >= 1L) expect_equal(res[["study_ids"]], "ot_248") expect_equal(res[["n_trees"]], "1") expect_equal(res[["candidate"]], "Tr76302") expect_equal(res[["study_year"]], "2014") expect_equal(res[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060") expect_equal(res[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'") expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("single study detailed=FALSE", { skip_on_cran() res <- studies_find_studies(property = "ot:studyId", value = "ot_248", detailed = FALSE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "study_ids")) expect_true(inherits(res, "matched_studies")) expect_match(attr(res, "found_trees"), "list of the trees associated") expect_equal(names(res), "study_ids") expect_equal(res[1, 1], "ot_248") expect_equal(nrow(res), 1L) expect_equal(ncol(res), 1L) expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("multiple studies detailed=TRUE", { skip_on_cran() res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName", value = "mammalia", detailed = TRUE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_true(all(names(res) %in% c("study_ids", "n_trees", "tree_ids", "candidate", "study_year", "title", "study_doi"))) expect_true(nrow(res) >= 8L) expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("multiple studies detailed=FALSE", { skip_on_cran() res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName", value = "mammalia", detailed = FALSE) expect_true(inherits(res, "study_ids")) expect_true(inherits(res, "matched_studies")) expect_true(inherits(res, "data.frame")) expect_equal(ncol(res), 1L) 
expect_true(nrow(res) >= 8) expect_equal(names(res), "study_ids") expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) ############################################################################ ## studies_find_trees ## ############################################################################ test_that("studies_find_trees single study detailed=FALSE", { skip_on_cran() res <- studies_find_trees(property = "ot:studyId", value = "ot_248", detailed = FALSE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_match(attr(res, "found_trees")[[1]], "Tr76302") expect_equal(names(res), c("study_ids", "n_matched_trees", "match_tree_ids")) expect_equal(res[1, 1], "ot_248") expect_equal(nrow(res), 1L) expect_equal(ncol(res), 3L) expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("studies_find_trees single study detailed=TRUE", { skip_on_cran() res <- studies_find_trees(property = "ot:studyId", value = "ot_248", detailed = TRUE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_equal(names(res), c("study_ids", "n_trees", "tree_ids", "candidate", "study_year", "title", "study_doi", "n_matched_trees", "match_tree_ids")) expect_equal(nrow(res), 1L) expect_equal(res[["study_ids"]], "ot_248") expect_equal(res[["n_trees"]], "1") expect_equal(res[["candidate"]], "Tr76302") expect_equal(res[["study_year"]], "2014") expect_equal(res[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060") expect_equal(res[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'") expect_equal(res[["tree_ids"]], "Tr76302") expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("studies_find_trees multiple studies detailed=TRUE", { skip_on_cran() res <- studies_find_trees(property = "ot:ottTaxonName", value = 
"Echinodermata", detailed = TRUE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_equal(names(res), c("study_ids", "n_trees", "tree_ids", "candidate", "study_year", "title", "study_doi", "n_matched_trees", "match_tree_ids")) expect_true(nrow(res) >= 5L) expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) test_that("studies_find_trees multiple studies detailed=FALSE", { skip_on_cran() res <- studies_find_trees(property = "ot:ottTaxonName", value = "Echinodermata", detailed = FALSE) expect_true(inherits(res, "data.frame")) expect_true(inherits(res, "matched_studies")) expect_equal(names(res), c("study_ids", "n_matched_trees", "match_tree_ids")) expect_true(nrow(res) >= 5L) expect_true(length(attr(res, "metadata")) > 0) expect_true(length(attr(res, "found_trees")) > 0) }) ############################################################################ ## list_trees ## ############################################################################ test_that("list_trees with studies_find_studies and detailed = FALSE", { skip_on_cran() expect_match(list_trees(studies_find_studies( property = "ot:focalCladeOTTTaxonName", value = "Aves", detailed = FALSE)), "If you want to get a list of the trees associated with the studies") }) test_that("list_trees with studies_find_studies and detailed = TRUE", { skip_on_cran() res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName", value = "mammalia", detailed = TRUE) expect_true(inherits(list_trees(res), "list")) expect_true(length(list_trees(res)) >= 8) expect_true(sum(names(list_trees(res)) %in% c("pg_2647", "ot_308", "pg_2812", "ot_109", "pg_2582", "pg_1428", "ot_755", "pg_2550")) >= 8) }) test_that("list_trees with studies_find_trees and detailed=FALSE", { skip_on_cran() res <- studies_find_trees(property = "ot:ottTaxonName", value = "Echinodermata", detailed = FALSE) lt <- list_trees(res) expect_true(inherits(lt, "list")) 
expect_true(length(names(lt)) >= 5L) expect_true(all(sapply(lt, length) >= 1L)) }) test_that("list_trees with studies_find_trees and detailed=TRUE", { skip_on_cran() res <- studies_find_trees(property = "ot:ottTaxonName", value = "Echinodermata", detailed = TRUE) lt <- list_trees(res) expect_true(inherits(lt, "list")) expect_true(length(names(lt)) >= 5L) expect_true(all(sapply(lt, length) >= 1L)) }) rotl/tests/tree_of_life.json0000644000177500001440000000705612705157664016256 0ustar deepayanusers{ "test_mrca_normal_input": { "test_function": "tol_mrca", "test_input": {"ott_ids":[412129, 536234]}, "tests": { "of_type": ["dict","Response is of wrong type"] , "equals": [ [["nearest_taxon_mrca_rank","'superorder'"],"Fails that nearest_taxon_mrca_rank contains superorder"] ], "contains": [ ["nearest_taxon_mrca_ott_id","Doesn't contain nearest_taxon_mrca_ott_id"] ] } }, "test_mrca_empty_list_input": { "test_function": "tol_mrca", "test_input": {"ott_ids":[]}, "tests": { "error": [ ["ValueError","Return wrong kind of error, or did return error"] ] } }, "test_mrca_empty_list_input_two": { "test_function": "tol_mrca", "test_input": {"ott_ids":[], "node_ids":[]}, "tests": { "error": [ ["ValueError","Return wrong kind of error, or did return error"] ] } }, "test_mrca_non_existing_node": { "test_function": "tol_mrca", "test_input": {"ott_ids":[4259824365942365972436598732]}, "tests": { "error": [ ["OpenTreeService.OpenTreeError","Return wrong kind of error, or did return error"] ] } }, "test_mrca_non_existing_empty": { "test_function": "tol_mrca", "test_input": "null", "tests": { "error": [ ["ValueError","Return wrong kind of error, or did return error"] ] } }, "test_subtree_demo": { "test_function": "tol_subtree", "test_input": {"ott_id":3599390}, "tests": { "of_type": ["dict","Response is of wrong type"] , "contains": [ ["newick","Doesn't contain a newick string"] ] } }, "test_subtree_null": { "test_function": "tol_subtree", "test_input": {}, "tests": { "error": [ 
["ValueError","Return wrong kind of error, or did return error"] ], "of_type": ["jfdsm"] } }, "test_induced_tree_good": { "test_function": "tol_induced_tree", "test_input": {"ott_ids":[292466, 501678, 267845, 666104, 316878, 102710, 176458]}, "tests": { "of_type": ["dict","Response is of wrong type"] , "contains": [ ["subtree","Doesn't contain a subtree string"] ] } }, "test_induced_tree_null": { "test_function": "tol_induced_tree", "test_input": {}, "tests": { "error": [ ["ValueError","Return wrong kind of error, or did return error"] ], "of_type": ["something"] } }, "test_about": { "test_function": "tol_about", "test_input": {}, "tests": { "contains": [ ["root_taxon_name","Output doesn't contain root_taxon_name"], ["num_source_studies","Output doesn't contain num_source_studies"], ["taxonomy_version","Output doesn't contain taxonomy_version"], ["root_ott_id","Output doesn't contain root_ott_id"], ["num_tips","Output doesn't contain num_tips"] ] } } } rotl/tests/test-all.R0000644000177500001440000000005112650461700014561 0ustar deepayanusers### library(testthat) test_check('rotl') rotl/NAMESPACE0000644000177500001440000000625613056072255013007 0ustar deepayanusers# Generated by roxygen2: do not edit by hand S3method("[",otl_ott_id) S3method(candidate_for_synth,study_meta) S3method(flags,match_names) S3method(flags,taxon_info) S3method(flags,taxon_mrca) S3method(get_publication,study_meta) S3method(get_study_year,study_meta) S3method(get_tree_ids,study_meta) S3method(inspect,match_names) S3method(is_in_tree,otl_ott_id) S3method(is_suppressed,match_names) S3method(is_suppressed,taxon_info) S3method(is_suppressed,taxon_mrca) S3method(list_trees,matched_studies) S3method(ott_id,match_names) S3method(ott_id,taxon_info) S3method(ott_id,taxon_mrca) S3method(ott_id,tol_mrca) S3method(ott_id,tol_node) S3method(ott_id,tol_summary) S3method(print,study_external_data) S3method(print,study_ids) S3method(print,study_meta) S3method(print,tnrs_contexts) S3method(print,tol_mrca) 
S3method(print,tol_node) S3method(print,tol_summary) S3method(source_list,tol_mrca) S3method(source_list,tol_node) S3method(source_list,tol_summary) S3method(synonyms,match_names) S3method(synonyms,taxon_info) S3method(tax_lineage,taxon_info) S3method(tax_lineage,tol_node) S3method(tax_name,match_names) S3method(tax_name,taxon_info) S3method(tax_name,taxon_mrca) S3method(tax_name,tol_mrca) S3method(tax_name,tol_node) S3method(tax_name,tol_summary) S3method(tax_rank,match_names) S3method(tax_rank,taxon_info) S3method(tax_rank,taxon_mrca) S3method(tax_rank,tol_mrca) S3method(tax_rank,tol_node) S3method(tax_rank,tol_summary) S3method(tax_sources,match_names) S3method(tax_sources,taxon_info) S3method(tax_sources,taxon_mrca) S3method(tax_sources,tol_mrca) S3method(tax_sources,tol_node) S3method(tax_sources,tol_summary) S3method(tol_lineage,tol_node) S3method(unique_name,match_names) S3method(unique_name,taxon_info) S3method(unique_name,taxon_mrca) S3method(unique_name,tol_mrca) S3method(unique_name,tol_node) S3method(unique_name,tol_summary) S3method(update,match_names) export(candidate_for_synth) export(flags) export(get_publication) export(get_study) export(get_study_meta) export(get_study_subtree) export(get_study_tree) export(get_study_year) export(get_tree_ids) export(inspect) export(is_in_tree) export(is_suppressed) export(list_trees) export(ott_id) export(source_list) export(strip_ott_ids) export(studies_find_studies) export(studies_find_trees) export(studies_properties) export(study_external_IDs) export(synonyms) export(tax_lineage) export(tax_name) export(tax_rank) export(tax_sources) export(taxon_external_IDs) export(taxonomy_about) export(taxonomy_mrca) export(taxonomy_subtree) export(taxonomy_taxon_info) export(tnrs_contexts) export(tnrs_infer_context) export(tnrs_match_names) export(tol_about) export(tol_induced_subtree) export(tol_lineage) export(tol_mrca) export(tol_node_info) export(tol_subtree) export(unique_name) import(ape) 
importFrom(assertthat,assert_that) importFrom(assertthat,is.flag) importFrom(assertthat,is.string) importFrom(httr,GET) importFrom(httr,POST) importFrom(httr,content) importFrom(httr,parse_url) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) importFrom(jsonlite,unbox) importFrom(rentrez,entrez_link) importFrom(rentrez,entrez_search) importFrom(rncl,read_newick_phylo) importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(stats,update) rotl/NEWS.md0000644000177500001440000001136213056075023012654 0ustar deepayanusers## rotl 3.0.2 ### New features * The function `get_study_subtree` gains the argument `tip_label` to control the formatting of the tip labels, #90, reported by @bomeara * The new function `is_in_tree` takes a list of OTT ids (i.e., the output of `ott_id()`), and returns a logical vector indicating whether they are included in the synthetic tree (workaround #31). ### Bug fixes * The function `get_study_subtree` ignored the argument `subtree_id`, #89 reported by @bomeara ### Other changes * `citation("rotl")` now includes the reference to the Open Tree of Life publication. * The "How to use rotl?" vignette was updated to document the behavior of v3 of the OTL API which returns an HTTP error code 400 when the request for induced subtree includes taxa that are not in the synthetic tree (fix #84) ## rotl 3.0.1 * Fix tests and vignette to reflect changes accompanying release 6.1 of the synthetic tree * Add section in vignette "How to use rotl?" about how to get the higher taxonomy from a given taxon. * Add `CITATION` file with MEE manuscript information (#82) ## rotl 3.0.0 * `rotl` now interacts with v3.0 of the Open Tree of Life APIs. The documentation has been updated to reflect the associated changes. More information about the v3.0 of the Open Tree of Life APIs can be found [on their wiki](https://github.com/OpenTreeOfLife/germinator/wiki/Open-Tree-of-Life-Web-APIs). 
### New features * New methods: `tax_sources`, `is_suppressed`, `tax_rank`, `unique_name`, `tax_name`, `ott_id`, for objects returned by `tnrs_match_names()`, `taxonomy_taxon_info()`, `taxonomy_mrca()`, `tol_node_info()`, `tol_about()`, and `tol_mrca()`. Each of these methods have their own class. * New method `tax_lineage()` to extract the higher taxonomy from an object returned by `taxonomy_taxon_info()` (initially suggested by Matt Pennell, #57). * New method `tol_lineage()` to extract the nodes towards the root of the tree. * New print methods for `tol_node_info()` and `tol_mrca()`. * New functions `study_external_IDs()` and `taxon_external_IDs()` that return the external identifiers for a study and associated trees (e.g., DOI, TreeBase ID); and the identifiers of taxon names in taxonomic databases. The vignette "Data mashup" includes an example on how to use it. * The function `strip_ott_ids()` gains the argument `remove_underscores` to remove underscores from tips in trees returned by OTL. ### Changes * Rename method `ott_taxon_name()` to `tax_name()` for consistency. * Rename methods `synth_sources()` and `study_list()` to `source_list()`. * Refactor how result of query is checked and parsed (invisible to the user). ### Bug fixes * Fix bug in `studies_find_studies()`, the arguments `verbose` and `exact` were ignored. * The argument `only_current` has been dropped for the methods associated with objects returned by `tnrs_match_names()` * The print method for `tnrs_contexts()` duplicated some names. * `inspect()`, `update()` and `synonyms()` methods for `tnrs_match_names()` did not work if the query included unmatched taxa. ## rotl 0.5.0 * New vignette: `meta-analysis` * Added arguments `include_lineage` and `list_terminal_descendants` to `taxonomy_taxon()` * Improve warning and format of the result if one of the taxa requested doesn't match anything in `tnrs_match_names`. 
* In the data frame returned by `tnrs_match_names`, the columns `approximate_match`, `is_synonym` and `is_deprecated` are now `logical` (instead of `character`) [issue #54] * New utility function `strip_ott_ids` removes OTT id information from a character vector, making it easier to match tip labels in trees returned by `tol_induced_subtree` to taxonomic names in other data sources. This function can also remove underscores from the taxon names. * New method `list_trees` returns a list of tree ids associated with studies. The function takes the output of `studies_find_studies` or `studies_find_trees`. * `studies_find_studies` and `studies_find_trees` gain argument `detailed` (default set to `TRUE`), that produces a data frame summarizing information (title of the study, year of publication, DOI, ids of associated trees, ...) about the studies matching the search criteria. * `get_study_tree` gains argument `deduplicate`. When `TRUE`, if the tree returned for a given study contains duplicated tip labels, they will be made unique before being parsed by NCL by appending a suffix (`_1`, `_2`, `_3`, etc.). (#46, reported by @bomeara) * New method `get_study_year` for objects of class `study_meta` that returns the year of publication of the study. * A more robust approach is used by `get_tree_ids` to identify the tree ids in the metadata returned by the API ## rotl 0.4.1 * Initial CRAN release on July, 24th 2015 rotl/R/0000755000177500001440000000000013056407503011756 5ustar deepayanusersrotl/R/studies-utils.R0000644000177500001440000000510212770010460014707 0ustar deepayanusers## Unexported function that generates a data frame summarizing the metadata. 
## This function is used by both studies_find_studies and studies_find_trees,
## to generate the output when using the argument detailed=TRUE
##' @importFrom stats setNames
summarize_meta <- function(study_ids) {
    ## Replace a value that is empty once unlisted by an empty string
    fill <- function(x) {
        if (length(unlist(x))) {
            x
        } else {
            ""
        }
    }
    meta_raw <- lapply(study_ids, function(x) get_study_meta(x))
    ## Extract the metadata
    meta <- lapply(meta_raw, function(m) {
        c(tree_ids = fill(list(get_tree_ids(m))),
          study_year = fill(get_study_year(m)),
          publication = fill(get_publication(m)),
          doi = fill(attr(get_publication(m), "DOI")),
          candidate = fill(list(candidate_for_synth(m)))
          )
    })
    ## Convert into a data frame
    dat <- lapply(meta, function(m) {
        c(n_trees = length(m[["tree_ids"]]),
          tree_ids = limit_trees(m[["tree_ids"]]),
          candidate = paste(m[["candidate"]], collapse = ", "),
          study_year = m[["study_year"]],
          title = fill(extract_title(m[["publication"]])),
          study_doi = m[["doi"]])
    })
    dat <- do.call("rbind", dat)
    dat <- cbind(study_ids = study_ids, dat)
    rownames(dat) <- NULL
    dat <- data.frame(dat, stringsAsFactors = FALSE)
    ## Add list of found trees as attributes
    found_trees <- lapply(meta, function(m) {
        m[["tree_ids"]]
    })
    found_trees <- stats::setNames(found_trees, study_ids)
    attr(dat, "found_trees") <- found_trees
    attr(dat, "metadata") <- meta_raw
    dat
}

## Unexported function that attempts to extract the title from the
## citation information associated with the study information. The
## function returns the element(s) that follow what looks like a year
## (4 digits, optionally followed by a lowercase letter) in the string.
##
## pub_orig: the publication string extracted from the study metadata
## split_char: the regular expression on which the bibliographic
##     elements are split. The function first tries "." and, if no title
##     is found, tries again with "," (only these two are handled).
##
## Returns a character vector; character(0) if nothing follows a year
## with either separator.
extract_title <- function(pub_orig, split_char = "\\.") {
    pub <- unlist(strsplit(pub_orig, split = split_char))
    ## strip all leading/trailing whitespace, not just one character,
    ## otherwise ".  2010" is not recognized as a year
    pub <- gsub("^\\s+|\\s+$", "", pub)
    which_year <- grep("^\\d{4}[a-z]?$", pub)
    res <- pub[which_year + 1]
    ## when the year is the last element there is nothing after it and
    ## the indexing above yields NA: this is not a title
    res <- res[!is.na(res)]
    if (length(res) > 0) {
        res
    } else if (split_char == ",") {
        character(0)
    } else {
        extract_title(pub_orig, ",")
    }
}

## Unexported function that limits the display of tree_ids to the first
## 5 values (followed by "..." when there are more), collapsed into a
## single comma-separated string.
limit_trees <- function(x) {
    if (length(x) > 5)
        x <- c(x[1:5], "...")
    paste(x, collapse = ", ")
}
rotl/R/tnrs.R0000644000177500001440000002355413056070101013066 0ustar deepayanusers

##' Match taxonomic names to the Open Tree Taxonomy.
##'
##' Accepts one or more taxonomic names and returns information about
##' potential matches for these names to known taxa in the Open Tree
##' Taxonomy.
##'
##' This service uses taxonomic contexts to disambiguate homonyms and
##' misspelled names; a context may be specified using the
##' \code{context_name} argument. If no context is specified, then the
##' context will be inferred (i.e., the shallowest taxonomic context
##' that contains all unambiguous names in the input). Taxonomic
##' contexts are uncontested higher taxa that have been selected to
##' allow limits to be applied to the scope of TNRS searches
##' (e.g. 'match names only within flowering plants'). Once a context
##' has been identified (either user-specified or inferred), all taxon
##' name matches will be performed only against taxa within that
##' context. For a list of available taxonomic contexts, see
##' \code{\link{tnrs_contexts}}.
##'
##' A name is considered unambiguous if it is not a synonym and has
##' only one exact match to any taxon name in the entire taxonomy.
##'
##' Several functions listed in the \sQuote{See also} section can be
##' used to inspect and manipulate the object generated by this
##' function.
##'
##'
##' @title Match names to the Open Tree Taxonomy
##' @param names taxon names to be queried.
##'     Currently limited to 10,000 names for exact matches and 2,500
##'     names for approximate matches (character vector)
##' @param context_name name of the taxonomic context to be searched
##'     (length-one character vector). Must match (case sensitive) one
##'     of the values returned by \code{\link{tnrs_contexts}}.
##' @param do_approximate_matching A logical indicating whether or not
##'     to perform approximate string (a.k.a. \dQuote{fuzzy})
##'     matching. Using \code{FALSE} will greatly improve
##'     speed. Default, however, is \code{TRUE}.
##' @param ids A vector of ids to use for identifying names. These
##'     will be assigned to each name in the names array. If ids is
##'     provided, then ids and names must be identical in length.
##' @param include_suppressed Ordinarily, some quasi-taxa, such as
##'     incertae sedis buckets and other non-OTUs, are suppressed from
##'     TNRS results. If this parameter is true, these quasi-taxa are
##'     allowed as possible TNRS results.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A data frame summarizing the results of the query. The
##'     original query output is appended as an attribute to the
##'     returned object (and can be obtained using \code{attr(object,
##'     "original_response")}).
##' @seealso \code{\link{inspect.match_names}},
##'     \code{\link{update.match_names}},
##'     \code{\link{synonyms.match_names}}.
##' @examples \dontrun{
##'  deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
##'                                             "chordata", "hemichordata"))
##' }
##' @importFrom stats setNames
##' @export
tnrs_match_names <- function(names = NULL, context_name = NULL,
                             do_approximate_matching = TRUE,
                             ids = NULL, include_suppressed = FALSE, ...) {

    ## A user-supplied context must be one of those listed by the API
    if (!is.null(context_name) &&
        !context_name %in% unlist(tnrs_contexts(...))) {
        stop("The ", sQuote("context_name"),
             " is not valid. Check possible values using tnrs_contexts()")
    }

    res <- .tnrs_match_names(names = names, context_name = context_name,
                             do_approximate_matching = do_approximate_matching,
                             ids = ids, include_suppressed = include_suppressed,
                             ...)
    check_tnrs(res)

    ## One row per query, using the first match returned for each
    matches <- build_summary_match(res,
                                   res_id = seq_along(res[["results"]]),
                                   match_id = 1,
                                   initial_creation = TRUE)

    matches$search_string <- gsub("\\\\", "", matches$search_string)

    ## Rows are arranged in the order in which the names were supplied
    row_order <- match(tolower(names), matches$search_string)
    matches <- matches[row_order, ]

    for (col in c("approximate_match", "is_synonym", "flags")) {
        matches[[col]] <- convert_to_logical(matches[[col]])
    }

    attr(matches, "original_order") <- as.numeric(rownames(matches))
    rownames(matches) <- NULL
    attr(matches, "original_response") <- res
    attr(matches, "match_id") <- rep(1, nrow(matches))
    attr(matches, "has_original_match") <- !is.na(matches[["number_matches"]])
    class(matches) <- c("match_names", "data.frame")
    matches
}

## Convert to logical a vector whose non-missing values are all
## "TRUE" or "FALSE"; any other vector is returned unchanged.
##' @importFrom stats na.omit
convert_to_logical <- function(x) {
    only_booleans <- all(stats::na.omit(x) %in% c("TRUE", "FALSE"))
    if (only_booleans) as.logical(x) else x
}

## Stop if the response contains no result at all, and warn about the
## names listed as unmatched in the response.
check_tnrs <- function(req) {
    if (length(req$results) < 1) {
        stop("No matches for any of the provided taxa")
    }
    unmatched <- req[["unmatched_names"]]
    if (length(unmatched) > 0) {
        warning(paste(unmatched, collapse = ", "), " are not matched")
    }
}

## Named list of extractors: each function takes one element of the
## "matches" list of the response and returns the value of the column
## of the same name in the summary.
tnrs_columns <- list(
    "search_string" = function(x) {
        x[["search_string"]]
    },
    "unique_name" = function(x) {
        .tax_unique_name(x[["taxon"]])
    },
    "approximate_match" = function(x) {
        x[["is_approximate_match"]]
    },
    "ott_id" = function(x) {
        .tax_ott_id(x[["taxon"]])
    },
    "is_synonym" = function(x) {
        x[["is_synonym"]]
    },
    "flags" = function(x) {
        paste(.tax_flags(x[["taxon"]]), collapse = ", ")
    }
)

## Build the summary for match number `match_id` of query number
## `res_id`: the value of each column followed by the number of matches
## found for this query.
summary_row_factory <- function(res, res_id, match_id, columns = tnrs_columns) {
    all_matches <- res[["results"]][[res_id]][["matches"]]
    this_match <- all_matches[[match_id]]
    ret <- sapply(columns, function(f) f(this_match))
    c(ret, number_matches = length(all_matches))
}

## Assemble the summary data frame from the response.
## res: the response of the API
## res_id: index(es) of the queries to include
## match_id: index(es) of the matches to include (NULL for all of them);
##    res_id and match_id can't both be of length > 1
## initial_creation: if TRUE, rows of NA are appended for unmatched names
build_summary_match <- function(res, res_id, match_id = NULL, initial_creation) {

    col_names <- c(names(tnrs_columns), "number_matches")

    ## Row of NAs, except for the first element that holds the name
    build_empty_row <- function(x) {
        no_match_row <- stats::setNames(rep(NA, length(col_names)), col_names)
        no_match_row[1] <- x
        no_match_row
    }

    if (length(res_id) > 1 &&
        (!is.null(match_id) && length(match_id) > 1)) {
        stop("Something is wrong. Please contact us.")
    }

    build_summary_row <- function(rid) {
        mids <- match_id
        if (is.null(mids)) {
            mids <- seq_along(res[["results"]][[rid]][["matches"]])
        }
        rows <- lapply(mids, function(mid) summary_row_factory(res, rid, mid))
        if (identical(length(mids), 1L)) unlist(rows) else rows
    }

    summary_row <- lapply(res_id, build_summary_row)
    if (identical(length(res_id), 1L)) {
        summary_row <- unlist(summary_row, recursive = FALSE)
    }

    ## Needed if only 1 row returned
    if (!inherits(summary_row, "list")) {
        summary_row <- list(summary_row)
    }

    ## Add potential unmatched names
    if (initial_creation && length(res[["unmatched_names"]])) {
        no_match <- lapply(res[["unmatched_names"]], build_empty_row)
        summary_row <- c(summary_row, no_match)
    }

    summary_match <- data.frame(do.call("rbind", summary_row),
                                stringsAsFactors = FALSE)
    names(summary_match) <- col_names
    summary_match
}

##' This function returns a list of pre-defined taxonomic contexts
##' (i.e. clades) which can be used to limit the scope of tnrs
##' queries.
##'
##' Taxonomic contexts are available to limit the scope of TNRS
##' searches. These contexts correspond to uncontested higher taxa
##' such as 'Animals' or 'Land plants'.
##' This service returns a list containing all available taxonomic
##' context names, which may be used as input (via the
##' \code{context_name} argument in other functions) to limit the
##' search scope of other services including
##' \code{\link{tnrs_match_names}}.
##' @title TNRS contexts
##' @param ...  additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list of class \code{tnrs_contexts} with one element for
##'     each major clade (e.g., animals, microbes, plants, fungi,
##'     life) whose elements contain the possible contexts.
##' @export
tnrs_contexts <- function(...) {
    res <- .tnrs_contexts(...)
    class(res) <- "tnrs_contexts"
    res
}

## Print method: for each element of the list, the first context is
## printed on its own line, and the remaining ones are printed in groups
## of (at most) 5 per line. Returns its argument invisibly, as print
## methods are expected to, so that an explicit call to print() doesn't
## also display the value returned by the method.
##' @export
print.tnrs_contexts <- function(x, ...) {
    cat("Possible contexts:\n")
    for (clade in x) {
        ctx <- unlist(clade)
        cat(" ", ctx[1], "\n")
        if (length(ctx) > 1) {
            for (first in seq(2, length(ctx), by = 5)) {
                ## the last group may hold fewer than 5 contexts
                last <- min(first + 4, length(ctx))
                cat(" ", paste(ctx[first:last], collapse = ", "), "\n")
            }
        }
    }
    invisible(x)
}

##' Return a taxonomic context given a list of taxonomic names
##'
##' Find the least inclusive taxonomic context that includes all the
##' unambiguous names in the input set. Unambiguous names are names
##' with exact matches to non-homonym taxa. Ambiguous names (those
##' without exact matches to non-homonym taxa) are indicated in
##' results.
##'
##' @title Infer the taxonomic context from a list of names
##' @param names Vector of taxon names.
##' @param ...  additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list including the context name, the context ott id and
##'     possibly the names in the query that have an ambiguous
##'     taxonomic meaning in the query.
##' @examples
##' \dontrun{
##' res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
##' }
##' @export
tnrs_infer_context <- function(names=NULL, ...) {
    res <- .tnrs_infer_context(names = names, ...)
    return(res)
}
rotl/R/api-taxonomy.R0000644000177500001440000000456412707502314014535 0ustar deepayanusers
##' @importFrom httr content
## Summary information about the OpenTree Taxonomy (OTT)
.taxonomy_about <- function(...) {
    res <- otl_POST(path="/taxonomy/about", body=list(), ...)
    res
}


##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.flag
##' @importFrom assertthat assert_that
## Information about an OpenTree Taxonomy (OTT) taxon.
## Only one ott id can be queried at a time; the three include_*
## arguments must be flags (single TRUE/FALSE values).
.taxonomy_taxon_info <- function(ott_id=NULL,
                                 include_children = FALSE,
                                 include_lineage = FALSE,
                                 include_terminal_descendants = FALSE,
                                 ...) {
    ott_id <- check_ott_ids(ott_id)

    if (length(ott_id) > 1) {
        stop("Must only supply one ", sQuote("ott_id"), " argument")
    }

    assertthat::assert_that(assertthat::is.flag(include_children))
    assertthat::assert_that(assertthat::is.flag(include_lineage))
    assertthat::assert_that(assertthat::is.flag(include_terminal_descendants))

    q <- list(ott_id=jsonlite::unbox(ott_id),
              include_children = jsonlite::unbox(include_children),
              include_lineage = jsonlite::unbox(include_lineage),
              include_terminal_descendants = jsonlite::unbox(include_terminal_descendants))
    res <- otl_POST(path="/taxonomy/taxon_info", body=q, ...)
    res
}


##' @importFrom jsonlite unbox
##' @importFrom httr content
## Get a subtree from the OpenTree Taxonomy (OTT) taxonomic tree.
## Only one ott id can be queried at a time; label_format is only sent
## when it is provided.
.taxonomy_subtree <- function(ott_id=NULL, label_format=NULL, ...) {
    ott_id <- check_ott_ids(ott_id)

    if (length(ott_id) > 1) {
        stop("Must only supply one ", sQuote("ott_id"), " argument")
    }

    q <- list(ott_id=jsonlite::unbox(ott_id))
    if (!is.null(label_format)) {
        if (!check_label_format(label_format)) {
            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
                 sQuote("id"), ", or ", sQuote("name_and_id"))
        }
        q$label_format <- jsonlite::unbox(label_format)
    }
    res <- otl_POST(path="/taxonomy/subtree", body=q, ...)
    res
}


##' @importFrom httr content
## Get the most recent common ancestor (MRCA) from nodes in the OpenTree Taxonomy (OTT)
.taxonomy_mrca <- function (ott_ids = NULL, ...) {
    ott_ids <- check_ott_ids(ott_ids)
    q <- list(ott_ids=ott_ids)
    res <- otl_POST(path="/taxonomy/mrca", body=q, ...)
    res
}
rotl/R/api-collections.R0000644000177500001440000000177112674100604015171 0ustar deepayanusers
##' @importFrom assertthat assert_that is.flag
## This endpoint currently returns JSON in XML with mime type as text/html
## NOTE(review): `property` and `value` are accepted but not used to
## build the request -- confirm whether this is intentional.
.collection_find_collections <- function(property = NULL, value = NULL,
                                         verbose = FALSE, ...) {
    assertthat::assert_that(assertthat::is.flag(verbose))
    req_body <- list()
    req_body$verbose <- verbose
    res <- otl_POST(path = "collections/find_collections",
                    body = req_body,
                    ...)
    res
}

## POST an empty body to the collections/properties endpoint
.collection_properties <- function(...) {
    req_body <- list()
    res <- otl_POST(path = "collections/properties",
                    body = req_body,
                    ...)
    res
}

## GET collections/<owner_id>/<collection_name>; both arguments must be
## strings (length-one character vectors).
.get_collection <- function(owner_id = NULL, collection_name = NULL, ...) {
    assertthat::assert_that(assertthat::is.string(owner_id))
    assertthat::assert_that(assertthat::is.string(collection_name))
    res <- otl_GET(path = paste("collections", owner_id, collection_name,
                                sep = "/"),
                   ...)
    res
}
rotl/R/is_in_tree.R0000644000177500001440000000400413056073352014220 0ustar deepayanusers
##' Some valid taxonomic names do not occur in the Synthetic
##' Tree. This convenience function allows you to check whether a
##' given Open Tree Taxonomy identifier (OTT id) is in the tree. A taxonomic
##' name may not occur in the synthetic tree because (1) it is an
##' extinct or invalid taxon, or (2) it is part of a group that is not
##' monophyletic in the tree.
##'
##' @title Check that OTT ids occur in the Synthetic Tree
##' @param ott_ids a vector of Open Tree Taxonomy identifiers
##' @param ...  additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A named logical vector.
##'     \code{TRUE} indicates that the OTT id is in the synthetic tree,
##'     and \code{FALSE} that it is not.
##' @examples
##' \dontrun{
##'   plant_families <- c("Asteraceae", "Solanaceae", "Poaceae", "Amaranthaceae",
##'                       "Zamiaceae", "Araceae", "Juncaceae")
##'   matched_names <- tnrs_match_names(plant_families)
##'   ## This fails because some ott ids are not in the tree
##'   ## plant_tree <- tol_induced_subtree(ott_id(matched_names))
##'   ## So let's check which ones are actually in the tree first:
##'   in_tree <- is_in_tree(ott_id(matched_names))
##'   ## This now works:
##'   plant_tree <- tol_induced_subtree(ott_id(matched_names)[in_tree])
##' }
##'
##' @export
is_in_tree <- function(ott_ids, ...) {
    UseMethod("is_in_tree")
}

##' @export
is_in_tree.otl_ott_id <- function(ott_ids, ...) {

    not_in_tree_msg <- "not find any synthetic tree nodes corresponding to the OTT id provided"

    ## Query the node information for a single id: if the request
    ## succeeds, the id is in the tree when the id returned is the one
    ## requested; if it fails, the id is reported as not in the tree.
    check_one <- function(ottid) {
        test <- try(tol_node_info(ottid, ...), silent = TRUE)
        if (!inherits(test, "try-error")) {
            return(ott_id(test)[[1]] == ottid)
        }
        ## NOTE(review): the parentheses in this pattern are interpreted
        ## as a regular expression group, not as literal characters --
        ## confirm against the error message returned by the API.
        expected_failure <- grepl(not_in_tree_msg, test) &&
            grepl(paste0("(", ottid, ")"), test)
        if (!expected_failure) {
            warning("something seems off, check your internet connection?")
        }
        FALSE
    }

    vapply(ott_ids, check_one, logical(1), USE.NAMES = TRUE)
}
rotl/R/tax_utils.R0000644000177500001440000000450213056112635014115 0ustar deepayanusers
## all extended-taxon-descriptors have:
## - ott_id
## - name
## - rank
## - unique_name
## - tax_sources
## and they may have
## - flags
## - synonyms
## - is_suppressed

## builds the functions to access the content of the taxon descriptors.
## slot: the name of the list element we need to access
## flatten: if the list element is a list, make it a vector
## optional: is the slot found in all taxon descriptors or only in some
## The accessor generated fails if a non-optional slot is missing; it
## warns and returns NULL if an optional slot is missing.
tax_access_factory <- function(slot, flatten, optional) {
    function(tax) {
        if (exists(slot, tax)) {
            value <- tax[[slot]]
            if (flatten) {
                return(unlist(value))
            }
            return(value)
        }
        if (!optional) {
            stop("Invalid taxon object", call. = FALSE)
        }
        warning("This object doesn't have ", sQuote(slot), call. = FALSE)
        NULL
    }
}

## found in all taxon descriptors
.tax_ott_id <- tax_access_factory("ott_id", flatten = FALSE,
                                  optional = FALSE)

.tax_name <- tax_access_factory("name", flatten = FALSE,
                                optional = FALSE)

.tax_rank <- tax_access_factory("rank", flatten = FALSE,
                                optional = FALSE)

.tax_sources <- tax_access_factory("tax_sources", flatten = TRUE,
                                   optional = FALSE)

.tax_unique_name <- tax_access_factory("unique_name", flatten = FALSE,
                                       optional = FALSE)

## optional
.tax_flags <- tax_access_factory("flags", flatten = TRUE,
                                 optional = TRUE)

.tax_is_suppressed <- tax_access_factory("is_suppressed", flatten = FALSE,
                                         optional = TRUE)

.tax_synonyms <- tax_access_factory("synonyms", flatten = TRUE,
                                    optional = TRUE)

## Does the slot element represent a taxon? (i.e., does it have all the
## elements found in every taxon descriptor)
is_taxon <- function(slot) {
    required <- c("ott_id", "name", "rank", "tax_sources", "unique_name")
    all(required %in% names(slot))
}


### adds a class to the objects returned by the methods
add_otl_class <- function(res, .f) {
    ## we need a prefix to avoid class name conflict
    ## apparently the class "name" already exists
    ## (the name of the slot is read from the environment of the
    ## accessor that was generated by tax_access_factory)
    slot_name <- as.list(environment(.f))[["slot"]]
    class(res) <- c(paste0("otl_", slot_name), class(res))
    res
}

## Subsetting keeps the class of the object
##' @export
`[.otl_ott_id` <- function(x, i, ...) {
    r <- NextMethod()
    class(r) <- class(x)
    r
}
rotl/R/methods.R0000644000177500001440000000625112770014122013541 0ustar deepayanusers
############################################################################
## methods                                                                ##
############################################################################

##' Methods for dealing with objects containing taxonomic information
##' (Taxonomy, TNRS endpoints)
##'
##' This is the page for the generic methods. See the help pages for
##' \code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and
##' \code{\link{tnrs_match_names}} for more information.
##'
##' @title Methods for Taxonomy
##' @param tax an object returned by \code{\link{taxonomy_taxon_info}},
##'     \code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}
##' @param ... additional arguments (see
##'     \code{\link{tnrs_match_names}})
##' @rdname taxonomy-methods
##' @export
tax_rank <- function(tax, ...) UseMethod("tax_rank")

##' @export
##' @rdname taxonomy-methods
ott_id <- function(tax, ...) UseMethod("ott_id")

##' @export
##' @rdname taxonomy-methods
synonyms <- function(tax, ...) UseMethod("synonyms")

##' @export
##' @rdname taxonomy-methods
tax_sources <- function(tax, ...) {
    UseMethod("tax_sources")
}

##' @export
##' @rdname taxonomy-methods
is_suppressed <- function(tax, ...) {
    UseMethod("is_suppressed")
}

##' @export
##' @rdname taxonomy-methods
unique_name <- function(tax, ...) {
    UseMethod("unique_name")
}

##' @export
##' @rdname taxonomy-methods
tax_name <- function(tax, ...) {
    UseMethod("tax_name")
}

### flags -----------------------------------------------------------------------

##' @export
##' @rdname match_names-methods
flags <- function(tax, ...) {
    UseMethod("flags")
}

###

##' Retrieve the detailed information for the list of studies used in
##' the Tree of Life.
##'
##' @title List of studies used in the Tree of Life
##'
##' @details This function takes the object resulting from
##'     \code{tol_about(study_list = TRUE)}, \code{tol_mrca()},
##'     \code{tol_node_info()}, and returns a data frame listing the
##'     \code{tree_id}, \code{study_id} and \code{git_sha} for the
##'     studies currently included in the Tree of Life.
##'
##' @param tax a list containing a \code{source_id_map} slot.
##' @param ... additional arguments (currently unused)
##'
##' @return a data frame
##' @export
source_list <- function(tax, ...) {
    UseMethod("source_list")
}

##' Extract the lineage information (higher taxonomy) from an object
##' returned by \code{\link{taxonomy_taxon_info}}.
##'
##' The object passed to this function must have been created using
##' the argument \code{include_lineage=TRUE}.
##'
##' @title Lineage of a taxon
##' @param tax an object created by \code{\link{taxonomy_taxon_info}}
##'     using the argument \code{include_lineage=TRUE}.
##' @param ... additional arguments (currently unused).
##' @return A list with one slot per taxon that contains a data frame
##'     with 3 columns: the taxonomy rank, the name, and unique name
##'     for all taxa included in the lineage of the taxon up to the
##'     root of the tree.
##' @rdname tax_lineage
##' @export
tax_lineage <- function(tax, ...) UseMethod("tax_lineage")

##' @export
##' @rdname tol_node_info
tol_lineage <- function(tax, ...) UseMethod("tol_lineage")
rotl/R/api-studies.R0000644000177500001440000001355713003747366014351 0ustar deepayanusers
##' @importFrom jsonlite unbox
##' @importFrom httr content
## Return a list of studies from the OpenTree docstore that match a given property.
## property, value: character, both are required
## verbose, exact: logical, sent with the request
.studies_find_studies <- function(property = NULL, value = NULL, verbose = FALSE,
                                  exact = FALSE, ...) {
    if (!is.logical(verbose)) stop("Argument \'verbose\' should be logical")
    if (!is.logical(exact)) stop("Argument \'exact\' should be logical")

    req_body <- list()
    if (!is.null(property)) {
        if (!is.character(property)) {
            stop("Argument \'property\' must be of class \"character\"")
        }
        req_body$property <- jsonlite::unbox(property)
    } else {
        stop("Must supply a \'property\' argument")
    }
    if (!is.null(value)) {
        if (!is.character(value)) {
            stop("Argument \'value\' must be of class \"character\"")
        }
        req_body$value <- jsonlite::unbox(value)
    } else {
        stop("Must supply a \'value\' argument")
    }
    req_body$verbose <- jsonlite::unbox(verbose)
    req_body$exact <- jsonlite::unbox(exact)
    res <- otl_POST(path="studies/find_studies/",
                    body=req_body,
                    ...)
    res
}

##' @importFrom jsonlite unbox
##' @importFrom httr content
## Return a list of trees from the OpenTree docstore that match a given property.
## property, value: character, both are required
## verbose, exact: logical, sent with the request
.studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
                                exact=FALSE, ...) {
    if (!is.logical(verbose)) {
        stop("Argument \'verbose\' must be of class \"logical\"")
    }
    if (!is.logical(exact)) {
        stop("Argument \'exact\' must be of class \"logical\"")
    }

    req_body <- list()
    if (!is.null(property)) {
        if (!is.character(property)) {
            stop("Argument \'property\' must be of class \"character\"")
        }
        req_body$property <- jsonlite::unbox(property)
    } else {
        stop("Must supply a \'property\' argument")
    }
    if (!is.null(value)) {
        if (!is.character(value)) {
            stop("Argument \'value\' must be of class \"character\"")
        }
        req_body$value <- jsonlite::unbox(value)
    } else {
        stop("Must supply a \'value\' argument")
    }
    ## These must be *named* elements of the body (as in
    ## .studies_find_studies): appending them unnamed with c() doesn't
    ## send them as the `verbose` and `exact` fields of the request.
    req_body$verbose <- jsonlite::unbox(verbose)
    req_body$exact <- jsonlite::unbox(exact)
    res <- otl_POST(path="studies/find_trees/",
                    body=req_body,
                    ...)
    res
}

##' @importFrom httr content
## Return a list of properties that can be used to search studies and trees
## ...: passed on to otl_POST, as for the other endpoints
.studies_properties <- function(...) {
    res <- otl_POST(path="studies/properties/", body=list(), ...)
    res
}


##' @importFrom httr content
## Get a study from the OpenTree docstore
## study_id: character, required
## format: one of the formats listed in the signature ("" by default)
.get_study <- function(study_id = NULL, format = c("", "nexus", "newick", "nexml", "json"),
                       ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    } else if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    format <- match.arg(format)
    res <- otl_GET(path=paste("study",
                              paste0(study_id, otl_formats(format)), sep="/"),
                   ...)
    res
}


##' @importFrom httr content
## Get a tree in a study from the OpenTree docstore
## study_id, tree_id: character, required
## format, tip_label: one of the values listed in the signature (the
##    first one by default); tip_label is appended to the path as a
##    query string
.get_study_tree <- function(study_id=NULL, tree_id=NULL, format=c("json", "newick", "nexus"),
                            tip_label = c("ot:originallabel", "ot:ottid", "ot:otttaxonname"),
                            ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    } else if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    if (is.null(tree_id)) {
        stop("Must supply a \'tree\' argument")
    } else if (!is.character(tree_id)) {
        stop("Argument \'tree\' must be of class \"character\"")
    }
    format <- match.arg(format)
    tip_label <- match.arg(tip_label)
    tip_label <- paste0("/?tip_label=", tip_label)
    tree_file <- paste0(tree_id, otl_formats(format), tip_label)
    res <- otl_GET(path=paste("study", study_id, "tree", tree_file, sep="/"), ...)
    res
}

##' @importFrom httr content
## Get the metadata of a study from the OpenTree docstore
.get_study_meta <- function(study_id, ...) {
    otl_GET(path= paste("study", study_id, "meta", sep="/"), ...)
}


##' @importFrom httr content
## Get a subtree of a tree in a study from the OpenTree docstore
## study_id, tree_id, subtree_id: character, required
## format, tip_label: one of the values listed in the signature (the
##    first one by default); subtree_id and tip_label are appended to
##    the path as a query string
.get_study_subtree <- function(study_id, tree_id, subtree_id,
                               format=c("newick", "nexus", "nexml"),
                               tip_label = c("ot:originallabel", "ot:ottid", "ot:otttaxonname"),
                               ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    } else if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    if (is.null(tree_id)) {
        stop("Must supply a \'tree\' argument")
    } else if (!is.character(tree_id)) {
        stop("Argument \'tree\' must be of class \"character\"")
    }
    if (is.null(subtree_id)) {
        stop("Must supply a \'subtree\' argument")
    } else if (!is.character(subtree_id)) {
        stop("Argument \'subtree\' must be of class \"character\"")
    }
    format <- match.arg(format)
    format <- otl_formats(format)
    tip_label <- match.arg(tip_label)
    url_stem <- paste("study", study_id, "subtree", paste0(tree_id, format), sep="/")
    res <- otl_GET(path=paste0(url_stem, "?subtree_id=", subtree_id,
                               "&tip_label=", tip_label), ...)
    res
}

### Let's not worry about those for now, as their results could be
### obtained using get_study_tree

get_study_otu <- function(study_id, otu=NULL, ...) {
    otl_GET(path=paste("study", study_id, "otu", otu, sep="/"), ...)
}

get_study_otus <- function(study_id, otus, ...) {
    otl_GET(path=paste("study", study_id, "otu", otus, sep="/"), ...)
}

## ... is passed on to otl_GET, as for the other endpoints
get_study_otumap <- function(study_id, ...) {
    otl_GET(path=paste("study", study_id,"otumap", sep="/"), ...)
}
rotl/R/taxonomy.R0000644000177500001440000002506212707504372013770 0ustar deepayanusers
##' Summary information about the Open Tree Taxonomy (OTT)
##'
##' Return metadata and information about the taxonomy
##' itself. Currently, the available metadata is fairly sparse, but
##' includes (at least) the version, and the location from which the
##' complete taxonomy source files can be downloaded.
##'
##' @title Information about the Open Tree Taxonomy
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list with the following properties:
##' \itemize{
##'
##'     \item {weburl} {String. The release page for this version
##'     of the taxonomy.}
##'
##'     \item {author} {String. The author string.}
##'
##'     \item {name} {String. The name of the taxonomy.}
##'
##'     \item {source} {String. The full identifying information for
##'     this version of the taxonomy.}
##'
##'     \item {version} {String. The version number of the taxonomy.}
##' }
##' @examples
##' \dontrun{
##' taxonomy_about()
##' }
##' @export
taxonomy_about <- function (...) {
    res <- .taxonomy_about(...)
    return(res)
}


##' Information about taxa.
##'
##' Given a vector of ott ids, \code{taxonomy_taxon_info} returns
##' information about the specified taxa.
##'
##' The functions \code{tax_rank}, \code{tax_name}, and
##' \code{synonyms} can extract this information from an object
##' created by the \code{taxonomy_taxon_info()}.
##'
##' @title Taxon information
##' @param ott_ids the ott ids of the taxon of interest (numeric or
##'     character containing only numbers)
##' @param include_children whether to include information about all
##'     the children of this taxon. Default \code{FALSE}.
##' @param include_lineage whether to include information about all ##' the higher level taxa that include the \code{ott_ids}. ##' Default \code{FALSE}. ##' @param include_terminal_descendants whether to include the list of ##' terminal \code{ott_ids} contained in the \code{ott_ids} ##' provided. ##' @param ... additional arguments to customize the API request (see ##' \code{\link{rotl}} package documentation). ##' @param tax an object generated by the \code{taxonomy_taxon_info} ##' function ##' @return \code{taxonomy_taxon_info} returns a list detailing ##' information about the taxa. \code{tax_rank} and ##' \code{tax_name} return a vector. \code{synonyms} returns a ##' list whose elements are the synonyms for each of the ##' \code{ott_id} requested. ##' ##' @seealso \code{\link{tnrs_match_names}} to obtain \code{ott_id} ##' from a taxonomic name. ##' @examples ##' \dontrun{ ##' req <- taxonomy_taxon_info(ott_id=515698) ##' tax_rank(req) ##' tax_name(req) ##' synonyms(req) ##' } ##' @export taxonomy_taxon_info <- function (ott_ids, include_children = FALSE, include_lineage = FALSE, include_terminal_descendants = FALSE, ...) { res <- lapply(ott_ids, function(x) { .taxonomy_taxon_info( ott_id = x, include_children = include_children, include_lineage = include_lineage, include_terminal_descendants = include_terminal_descendants, ... ) }) names(res) <- ott_ids class(res) <- "taxon_info" return(res) } ##' Given an ott id, return the inclusive taxonomic subtree descended ##' from the specified taxon. ##' ##' If the output of this function is exported to a file, the only ##' possible value for the \code{output_format} argument is ##' \dQuote{\code{newick}}. If the file provided already exists, it ##' will be silently overwritten. ##' ##' @title Taxonomy subtree ##' @param ott_id The ott id of the taxon of interest. ##' @param output_format the format of the object to be returned. See ##' the \sQuote{Return} section. ##' @param label_format Character. 
##'     Defines the label type; one of
##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
##'     \dQuote{\code{name_and_id}} (the default).
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param file the file name where to save the output of the
##'     function. Ignored unless \code{output_format} is set to
##'     \dQuote{\code{newick}}.
##' @return If the \code{file} argument is missing: \itemize{
##'
##'     \item{\dQuote{\code{taxa}}} { a list of the taxa names
##'     (species) in slot \code{tip_label}, and higher-level taxanomy
##'     (e.g., families, genera) in slot \code{edge_label}, descending
##'     from the taxa corresponding to the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{newick}}} { a character vector containing
##'     the newick formatted string corresponding to the taxonomic
##'     subtree for the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{phylo}}} { an object of the class
##'     \code{phylo} from the \code{\link[ape]{ape}} package. }
##'
##'     \item{\dQuote{\code{raw}}} { the direct output from the API,
##'     i.e., a list with an element named \sQuote{newick} that
##'     contains the subtree as a newick formatted string. }
##'
##'     }
##'
##'     If a \code{file} argument is provided (and
##'     \code{output_format} is set to \dQuote{\code{newick}}), a
##'     logical indicating whether the file was successfully created
##'     (returned invisibly).
##'
##' @examples
##' \dontrun{
##' req <- taxonomy_subtree(ott_id=515698)
##' plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
##' }
##' @export
taxonomy_subtree <- function(ott_id = NULL,
                             output_format = c("taxa", "newick", "phylo", "raw"),
                             label_format = NULL, file, ...) {
    output_format <- match.arg(output_format)
    res <- .taxonomy_subtree(ott_id = ott_id, label_format = label_format, ...)

    if (!missing(file) && !identical(output_format, "newick"))
        warning(sQuote("file"),
                " argument is ignored, you can only write newick tree strings to a file.")

    if (identical(output_format, "raw")) {
        return(res)
    } else if (identical(output_format, "newick")) {
        res <- res$newick
        if (!missing(file)) {
            ## any existing file is removed first so it is overwritten
            unlink(file)
            cat(res, file = file)
            ## return() must wrap invisible(): `invisible(return(x))`
            ## exits through return() and the value is printed.
            return(invisible(file.exists(file)))
        }
    } else if (identical(output_format, "phylo")) {
        res <- phylo_from_otl(res)
    } else { ## in all other cases use tree_to_labels
        res <- tree_to_labels(res)
    }
    res
}

##' Taxonomic Least Inclusive Common Ancestor (MRCA)
##'
##' Given a set of OTT ids, get the taxon that is the most recent common
##' ancestor (the MRCA) of all the identified taxa.
##'
##' @title Taxonomic MRCA
##' @param ott_ids a vector of ott ids for the taxa whose MRCA is to
##'     be found (numeric).
##' @param tax an object generated by the \code{taxonomy_mrca}
##'     function
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return \itemize{
##'
##'     \item{\code{taxonomy_mrca}} { returns a list about the
##'     taxonomic information relating to the MRCA for the ott_ids
##'     provided. }
##'
##'     \item{\code{tax_rank}} { returns a character vector of the
##'     taxonomic rank for the MRCA. }
##'
##'     \item{\code{tax_name}} { returns a character vector the
##'     Open Tree Taxonomy name for the MRCA. }
##'
##'     \item{\code{ott_id}} { returns a numeric vector of the ott id
##'     for the MRCA. }
##'
##' }
##' @examples
##' \dontrun{
##' req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
##' tax_rank(req)
##' tax_name(req)
##' ott_id(req)
##' }
##' @export
taxonomy_mrca <- function(ott_ids = NULL, ...) {
    res <- .taxonomy_mrca(ott_ids = ott_ids, ...)
    class(res) <- c("taxon_mrca", class(res))
    res
}


### methods for taxonomy_taxon_info ---------------------------------------------

## Build an S3 method for `taxon_info` objects: the returned closure
## applies the extractor `.f` to every element of `tax`, names the
## result with each element's unique name and passes it to
## add_otl_class() together with `.f`.
taxon_info_method_factory <- function(.f) {
    function(tax, ...) {
        res <- lapply(tax, .f)
        names(res) <- vapply(tax, .tax_unique_name, character(1))
        res <- add_otl_class(res, .f)
        res
    }
}

##' @export
##' @rdname taxonomy_taxon_info
tax_rank.taxon_info <- taxon_info_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_taxon_info
tax_name.taxon_info <- taxon_info_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_taxon_info
unique_name.taxon_info <- taxon_info_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_taxon_info
synonyms.taxon_info <- taxon_info_method_factory(.tax_synonyms)

##' @export
##' @rdname taxonomy_taxon_info
ott_id.taxon_info <- taxon_info_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_taxon_info
tax_sources.taxon_info <- taxon_info_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_taxon_info
is_suppressed.taxon_info <- taxon_info_method_factory(.tax_is_suppressed)

##' @export
##' @rdname taxonomy_taxon_info
flags.taxon_info <- taxon_info_method_factory(.tax_flags)

### methods for taxonomy_mrca ---------------------------------------------------

## Same idea as taxon_info_method_factory(), but for `taxon_mrca`
## objects: `.f` is applied to the single "mrca" element and the
## one-element list is named with that element's unique name.
taxon_mrca_method_factory <- function(.f) {
    function(tax, ...) {
        res <- list(.f(tax[["mrca"]]))
        names(res) <- .tax_unique_name(tax[["mrca"]])
        res <- add_otl_class(res, .f)
        res
    }
}

##' @export
##' @rdname taxonomy_mrca
tax_rank.taxon_mrca <- taxon_mrca_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_mrca
tax_name.taxon_mrca <- taxon_mrca_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_mrca
ott_id.taxon_mrca <- taxon_mrca_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_mrca
unique_name.taxon_mrca <- taxon_mrca_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_mrca
tax_sources.taxon_mrca <- taxon_mrca_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_mrca
flags.taxon_mrca <- taxon_mrca_method_factory(.tax_flags)

##' @export
##' @rdname taxonomy_mrca
is_suppressed.taxon_mrca <- taxon_mrca_method_factory(.tax_is_suppressed)

### method for extracting higher taxonomy from taxonomy_taxon_info calls -------

## Turn the "lineage" element of one taxon into a data frame with one
## row per lineage entry (columns: rank, name, unique_name, ott_id).
get_lineage <- function(tax) {
    check_lineage(tax)
    lg <- lapply(tax[["lineage"]], build_lineage)
    lg <- do.call("rbind", lg)
    as.data.frame(lg, stringsAsFactors = FALSE)
}

## Named vector describing a single lineage entry.
build_lineage <- function(x) {
    c("rank" = .tax_rank(x),
      "name" = .tax_name(x),
      "unique_name" = .tax_unique_name(x),
      "ott_id" = .tax_ott_id(x))
}

## Stop with an informative message when `tax` has no "lineage" element.
check_lineage <- function(tax) {
    if (!exists("lineage", tax)) {
        stop("The object needs to be created using ",
             sQuote("include_lineage=TRUE"))
    }
}

##' @export
##' @rdname tax_lineage
tax_lineage.taxon_info <- function(tax, ...) {
    lapply(tax, get_lineage)
}
rotl/R/api-tnrs.R0000644000177500001440000000455512705157664013661 0ustar deepayanusers
##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.string is.flag
## Match taxon names
##
## Validates the arguments, builds the request body (NULL entries are
## dropped, scalar options are unboxed) and POSTs it to
## "tnrs/match_names". `...` is forwarded to otl_POST().
.tnrs_match_names <- function(names = NULL, context_name = NULL,
                              do_approximate_matching = TRUE,
                              ids = NULL, include_suppressed = FALSE, ...) {
    if (is.null(names)) {
        stop("You must supply a ", sQuote("names"), " argument")
    } else if (!is.character(names)) {
        stop("Argument ", sQuote("names"), " must be of class ",
             sQuote("character"))
    }
    if (!is.null(ids)) {
        if (length(ids) != length(names)) {
            stop("Arguments ", sQuote("ids"), " and ",
                 sQuote("names"), " must be of the same length")
        } else if (!is.character(ids)) {
            stop("Argument ", sQuote("ids"), " must be of class ",
                 sQuote("character"))
        }
    }
    if (!assertthat::is.flag(do_approximate_matching)) {
        stop("Argument ", sQuote("do_approximate_matching"),
             " must be of class ", sQuote("logical"))
    }
    if (!assertthat::is.flag(include_suppressed)) {
        ## the message used to name a non-existent `include_deprecated`
        stop("Argument ", sQuote("include_suppressed"),
             " must be of class ", sQuote("logical"))
    }
    if (!is.null(context_name)) {
        if (!assertthat::is.string(context_name)) {
            stop("Argument ", sQuote("context_name"),
                 " must be of class ", sQuote("character"))
        }
        context_name <- jsonlite::unbox(context_name)
    }

    q <- list(names = names, context_name = context_name,
              do_approximate_matching = jsonlite::unbox(do_approximate_matching),
              ids = ids,
              include_suppressed = jsonlite::unbox(include_suppressed))
    ## drop the arguments that were not supplied
    is_unset <- vapply(q, is.null, logical(1))
    q <- q[!is_unset]

    res <- otl_POST("tnrs/match_names", body = q, ...)
    res
}


##' @importFrom httr content
## Get OpenTree TNRS contexts
.tnrs_contexts <- function(...) {
    res <- otl_POST("tnrs/contexts", body = list(), ...)
    res
}


## Infer taxonomic context from a set of names
.tnrs_infer_context <- function(names = NULL, ...) {
    if (is.null(names)) {
        stop("Must supply a \'names\' argument")
    } else if (!is.character(names)) {
        stop("Argument \'names\' must be of class \"character\"")
    }
    q <- list(names = names)
    res <- otl_POST("tnrs/infer_context", body = q, ...)
    res
}
rotl/R/api-tol.R0000644000177500001440000001125612707503354013456 0ustar deepayanusers
##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.flag
## Summary information about the OpenTree Tree of Life
.tol_about <- function(include_source_list = FALSE, ...) {
    if (!assertthat::is.flag(include_source_list)) {
        stop("Argument ", sQuote("include_source_list"),
             " must be of class ", sQuote("logical"))
    }
    q <- list(include_source_list = jsonlite::unbox(include_source_list))
    res <- otl_POST(path = "tree_of_life/about", body = q, ...)
    res
}


##' @importFrom jsonlite unbox
##' @importFrom httr content
## Get summary information about a node in the OpenTree Tree of Life
##
## Exactly one of `ott_id` / `node_id` must be supplied.
.tol_node_info <- function(ott_id = NULL, node_id = NULL,
                           include_lineage = FALSE, ...) {
    if (!is.logical(include_lineage)) {
        stop("Argument \'include_lineage\' must be of class \"logical\"")
    }
    if (is.null(ott_id) && is.null(node_id)) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
    }
    if (!is.null(ott_id) && !is.null(node_id)) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"),
             ", not both.")
    }
    if (!is.null(ott_id)) {
        ott_id <- check_ott_ids(ott_id)
        if (length(ott_id) != 1)
            stop("Please provide a single ", sQuote("ott_id"), call. = FALSE)
        q <- list(ott_id = jsonlite::unbox(ott_id),
                  include_lineage = jsonlite::unbox(include_lineage))
    } else {
        if (!check_valid_node_id(node_id)) {
            stop("Argument ", sQuote("node_id"),
                 " must look like \'ott123\' or \'mrcaott123ott456\'.")
        }
        q <- list(node_id = jsonlite::unbox(node_id),
                  include_lineage = jsonlite::unbox(include_lineage))
    }
    res <- otl_POST(path = "tree_of_life/node_info", body = q, ...)
    res
}


##' @importFrom httr content
## Get the MRCA of a set of nodes
.tol_mrca <- function(ott_ids = NULL, node_ids = NULL, ...) {
    if (is.null(ott_ids) && is.null(node_ids)) {
        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"),
             " (or both).")
    }
    q <- list()
    if (!is.null(ott_ids)) {
        ott_ids <- check_ott_ids(ott_ids)
        q$ott_ids <- ott_ids
    }
    if (!is.null(node_ids)) {
        check_node_ids(node_ids)
        q$node_ids <- node_ids
    }
    res <- otl_POST(path = "tree_of_life/mrca", body = q, ...)
    res
}


# ignoring 'include_lineage' for subtree below. arguson only

##' @importFrom jsonlite unbox
##' @importFrom httr content
## Get a subtree from the OpenTree Tree of Life
##
## Exactly one of `ott_id` / `node_id` must be supplied; `label_format`
## is only added to the request when it is not NULL.
.tol_subtree <- function(ott_id = NULL, node_id = NULL,
                         label_format = NULL, ...) {
    if (is.null(ott_id) && is.null(node_id)) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
    }
    if (!is.null(ott_id) && !is.null(node_id)) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"),
             ", not both.")
    }
    if (!is.null(ott_id)) {
        ott_id <- check_ott_ids(ott_id)
        if (length(ott_id) != 1)
            stop("Please provide a single ", sQuote("ott_id"))
        q <- list(ott_id = jsonlite::unbox(ott_id))
    } else {
        if (!check_valid_node_id(node_id)) {
            stop("Argument ", sQuote("node_id"),
                 " must look like \'ott123\' or \'mrcaott123ott456\'.")
        }
        q <- list(node_id = jsonlite::unbox(node_id))
    }
    if (!is.null(label_format)) {
        if (!check_label_format(label_format)) {
            stop(sQuote("label_format"), " must be one of: ", sQuote("name"),
                 ", ", sQuote("id"), ", or ", sQuote("name_and_id"))
        }
        q$label_format <- jsonlite::unbox(label_format)
    }
    res <- otl_POST(path = "tree_of_life/subtree", body = q, ...)
    res
}


##' @importFrom httr content
## Get an induced subtree from the OpenTree Tree of Life from a set of nodes
##
## At least two ids (ott ids and node ids combined) are required.
.tol_induced_subtree <- function(ott_ids = NULL, node_ids = NULL,
                                 label_format = NULL, ...) {
    if (is.null(ott_ids) && is.null(node_ids)) {
        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"),
             " (or both).")
    }
    q <- list()
    if (!is.null(label_format)) {
        if (!check_label_format(label_format)) {
            stop(sQuote("label_format"), " must be one of: ", sQuote("name"),
                 ", ", sQuote("id"), ", or ", sQuote("name_and_id"))
        }
        q$label_format <- jsonlite::unbox(label_format)
    }
    if (!is.null(ott_ids)) {
        ott_ids <- check_ott_ids(ott_ids)
        q$ott_ids <- ott_ids
    }
    if (!is.null(node_ids)) {
        check_node_ids(node_ids)
        q$node_ids <- node_ids
    }
    if ((length(ott_ids) + length(node_ids)) < 2) {
        stop("At least two valid ", sQuote("ott_ids"), " or ",
             sQuote("node_ids"), " must be provided.")
    }
    res <- otl_POST("tree_of_life/induced_subtree", body = q, ...)
    res
}
rotl/R/match_names.R0000644000177500001440000003500312707472154014367 0ustar deepayanusers
## internal function that match the arguments provided to the correct
## row number in the data frame representing the Open Tree Taxonomy
## for a series of matched names.
## Validate the selector arguments and translate them into an index
## into the original API response.
##
## response: data frame carrying an "original_order" attribute.
## row_number, taxon_name, ott_id: exactly one must be supplied, and it
##     must designate a single row.
## Returns the element of "original_order" for the selected row; stops
## with an informative message on any invalid input.
check_args_match_names <- function(response, row_number, taxon_name, ott_id) {
    orig_order <- attr(response, "original_order")
    if (is.null(orig_order)) {
        ## deparse() keeps the message readable when `response` is an
        ## expression rather than a plain symbol
        stop(sQuote(paste(deparse(substitute(response)), collapse = " ")),
             " was not created using ", sQuote("tnrs_match_names"))
    }
    if (missing(row_number) && missing(taxon_name) && missing(ott_id)) {
        stop("You must specify one of ", sQuote("row_number"), ", ",
             sQuote("taxon_name"), " or ", sQuote("ott_id"))
    } else if (!missing(row_number) && missing(taxon_name) && missing(ott_id)) {
        if (!is.numeric(row_number))
            stop(sQuote("row_number"), " must be a numeric.")
        if (!all(row_number %in% orig_order)) {
            stop(sQuote("row_number"), " is not a valid row number.")
        }
        i <- orig_order[row_number]
    } else if (missing(row_number) && !missing(taxon_name) && missing(ott_id)) {
        if (!is.character(taxon_name))
            stop(sQuote("taxon_name"), " must be a character.")
        ## search strings are matched against the lower-cased name
        i <- orig_order[match(tolower(taxon_name), response$search_string)]
        if (any(is.na(i))) stop("Can't find ", taxon_name)
    } else if (missing(row_number) && missing(taxon_name) && !missing(ott_id)) {
        if (!check_numeric(ott_id))
            stop(sQuote("ott_id"), " must look like a number.")
        i <- orig_order[match(ott_id, response$ott_id)]
        if (any(is.na(i))) stop("Can't find ", ott_id)
    } else {
        stop("You must use only one of ", sQuote("row_number"), ", ",
             sQuote("taxon_name"), " or ", sQuote("ott_id"), ".")
    }

    if (length(i) > 1)
        stop("You must supply a single element for each argument.")
    i
}

## Translate the selector arguments into a row number of `response`
## itself (as opposed to an index into the original API response).
match_row_number <- function(response, row_number, taxon_name, ott_id) {
    ## all the checks on the validity of the arguments are taken care
    ## by check_args_match_names()
    if (missing(row_number) && missing(taxon_name) && missing(ott_id)) {
        stop("You must specify one of ", sQuote("row_number"), ", ",
             sQuote("taxon_name"), " or ", sQuote("ott_id"))
    } else if (!missing(row_number) && (missing(taxon_name) && missing(ott_id))) {
        i <- row_number
    } else if (!missing(taxon_name) && (missing(row_number) && missing(ott_id))) {
        i <- match(tolower(taxon_name), response[["search_string"]])
    } else if (!missing(ott_id) && (missing(row_number) && missing(taxon_name))) {
        i <- match(ott_id, response[["ott_id"]])
    } else {
        stop("You must use only one of ", sQuote("row_number"), ", ",
             sQuote("taxon_name"), " or ", sQuote("ott_id"))
    }
    if (length(i) > 1)
        stop("You must supply a single element for each argument.")
    i
}

##' Taxonomic names may have different meanings in different taxonomic
##' contexts, as the same genus name can be applied to animals and
##' plants for instance. Additionally, the meaning of a taxonomic name
##' may have changed throughout its history, and may have referred to a
##' different taxon in the past. In such cases, a given name might
##' have multiple matches in the Open Tree Taxonomy. These functions
##' allow users to inspect (and update) alternative meanings of a given
##' name and its current taxonomic status according to the Open Tree
##' Taxonomy.
##'
##' To inspect alternative taxonomic meanings of a given name, you
##' need to provide the object resulting from a call to the
##' tnrs_match_names function, as well as one of either the row number
##' corresponding to the name in this object, the name itself (as used
##' in the original query), or the ott_id listed for this name.
##'
##' To update one of the names, you also need to provide the row number
##' in which the name to be replaced appears or its ott id.
##'
##' @title Inspect and Update alternative matches for a name returned
##'     by tnrs_match_names
##' @param response an object generated by the
##'     \code{\link{tnrs_match_names}} function
##' @param row_number the row number corresponding to the name to
##'     inspect
##' @param taxon_name the taxon name corresponding to the name to
##'     inspect
##' @param ott_id the ott id corresponding to the name to inspect
##' @param ...
##'     currently ignored
##' @return a data frame
##' @seealso \code{\link{tnrs_match_names}}
##' @examples
##'   \dontrun{
##'    matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus"))
##'    inspect(matched_names, taxon_name="diadema")
##'    new_matched_names <- update(matched_names, taxon_name="diadema",
##'                                new_ott_id = 631176)
##'    new_matched_names
##'    }
##' @export
##' @rdname match_names
inspect.match_names <- function(response, row_number, taxon_name, ott_id, ...) {

    ## i: index into the original API response; j: row of `response`
    i <- check_args_match_names(response, row_number, taxon_name, ott_id)
    j <- match_row_number(response, row_number, taxon_name, ott_id)

    if (attr(response, "has_original_match")[j]) {
        ## rebuild the summary from the stored API response, with no
        ## match preselected (match_id = NULL)
        res <- attr(response, "original_response")
        summary_match <- build_summary_match(res, res_id = i, match_id = NULL, initial_creation = FALSE)
    } else {
        ## no match was recorded for this name: return the row as is
        summary_match <- response[j, ]
    }
    summary_match
}

##' @export
##' @rdname match_names
inspect <- function(response, ...) UseMethod("inspect")

##' @param object an object created by \code{\link{tnrs_match_names}}
##' @param new_row_number the row number in the output of
##'     \code{\link{inspect}} to replace the taxa specified by
##'     \code{row_number}, \code{taxon_name}, or \code{ott_id}.
##' @param new_ott_id the ott id of the taxon to replace the taxa
##'     specified by \code{row_number}, \code{taxon_name}, or
##'     \code{ott_id}.
##' @export
##' @rdname match_names
##' @importFrom stats update
update.match_names <- function(object, row_number, taxon_name, ott_id, new_row_number, new_ott_id, ...) {

    response <- object
    ## i: index into the original API response; j: row of `response`
    i <- check_args_match_names(response, row_number, taxon_name, ott_id)
    j <- match_row_number(response, row_number, taxon_name, ott_id)

    res <- attr(response, "original_response")

    if (!attr(response, "has_original_match")[j]) {
        warning("There is no match for this name, ", "so there is nothing to replace it with.")
        return(response)
    }

    ## all the candidate matches returned for the selected name
    tmpRes <- res$results[[i]]

    ## rnb: the row of `response` that will be overwritten
    if (missing(row_number)) {
        if (!missing(taxon_name)) {
            rnb <- match(tolower(taxon_name), response$search_string)
        } else if (!missing(ott_id)) {
            rnb <- match(ott_id, response$ott_id)
        }
    } else {
        rnb <- row_number
    }

    ## from here on `j` is reused for the index of the replacement
    ## match among the candidates in `tmpRes`
    if (missing(new_row_number) && missing(new_ott_id)) {
        stop("You must specify either ", sQuote("new_row_number"), " or ", sQuote("new_ott_id"))
    } else if (!missing(new_row_number) && missing(new_ott_id)) {
        if (! new_row_number %in% seq_len(length(tmpRes$matches)))
            stop(sQuote("new_row_number"), " is not a valid row number.")
        j <- new_row_number
    } else if (missing(new_row_number) && !missing(new_ott_id)) {
        all_ott_id <- sapply(lapply(tmpRes[["matches"]], function(x) x[["taxon"]]), function(x) .tax_ott_id(x))
        j <- match(new_ott_id, all_ott_id)
        if (any(is.na(j))) stop("Can't find ", new_ott_id)
    } else {
        stop("You must use only one of ", sQuote("new_row_number"), " or ", sQuote("new_ott_id"))
    }
    if (length(j) > 1) stop("You must supply a single element for each argument")

    summ_match <- summary_row_factory(res, res_id = i, match_id = j)

    response[rnb, ] <- summ_match
    ## remember which candidate is now in use for this row
    attr(response, "match_id")[rnb] <- j
    response
}

## Access the elements for a given match:
## is_synonym, score, nomenclature_code, is_approximate_match, taxon
## Returns a list with one entry per candidate match of result `i`.
get_list_element <- function(response, i, list_name) {
    list_content <- lapply(response[["results"]][[i]][["matches"]], function(x) {
        x[[list_name]]
    })
    list_content
}

## Build a method that extracts the element `list_name` from the matches
## stored in a tnrs_match_names() result: either for every name that has
## a match (no selector supplied) or for the single name selected with
## row_number / taxon_name / ott_id.
match_names_method_factory <- function(list_name) {

    function(tax, row_number, taxon_name, ott_id, ...) {

        response <- tax
        res <- attr(response, "original_response")

        no_args <- all(c(missing(row_number), missing(taxon_name), missing(ott_id)))

        if (no_args) {
            ## indices (in the original response) of the names that matched
            res_i <- attr(response, "original_order")[attr(response, "has_original_match")]
            ret <- lapply(res_i, function(i) {
                get_list_element(res, i, list_name)
            })
            names(ret) <- sapply(res_i, function(i) {
                get_list_element(res, i, "matched_name")[[1]]
            })
            ## ret is already in the correct order so we can use a sequence
            ## to extract the correct element
            ret <- mapply(function(x, i) {
                ret[[x]][i]
            }, seq_along(ret), attr(response, "match_id")[attr(response, "has_original_match")])
            if (all(sapply(ret, length) == 1)) {
                ret <- unlist(ret, use.names = TRUE)
            }
        } else {
            i <- check_args_match_names(response, row_number, taxon_name, ott_id)
            j <- match_row_number(response, row_number, taxon_name, ott_id)
            if (attr(response, "has_original_match")[j]) {
                ## keep only the match currently in use for this row
                ret <- get_list_element(res, i, list_name)[attr(response, "match_id")[j]]
            } else {
                ## placeholder record for a name without any match
                ret <- list(ott_id = NA_character_, name = response[["search_string"]][j], unique_name = NA_character_, rank = NA_character_, tax_sources = NA_character_, flags = NA_character_, synonyms = NA_character_, is_suppressed = NA_character_)
                ret <- list(ret)
            }
        }
        ret
    }
}

## Build a method that applies the taxon extractor `.f` to the "taxon"
## records selected by match_names_method_factory("taxon"); the result
## is named with each record's unique name and passed to add_otl_class().
match_names_taxon_method_factory <- function(.f) {

    function(tax, row_number, taxon_name, ott_id, ...) {

        extract_tax_list <- match_names_method_factory("taxon")
        tax_info <- extract_tax_list(tax, row_number = row_number, taxon_name = taxon_name, ott_id = ott_id)
        res <- lapply(tax_info, function(x) .f(x))
        names(res) <- vapply(tax_info, function(x) .tax_unique_name(x), character(1))
        res <- add_otl_class(res, .f)
        res
    }
}

##' \code{rotl} provides a collection of functions that allows users
##' to extract relevant information from an object generated by
##' \code{\link{tnrs_match_names}} function.
##'
##' These methods optionally accept one of the arguments
##' \code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve
##' the corresponding information for one of the matches in the object
##' returned by the \code{\link{tnrs_match_names}} function.
##'
##' If these arguments are not provided, these methods can return
##' information for the matches currently listed in the object
##' returned by \code{\link{tnrs_match_names}}.
##'
##' @title \code{ott_id} and \code{flags} for taxonomic names matched
##'     by \code{tnrs_match_names}
##' @param tax an object returned by \code{\link{tnrs_match_names}}
##' @param row_number the row number corresponding to the name for
##'     which to retrieve the information
##' @param taxon_name the taxon name corresponding to the name for
##'     which to retrieve the information
##' @param ott_id the ott id corresponding to the name for which to
##'     retrieve the information
##' @param ... currently ignored
##' @return A list of the ott ids or flags for the taxonomic names
##'     matched with \code{\link{tnrs_match_names}}, for either one or
##'     all the names.
##' @examples
##' \dontrun{
##'   rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus"))
##'   rsp$ott_id    # ott id for match currently in use
##'   ott_id(rsp)   # similar as above but elements are named
##'
##'   ## flags() is useful for instance to determine if a taxon is extinct
##'   flags(rsp, taxon_name="Tyrannosaurus")
##' }
##' @export
##' @rdname match_names-methods
ott_id.match_names <- match_names_taxon_method_factory(.tax_ott_id)

##' @export
##' @rdname match_names-methods
flags.match_names <- match_names_taxon_method_factory(.tax_flags)

##' When querying the Taxonomic Name Resolution Services for a
##' particular taxonomic name, the API returns as possible matches all
##' names that include the queried name as a possible synonym.
##' This function allows you to explore other synonyms for an accepted
##' name, and allows you to determine why the name you queried is
##' returning an accepted synonym.
##'
##' To list synonyms for a given taxonomic name, you need to provide
##' the object resulting from a call to the
##' \code{\link{tnrs_match_names}} function, as well as one of either
##' the row number corresponding to the name in this object, the name
##' itself (as used in the original query), or the ott_id listed for
##' this name. Otherwise, the synonyms for all the currently matched
##' names are returned.
##'
##' @title List the synonyms for a given name
##' @param tax a data frame generated by the
##'     \code{\link{tnrs_match_names}} function
##' @param row_number the row number corresponding to the name for
##'     which to list the synonyms
##' @param taxon_name the taxon name corresponding to the name for
##'     which to list the synonyms
##' @param ott_id the ott id corresponding to the name for which to
##'     list the synonyms
##' @param ... currently ignored
##' @return a list whose elements are all synonym names (as vectors of
##'     character) for the taxonomic names that match the query (the
##'     names of the elements of the list).
##' @examples
##' \dontrun{
##'    echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia"))
##'    ## These 3 calls are identical
##'    synonyms(echino, taxon_name="Acanthaster")
##'    synonyms(echino, row_number=2)
##'    synonyms(echino, ott_id=337928)
##' }
##' @export
synonyms.match_names <- match_names_taxon_method_factory(.tax_synonyms)

##' @export
tax_sources.match_names <- match_names_taxon_method_factory(.tax_sources)

##' @export
tax_rank.match_names <- match_names_taxon_method_factory(.tax_rank)

##' @export
is_suppressed.match_names <- match_names_taxon_method_factory(.tax_is_suppressed)

##' @export
unique_name.match_names <- match_names_taxon_method_factory(.tax_unique_name)

##' @export
tax_name.match_names <- match_names_taxon_method_factory(.tax_name)
rotl/R/studies.R0000644000177500001440000005264113003750634013567 0ustar deepayanusers
##' Return the list of study properties that can be used to search
##' studies and trees used in the synthetic tree.
##'
##' The list returned has 2 elements \code{tree_properties} and
##' \code{studies_properties}. Each of these elements lists additional
##' arguments to customize the API request properties that can be used
##' to search for trees and studies that are contributing to the
##' synthetic tree. The definitions of these properties are available
##' from
##' \url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON}
##'
##' @title Properties of the Studies
##' @param ...  additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list of the study properties that can be used to find
##'     studies and trees that are contributing to the synthetic tree.
##' @seealso \code{\link{studies_find_trees}}
##' @export
##' @examples
##' \dontrun{
##'  all_the_properties <- studies_properties()
##'  unlist(all_the_properties$tree_properties)
##' }
studies_properties <- function(...) {
    ## flatten each element of the API answer into an atomic vector
    lapply(.studies_properties(...), unlist)
}

##' Return the identifiers of studies that match given properties
##'
##' @title Find a Study
##' @param exact Should exact matching be used? (logical, default
##'     \code{FALSE})
##' @param property The property to be searched on (character)
##' @param value The property value to be searched on (character)
##' @param detailed If \code{TRUE} (default), the function will return
##'     a data frame that summarizes information about the study (see
##'     \sQuote{Value}). Otherwise, it only returns the study
##'     identifiers.
##' @param verbose Should the output include all metadata (logical
##'     default \code{FALSE})
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return If \code{detailed=TRUE}, the function returns a data frame
##'     listing the study id (\code{study_ids}), the number of trees
##'     associated with this study (\code{n_trees}), the tree ids (at
##'     most 5) associated with the studies (\code{tree_ids}), the
##'     tree id that is a candidate for the synthetic tree if any
##'     (\code{candidate}), the year of publication of the study
##'     (\code{study_year}), the title of the publication for the
##'     study (\code{title}), and the DOI (Digital Object Identifier)
##'     for the study (\code{study_doi}).
##'
##'     If \code{detailed=FALSE}, the function returns a data frame
##'     with a single column containing the study identifiers.
##' @seealso \code{\link{studies_properties}} which lists properties
##'     against which the studies can be
##'     searched. \code{\link{list_trees}} that returns a list for all
##'     tree ids associated with a study.
##' @export
##' @examples
##' \dontrun{
##' ## To match a study for which the identifier is already known
##' one_study <- studies_find_studies(property="ot:studyId", value="pg_719")
##' list_trees(one_study)
##'
##' ## To find studies pertaining to Mammals
##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
##'                                 value="mammalia")
##' ## To extract the tree identifiers for each of the studies
##' list_trees(mammals)
##' ## ... or for a given study
##' list_trees(mammals, "ot_308")
##'
##' ## Just the identifiers without other information about the studies
##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
##'                                 value="mammalia", detailed=FALSE)
##' }
studies_find_studies <- function(property=NULL, value=NULL, verbose=FALSE,
                                 exact=FALSE, detailed = TRUE, ...) {
    api_out <- .studies_find_studies(property = property, value = value,
                                     verbose = verbose, exact = exact, ...)
    ## one study identifier per matched study
    ids <- vapply(api_out[["matched_studies"]],
                  function(study) study[["ot:studyId"]],
                  character(1))

    if (!detailed) {
        ## identifiers only: keep the raw API answer as metadata and
        ## leave a hint where the tree list would otherwise be
        out <- data.frame(study_ids = ids, stringsAsFactors = FALSE)
        attr(out, "found_trees") <- paste("If you want to get a list of the",
                                          "trees associated with the studies,",
                                          "use", sQuote("detailed = TRUE"))
        attr(out, "metadata") <- api_out
        class(out) <- c("matched_studies", "study_ids", class(out))
        return(out)
    }

    out <- summarize_meta(ids)
    class(out) <- c("matched_studies", class(out))
    out
}

##' @export
print.study_ids <- function(x, ...) {
    formatted <- format(x)
    print(formatted, ...)
}

##' Return a list of studies for which trees match a given set of
##' properties
##'
##' The list of possible values to be used as values for the argument
##' \code{property} can be found using the function
##' \code{\link{studies_properties}}.
##'
##' @title Find Trees
##' @param property The property to be searched on (character)
##' @param value The property-value to be searched on (character)
##' @param verbose Should the output include all metadata?
##'     (logical,
##'     default \code{FALSE})
##' @param exact Should exact matching be used for the value?
##'     (logical, default \code{FALSE})
##' @param detailed Should a detailed report be provided? If
##'     \code{TRUE} (default), the output will include metadata about
##'     the study that include trees matching the property. Otherwise,
##'     only information about the trees will be provided.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A data frame that summarizes the trees found (and their
##'     associated studies) for the requested criteria. If a study has
##'     more than 5 trees, the \code{tree_ids} of the first ones will
##'     be shown, followed by \code{...} to indicate that more are
##'     present.
##'
##'     If \code{detailed=FALSE}, the data frame will include the
##'     study ids of the study (\code{study_ids}), the number of trees
##'     in this study that match the search criteria
##'     (\code{n_matched_trees}), the tree ids that match the search
##'     criteria (\code{match_tree_ids}).
##'
##'     If \code{detailed=TRUE}, in addition to the fields listed
##'     above, the data frame will also contain the total number of
##'     trees associated with the study (\code{n_trees}), all the tree
##'     ids associated with the study (\code{tree_ids}), the tree id
##'     that is a potential candidate for inclusion in the synthetic
##'     tree (if any) (\code{candidate}), the year the study was
##'     published (\code{study_year}), the title of the study
##'     (\code{title}), the DOI for the study (\code{study_doi}).
##'
##' @seealso \code{\link{studies_properties}} which lists properties
##'     the studies can be searched on. \code{\link{list_trees}} for
##'     listing the trees that match the query.
##' @export
##' @importFrom stats setNames
##' @examples
##' \dontrun{
##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
##'                           detailed=FALSE)
##' ## summary of the trees and associated studies that match this criterion
##' res
##' ## With metadata about the studies (default)
##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
##'                           detailed=TRUE)
##' ## The list of trees for each study that match the search criteria
##' list_trees(res)
##' ## the trees for a given study
##' list_trees(res, study_id = "pg_2769")
##' }
studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
                               exact=FALSE, detailed = TRUE, ...) {
    .res <- .studies_find_trees(property = property, value = value,
                                verbose = verbose, exact = exact, ...)
    matched <- .res[["matched_studies"]]

    study_ids <- vapply(matched,
                        function(x) x[["ot:studyId"]],
                        character(1))
    n_matched_trees <- vapply(matched,
                              function(x) length(x[["matched_trees"]]),
                              numeric(1))
    ## One character vector of tree ids per study. lapply() + unlist()
    ## (instead of sapply()) keeps the type stable: a study without any
    ## matched tree yields character(0) rather than list().
    match_tree_ids <- lapply(matched, function(x) {
        ids <- unlist(lapply(x[["matched_trees"]],
                             function(y) y[["nexson_id"]]))
        if (is.null(ids)) character(0) else ids
    })
    # this one doesn't return all of the treeids. confusing, bc trees are what is wanted
    #tree_str <- vapply(match_tree_ids, limit_trees, character(1))
    ## vapply() guarantees a character column even when no study matched,
    ## so the data frame always has the same three columns.
    tree_str <- vapply(match_tree_ids,
                       function(x) paste(x, collapse = ", "),
                       character(1))
    res <- data.frame(study_ids, n_matched_trees, match_tree_ids = tree_str,
                      stringsAsFactors = FALSE)
    if (detailed) {
        meta <- summarize_meta(study_ids)
        # the next bit seems really slow (JWB)
        res <- merge(meta, res)
        attr(res, "metadata") <- attr(meta, "metadata")
    } else {
        attr(res, "metadata") <- .res
    }
    attr(res, "found_trees") <- stats::setNames(match_tree_ids, study_ids)
    class(res) <- c("matched_studies", class(res))
    res
}

##' Returns the trees associated with a given study
##'
##' If \code{file_format} is missing, the function returns an object
##' of the class \code{phylo} from the \code{\link[ape]{ape}} package
##' (default), or an object of the class \code{nexml} from the
##' \code{RNeXML} package.
##'
##' Otherwise \code{file_format} can be either \code{newick},
##' \code{nexus}, \code{nexml} or \code{json}, and the function will
##' generate a file of the selected format. In this case, a file name
##' needs to be provided using the argument \code{file}. If a file
##' with the same name already exists, it will be silently
##' overwritten.
##'
##' @title Get all the trees associated with a particular study
##' @param study_id the study ID for the study of interest (character)
##' @param object_format the class of the object the query should
##'     return (either \code{phylo} or \code{nexml}). Ignored if
##'     \code{file_format} is specified.
##' @param file_format the format of the file to be generated
##'     (\code{newick}, \code{nexus}, \code{nexml} or \code{json}).
##' @param file the file name where the output of the function will be
##'     saved.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return if \code{file_format} is missing, an object of class
##'     \code{phylo} or \code{nexml}, otherwise a logical (returned
##'     invisibly) indicating whether the file was successfully created.
##' @seealso \code{\link{get_study_meta}}
##' @export
##' @importFrom jsonlite toJSON
##' @examples
##' \dontrun{
##' that_one_study <- get_study(study_id="pg_719", object_format="phylo")
##' if (require(RNeXML)) { ## if RNeXML is installed get the object directly
##'    nexml_study <- get_study(study_id="pg_719", object_format="nexml")
##' } else { ## otherwise write it to a file
##'    get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml"))
##' }
##' }
get_study <- function(study_id = NULL, object_format = c("phylo", "nexml"),
                      file_format, file, ...) {
    object_format <- match.arg(object_format)

    if (!missing(file)) {
        ## Writing to a file: the output format is mandatory.
        if (missing(file_format)) {
            stop(sQuote("file_format"), " must be specified.")
        }
        file_format <- match.arg(file_format,
                                 c("newick", "nexus", "nexml", "json"))
        ## `...` is forwarded here too, so API customisations apply to
        ## file output exactly as they do to the object-returning
        ## branches below.
        res <- .get_study(study_id = study_id, format = file_format, ...)
        ## Remove any pre-existing file so the result of file.exists()
        ## reflects this write.
        unlink(file)
        if (identical(file_format, "json")) {
            cat(jsonlite::toJSON(res), file = file)
        } else {
            cat(res, file = file)
        }
        return(invisible(file.exists(file)))
    }

    if (identical(object_format, "phylo")) {
        res <- .get_study(study_id = study_id, format = "newick", ...)
        phylo_from_otl(res)
    } else if (identical(object_format, "nexml")) {
        res <- .get_study(study_id = study_id, format = "nexml", ...)
        nexml_from_otl(res)
    } else {
        stop("Something is very wrong. Contact us.")
    }
}

##' Returns a specific tree from within a study
##'
##' @title Study Tree
##' @param study_id the identifier of a study (character)
##' @param tree_id the identifier of a tree within the study
##' @param object_format the class of the object to be returned
##'     (default and currently only possible value \code{phylo} from
##'     the \code{\link[ape]{ape}} package).
##' @param tip_label the format of the tip
##'     labels. \dQuote{\code{original_label}} (default) returns the
##'     original labels as provided in the study,
##'     \dQuote{\code{ott_id}} labels are replaced by their ott IDs,
##'     \dQuote{\code{ott_taxon_name}} labels are replaced by their
##'     Open Tree Taxonomy taxon name.
##' @param file_format the format of the file to be generated
##'     (\code{newick} default, \code{nexus}, or \code{json}).
##' @param file the file name where the output of the function will be
##'     saved.
##' @param deduplicate logical (default \code{TRUE}). If the tree
##'     returned by the study contains duplicated taxon names, should they
##'     be made unique? It is normally illegal for NEXUS/Newick tree
##'     strings to contain duplicated tip names. This is a workaround to
##'     circumvent this requirement. If \code{TRUE}, duplicated tip labels
##'     will be appended \code{_1}, \code{_2}, etc.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return if \code{file_format} is missing, an object of class
##'     \code{phylo}, otherwise a logical indicating whether the file
##'     was successfully created.
##' @export
##' @importFrom jsonlite toJSON
##' @examples
##' \dontrun{
##'  tree <- get_study_tree(study_id="pg_1144", tree_id="tree2324")
##'
##'  ## comparison of the first few tip labels depending on the options used
##'  head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="original_label")$tip.label)
##'  head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_id")$tip.label)
##'  head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_taxon_name")$tip.label)
##' }
get_study_tree <- function(study_id = NULL, tree_id = NULL, object_format = c("phylo"),
                           tip_label = c("original_label", "ott_id", "ott_taxon_name"),
                           file_format, file, deduplicate = TRUE, ...) {
    object_format <- match.arg(object_format)
    tip_label <- match.arg(tip_label)
    ## Translate the user-facing choice into the API value. The keys must
    ## match the choices accepted by match.arg() above; a key that does
    ## not match makes switch() return NULL silently.
    tip_label <- switch(tip_label,
                        original_label = "ot:originallabel",
                        ott_id = "ot:ottid",
                        ott_taxon_name = "ot:otttaxonname")

    if (!missing(file)) {
        ## Writing to a file: the output format is mandatory.
        if (missing(file_format)) {
            stop(sQuote("file_format"), " must be specified.")
        }
        file_format <- match.arg(file_format, c("newick", "nexus", "json"))
        res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
                               format = file_format, tip_label = tip_label, ...)
        ## Remove any pre-existing file so the result of file.exists()
        ## reflects this write.
        unlink(file)
        if (identical(file_format, "json")) {
            cat(jsonlite::toJSON(res), file = file)
        } else {
            cat(res, file = file)
        }
        return(invisible(file.exists(file)))
    }

    if (identical(object_format, "phylo")) {
        res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
                               format = "newick", tip_label = tip_label, ...)
        phylo_from_otl(res, dedup = deduplicate)
    } else {
        stop("Something is very wrong. Contact us.")
    }
}

##' Retrieve metadata about a study in the Open Tree of Life datastore.
##'
##' \code{get_study_meta} returns a long list of attributes for the
##' studies that are contributing to the synthetic tree. To help with
##' the extraction of relevant information from this list, several
##' helper functions exist: \itemize{
##'
##'   \item {get_tree_ids} { The identifiers of the trees
##'   associated with the study }
##'
##'   \item {get_publication} { The citation information of the
##'   publication for the study. The DOI (or URL) for the study is
##'   available as an attribute to the returned object (i.e.,
##'   \code{attr(object, "DOI")} ) }.
##'
##'   \item {candidate_for_synth} { The identifier of the tree(s) from
##'   the study used in the synthetic tree. This is a subset of the
##'   result of \code{get_tree_ids}.
##'
##'   \item {get_study_year} { The year of publication of the study.
##'   }
##'
##'  }
##' }
##'
##' @title Study Metadata
##' @param study_id the study identifier (character)
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param sm an object created by \code{get_study_meta}
##' @return named-list containing the metadata associated with the
##'     study requested
##' @export
##' @examples
##' \dontrun{
##' req <- get_study_meta("pg_719")
##' get_tree_ids(req)
##' candidate_for_synth(req)
##' get_publication(req)
##' get_study_year(req)
##' }
get_study_meta <- function(study_id, ...) {
    res <- .get_study_meta(study_id = study_id, ...)
    ## Tag the raw reply so the print method below is used, and remember
    ## which study it describes.
    class(res) <- "study_meta"
    attr(res, "study_id") <- study_id
    res
}

##' @export
print.study_meta <- function(x, ...) {
    cat("Metadata for OToL study ", attr(x, "study_id"), ". Contents:\n", sep="")
    ## Exact extraction ([[ ]]) avoids the partial matching done by `$`.
    cat(paste0(" $nexml$", names(x[["nexml"]])), sep="\n")
    ## print methods return their argument invisibly.
    invisible(x)
}

##' Retrieve subtree from a specific tree in the Open Tree of Life data store
##'
##' @title Study Subtree
##' @param study_id the study identifier (character)
##' @param tree_id the tree identifier (character)
##' @param object_format the class of the object returned by the
##'     function (default, and currently only possibility \code{phylo}
##'     from the \code{\link[ape]{ape}} package)
##' @param tip_label the format of the tip
##'     labels. \dQuote{\code{original_label}} (default) returns the
##'     original labels as provided in the study,
##'     \dQuote{\code{ott_id}} labels are replaced by their ott IDs,
##'     \dQuote{\code{ott_taxon_name}} labels are replaced by their
##'     Open Tree Taxonomy taxon name.
##' @param file_format character, the file format to use to save the
##'     results of the query (possible values, \sQuote{newick} or
##'     \sQuote{nexus}).
##' @param file character, the path and file name where the output
##'     should be written.
##' @param deduplicate logical (default \code{TRUE}). If the tree
##'     returned by the study contains duplicated taxon names, should
##'     they be made unique?
##'     It is normally illegal for NEXUS/Newick
##'     tree strings to contain duplicated tip names. This is a
##'     workaround to circumvent this requirement. If \code{TRUE},
##'     duplicated tip labels will be appended \code{_1}, \code{_2},
##'     etc.
##' @param subtree_id either a node id that specifies a subtree or
##'     \dQuote{ingroup} which returns the ingroup for this subtree.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return if \code{file} is missing, an object of class
##'     \code{phylo}, otherwise a logical (returned invisibly)
##'     indicating whether the file exists after writing.
##' @export
##' @examples
##' \dontrun{
##' small_tr <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="node991044")
##' ingroup  <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup")
##' nexus_file <- tempfile(fileext=".nex")
##' get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup", file=nexus_file,
##'                   file_format="nexus")
##' }
get_study_subtree <- function(study_id, tree_id, subtree_id, object_format=c("phylo"),
                              tip_label = c("original_label", "ott_id", "ott_taxon_name"),
                              file_format, file, deduplicate = TRUE, ...) {
    object_format <- match.arg(object_format)
    tip_label <- match.arg(tip_label)
    ## Translate the user-facing choice into the API value. The keys must
    ## match the choices accepted by match.arg() above; a key that does
    ## not match makes switch() return NULL silently.
    tip_label <- switch(tip_label,
                        original_label = "ot:originallabel",
                        ott_id = "ot:ottid",
                        ott_taxon_name = "ot:otttaxonname")

    if (!missing(file)) {
        ## Writing to a file: the output format is mandatory.
        if (missing(file_format)) {
            stop(sQuote("file_format"), " must be specified.")
        }
        file_format <- match.arg(file_format, c("newick", "nexus"))
        res <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
                                  subtree_id = subtree_id, format = file_format,
                                  tip_label = tip_label, ...)
        ## Remove any pre-existing file so the result of file.exists()
        ## reflects this write.
        unlink(file)
        cat(res, file = file)
        return(invisible(file.exists(file)))
    }

    if (identical(object_format, "phylo")) {
        res <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
                                  subtree_id = subtree_id, format = "newick",
                                  tip_label = tip_label, ...)
        res <- phylo_from_otl(res, dedup = deduplicate)
        ## NeXML should be possible for both object_format and file_format but it seems there
        ## is something wrong with the server at this time (FM - 2015-06-07)
        ##    } else if (identical(object_format, "nexml")) {
        ##      file_format <- "nexml"
        ##      res <- .get_study_subtree(study_id, tree_id, subtree_id, format=file_format)
        ##      res <- nexml_from_otl(res)
    } else stop("Something is very wrong. Contact us.")
    res
}
## [archive member header] rotl/R/tree_to_labels.R0000644000177500001440000000301412567643646015102 0ustar deepayanusers
## Function to extract tip and edge labels from newick formatted strings
## useful when the tree is too small to be read in by ape/rncl.

## tr needs to be a newick formatted tree string
## - missing tips are removed (OK for OTL as it won't happen)
##
## Returns a list with two character vectors: `tip_label` and
## `edge_label`. If `remove_quotes` is TRUE, a matching pair of
## surrounding quotes is stripped from each tip label.
tree_to_labels <- function(tr, remove_quotes = TRUE) {

    ## Number of matches of `pattern` in `tr` (0 when there is none).
    count_matches <- function(pattern) {
        hits <- unlist(gregexpr(pattern, tr))
        if (hits[1] == -1) 0L else length(hits)
    }
    n_right <- count_matches("\\)")
    n_left <- count_matches("\\(")

    if (!identical(n_right, n_left)) {
        stop("invalid newick string, numbers of ( and ) don't match")
    }

    ## remove white spaces
    tr <- gsub("\\s+", "", tr)

    ## remove branch lengths, including signed, leading-dot and
    ## scientific-notation values (e.g. ":1e-05"), so that no fragment
    ## of the number is left glued to the label
    tr <- gsub(":[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?", "", tr)

    ## TODO?: remove comments

    if (n_right < 1) {
        ## if only 1 tip
        tip_lbl <- gsub(";$", "", tr)
        edge_lbl <- character(0)
    } else {
        ## extract edge labels: the text that follows each closing
        ## parenthesis, up to the next comma or semicolon
        edge_lbl <- unlist(strsplit(tr, ")", fixed = TRUE))
        edge_lbl <- grep("^[^\\(]", edge_lbl, value = TRUE)
        edge_lbl <- gsub("(,|;).*$", "", edge_lbl)
        edge_lbl <- edge_lbl[nzchar(edge_lbl)]

        ## extract tips: comma-separated chunks stripped of leading
        ## opening parentheses and of everything from the first closing
        ## parenthesis on
        tip_lbl <- unlist(strsplit(tr, ","))
        tip_lbl <- gsub("^\\(*", "", tip_lbl)
        tip_lbl <- gsub("\\).*$", "", tip_lbl)
        tip_lbl <- tip_lbl[nzchar(tip_lbl)]
    }

    if (remove_quotes) {
        tip_lbl <- gsub("^(\\\"|\\\')(.+)(\\\'|\\\")$", "\\2", tip_lbl)
    }

    list(tip_label = tip_lbl,
         edge_label = edge_lbl)
}
## [archive member header] rotl/R/base.R0000644000177500001440000001321312707501110013002 0ustar deepayanusers
## Base URL of the Open Tree API (development server when `dev` is TRUE).
otl_url <- function(dev=FALSE) {
    if (dev) {
        "https://devapi.opentreeoflife.org"
    } else {
        "https://api.opentreeoflife.org"
    }
}

## API version used in request paths ("v3" unless one is supplied).
otl_version <- function(version) {
    if (missing(version)) {
        return("v3")
    } else {
        return(version)
    }
}

# Take a request object and return list (if JSON) or plain text (if another
# type)
##' @importFrom httr content
##' @importFrom jsonlite fromJSON
otl_parse <- function(req) {
    ## isTRUE() guards against a response without a content-type header,
    ## for which grepl() would return a zero-length result.
    is_json <- isTRUE(grepl("application/json",
                            req[["headers"]][["content-type"]]))
    if (is_json) {
        return(jsonlite::fromJSON(httr::content(req, "text", encoding = "UTF-8"),
                                  simplifyVector = FALSE))
    }
    txt <- httr::content(req, as="text", encoding = "UTF-8")
    if(identical(txt, "")){
        stop("No output to parse; check your query.", call. = FALSE)
    }
    txt
}

## Stop if a parsed response carries an error report in its
## `description` element. Anything else (non-list content, or no
## `description`) is left alone and NULL is returned invisibly.
otl_check_error <- function(cont) {
    if (!is.list(cont) || !("description" %in% names(cont))) {
        return(invisible(NULL))
    }
    desc <- cont[["description"]]
    desc_fields <- if (is.list(desc)) names(desc) else character(0)

    ## NOTE(review): the original code tested for "Error" but read
    ## `$error`; both spellings are accepted here -- confirm which one
    ## the API actually sends.
    err_field <- intersect(c("error", "Error"), desc_fields)
    if (length(err_field) > 0) {
        stop(paste("Error: ", desc[[err_field[1]]], "\n", sep = ""))
    }
    if ("message" %in% names(cont)) {
        ## Prefer a message nested in the description, fall back on the
        ## top-level one.
        msg <- if ("message" %in% desc_fields) {
            desc[["message"]]
        } else {
            cont[["message"]]
        }
        stop(paste("Message: ", msg, "\n", sep = ""))
    }
    invisible(NULL)
}

## Check and parse result of query
otl_check <- function(req) {
    if (req$status_code >= 400) {
        msg <- otl_parse(req)
        stop("HTTP failure: ", req$status_code, "\n", msg, call. = FALSE)
    }
    desc <- otl_parse(req)
    otl_check_error(desc)
    desc
}

##' @importFrom httr GET
otl_GET <- function(path, url = otl_url(...), otl_v = otl_version(...), ...) {
    req <- httr::GET(url, path=paste(otl_v, path, sep="/"), ...)
    otl_check(req)
}

##' @importFrom jsonlite toJSON
##' @importFrom httr POST
otl_POST <- function(path, body, url = otl_url(...), otl_v = otl_version(...), ...) {
    stopifnot(is.list(body))

    ## Plain if/else instead of a scalar ifelse(); as.character() keeps
    ## the payload a plain character string, as ifelse() produced.
    body_json <- if (length(body) > 0) {
        as.character(jsonlite::toJSON(body))
    } else {
        ""
    }

    req <- httr::POST(url, path=paste(otl_v, path, sep="/"), body=body_json, ...)
    otl_check(req)
}

## File extension associated with each output format.
otl_formats <- function(format) {
    switch(tolower(format),
           "nexus" = ".nex",
           "newick" = ".tre",
           "nexml" = ".nexml",
           "json" = ".json",
           "") #fall through is no extension = nex(j)son
}

## Strip all characters except the ottId from a OpenTree label (internal or terminal)
otl_ottid_from_label <- function(label) {
    as.numeric(gsub("(.+[ _]ott)([0-9]+)", "\\2", label))
}

## Turn an API reply (a list with a `newick` or `subtree` element, or a
## newick string) into a tree read by rncl. A string without any
## parenthesis is not parsed as a tree: its tip label(s) are returned.
##' @importFrom rncl read_newick_phylo
phylo_from_otl <- function(res, dedup = FALSE) {
    if (is.list(res)) {
        if (!is.null(res$newick)) {
            tree <- res$newick
        } else if (!is.null(res$subtree)) {
            tree <- res$subtree
        } else {
            stop("Cannot find tree")
        }
    } else if (is.character(res)) {
        tree <- res
    } else stop("I don't know how to deal with this format.")

    if (grepl("\\(", tree)) {
        fnm <- tempfile()
        ## Clean up the temporary file even if reading the tree fails.
        on.exit(unlink(fnm), add = TRUE)
        cat(tree, file = fnm)
        if (!dedup) {
            phy <- rncl::read_newick_phylo(fnm)
        } else {
            dedup_tr <- deduplicate_labels(fnm)
            on.exit(unlink(dedup_tr), add = TRUE)
            phy <- rncl::read_newick_phylo(dedup_tr)
        }
    } else {
        phy <- tree_to_labels(tree)$tip_label
    }
    return(phy)
}

## Read a NeXML reply through RNeXML (optional dependency).
nexml_from_otl <- function(res) {
    if (!requireNamespace("RNeXML", quietly = TRUE)) {
        stop("The RNeXML package is needed to use the nexml file format")
    }
    fnm <- tempfile()
    ## Clean up the temporary file even if parsing fails.
    on.exit(unlink(fnm), add = TRUE)
    cat(res, file=fnm)
    phy <- RNeXML::nexml_read(x=fnm)
    phy
}

## check if the argument provided looks like a number (can be coerced
## to integer/numeric).
## Returns a single TRUE/FALSE: TRUE when `x` is a whole number, or a
## string made of digits only. NULL, NA and non-finite values give
## FALSE; more than one element is an error.
check_numeric <- function(x) {
    if (is.null(x)) {
        return(FALSE)
    }
    if (length(x) != 1) {
        stop("only 1 element should be provided")
    }
    if (!is.numeric(x)) {
        x <- as.character(x)
        if (any(is.na(x))) return(FALSE)
        return(grepl("^[0-9]+$", x))
    }
    ## isTRUE() turns the NA produced by NA/NaN/Inf inputs into FALSE.
    isTRUE(x %% 1 == 0)
}

## Check that ott_ids are not NULL, not NAs and look like numbers
check_ott_ids <- function(ott_ids) {
    if (is.null(ott_ids)) {
        stop("You must supply some OTT ids.")
    }
    if (inherits(ott_ids, "otl_ott_id")) {
        ## convert objects returned by ott_id method to a vector
        ott_ids <- unlist(ott_ids)
    }
    ## An empty vector carries no id either.
    if (length(ott_ids) == 0) {
        stop("You must supply some OTT ids.")
    }
    if (any(is.na(ott_ids))) {
        stop("NAs are not allowed")
    }
    if (!all(vapply(ott_ids, check_numeric, logical(1)))) {
        stop(sQuote("ott_ids"), " must look like numbers.")
    }
    ott_ids
}

## all nodes have a node_id (character, e.g. "ott12345" or "mrcaott123ott456")
check_valid_node_id <- function(x) {
    if (length(x) != 1) {
        stop("only 1 element should be provided")
    }
    if (!is.character(x)) {
        return(FALSE)
    }
    grepl('^mrcaott\\d+ott\\d+', x) || grepl('^ott\\d+', x)
}

## Validate a vector of node ids; NULL is accepted. Stops with an
## informative message on the first problem found, otherwise returns
## NULL invisibly.
check_node_ids <- function(node_ids) {
    if (is.null(node_ids)) {
        return(invisible(NULL))
    }
    if (!is.character(node_ids)) {
        stop("Argument ", sQuote("node_ids"), " must be of type character.")
    }
    if (any(is.na(node_ids))) {
        stop("NAs are not allowed")
    }
    if (!all(vapply(node_ids, check_valid_node_id, logical(1)))) {
        stop(sQuote("node_ids"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
    }
    invisible(NULL)
}

# node labels for tree_of_life subtree and induced_subtree
# might also be useful for taxonomy queries
check_label_format <- function (x) {
    isTRUE(x %in% c("name", "id", "name_and_id"))
}
## [archive member header] rotl/R/deduplicate_labels.R0000644000177500001440000000267013003747740015715 0ustar deepayanusers
## Create a vector (character) that contains the NEWICK tree strings
## found in a file
parse_newick <- function(file) {
    trs <- readLines(file, warn = FALSE)
    ## one element per tree: trees are separated by semicolons
    trs <- strsplit(trs, split = ";")
    ## trim leading/trailing white space of every tree string
    trs <- unlist(lapply(trs, function(x) gsub("^\\s+|\\s+$", "", x)))
    ## remaining (internal) white space becomes an underscore
    trs <- gsub("\\s", "_", trs)
    trs <- trs[nchar(trs) > 0]
    trs
}

## Internal function to be used by `deduplicate_labels` that:
## 1. identify duplicated labels
## 2. make them unique
## 3. replace the duplicated labels by their unique counterparts
dedup_lbl <- function(tr_str) {
    tr_lbl <- tree_to_labels(tr_str, remove_quotes = TRUE)$tip_label
    tr_lbl_unq <- make.unique(tr_lbl, sep = "_")
    if (!identical(tr_lbl, tr_lbl_unq)) {
        for (i in seq_along(tr_lbl)) {
            ## sub() only rewrites the first remaining occurrence of the
            ## label; labels already renamed no longer match because the
            ## pattern requires a delimiter right after the label.
            tr_str <- sub(paste0("([\\(|,]\\'?)\\Q", tr_lbl[i], "\\E(\\'?[:|\\)|,])"),
                          paste0("\\1", tr_lbl_unq[i], "\\2"),
                          tr_str)
        }
        warning("Some tip labels were duplicated and have been modified: ",
                paste(tr_lbl[duplicated(tr_lbl)], collapse = ", "),
                call. = FALSE)
    }
    paste0(tr_str, ";")
}

## Main function: takes a file with potentially duplicated tip labels
## and create a new file with unique labels
## Returns the path of the new (temporary) file; the caller is
## responsible for deleting it.
deduplicate_labels <- function(file) {
    tr_strs <- parse_newick(file)
    tr_dedup <- vapply(tr_strs, dedup_lbl, character(1))
    tmp_tr <- tempfile()
    cat(tr_dedup, file = tmp_tr, sep = "\n")
    tmp_tr
}
## [archive member header] rotl/R/external_data.R0000644000177500001440000001164612706240532014722 0ustar deepayanusers
##' Get external identifiers for data associated with an Open Tree study
##'
##' Data associated with studies contributing to the Open Tree synthesis may
##' be available from other databases. In particular, trees and alignments
##' may be available from treebase and DNA sequences and bibliographic
##' information associated with a given study may be available from the NCBI.
##' This function retrieves that information for a given study.
##'
##' @param study_id An open tree study ID
##' @return A study_external_data object (which inherits from a list) which
##' contains some of the following.
##' @return doi, character, the DOI for the paper describing this study
##' @return external_data_url, character, a URL to an external data repository
##' (e.g. a treebase entry) if one exists.
##' @return pubmed_id character, the unique ID for this study in the NCBI's pubmed database
##' @return popset_ids character, vector of IDs for the NCBI's popset database
##' @return nucleotide_ids character, vector of IDs for the NCBI's nucleotide database
##' @seealso studies_find_studies (used to discover study IDs)
##' @importFrom httr parse_url
##' @importFrom rentrez entrez_search
##' @importFrom rentrez entrez_link
##' @examples
##' \dontrun{
##' flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae")
##' study_external_IDs(flies[2,]$study_ids)
##' }
##' @export
study_external_IDs <- function(study_id){
    meta <- get_study_meta(study_id)
    data_deposit <- meta[["nexml"]][["^ot:dataDeposit"]][["@href"]]
    ## The DOI is stored as a URL; only its path component is kept.
    url <- attr(get_publication(meta), "DOI")
    doi <- parse_url(url)[["path"]]
    pmid <- get_pmid(doi, study_id)
    res <- list(doi = doi,
                pubmed_id = pmid,
                external_data_url = data_deposit)
    ## NCBI links can only be looked up when a single PMID was found.
    if (!is.null(pmid)) {
        res$popset_ids <- entrez_link(dbfrom="pubmed", db="popset", id=pmid)[["links"]][["pubmed_popset"]]
        res$nucleotide_ids <- entrez_link(dbfrom="pubmed", db="nuccore", id=pmid)[["links"]][["pubmed_nuccore"]]
    }
    ## The study id is stored under "study_id", the attribute the print
    ## method reads; "id" is kept for backward compatibility.
    structure(res, class=c("study_external_data", "list"),
              id=study_id, study_id=study_id)
}

##' Get external identifiers for data associated with an Open Tree taxon
##'
##' The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This
##' function retrieves identifiers to external taxonomic records that have
##' contributed the rank, position and definition of a given Open Tree taxon.
##'
##' @param taxon_id An Open Tree taxon ID (ott id)
##' @return a data.frame in which each row represents a unique record in an
##' external database. The column "source" provides an abbreviated name for the
##' database, and "id" the unique ID for the record.
##' @seealso tnrs_matchnames, which can be used to search for taxa by name.
##' @seealso taxonomy_taxon, for more information about a given taxon.
##' @examples
##' \dontrun{
##'    gibbon_IDs <- taxon_external_IDs(712902)
##' }
##' @export
taxon_external_IDs <- function(taxon_id){
    taxon_info <- taxonomy_taxon_info(taxon_id)
    srcs <- taxon_info[[1]][["tax_sources"]]
    ## Each source is a "source:id" string; split it and stack the pieces
    ## into a two-column data frame.
    res <- do.call(rbind.data.frame, strsplit(unlist(srcs), ":"))
    names(res) <- c("source", "id")
    res
}

##' @export
print.study_external_data <- function(x, ...){
    ## The study id may be stored under either attribute name.
    study_id <- attr(x, "study_id", exact = TRUE)
    if (is.null(study_id)) {
        study_id <- attr(x, "id", exact = TRUE)
    }
    cat("External data identifiers for study", study_id, "\n")
    cat(" $doi: ", x[["doi"]], "\n")
    if(!is.null(x[["pubmed_id"]])){
        cat(" $pubmed_id: ", x[["pubmed_id"]], "\n")
    }
    if(!is.null(x[["popset_ids"]])){
        cat(" $popset_ids: vector of", length(x[["popset_ids"]]), "IDs \n")
    }
    if(!is.null(x[["nucleotide_ids"]])){
        cat(" $nucleotide_ids: vector of", length(x[["nucleotide_ids"]]), "IDs\n")
    }
    ## The URL is NULL when the study has no data deposit; nchar(NULL) is
    ## zero-length and cannot be used directly as an `if` condition.
    data_url <- x[["external_data_url"]]
    if (length(data_url) > 0 && all(nzchar(data_url))) {
        cat(" $external_data_url", data_url, "\n")
    }
    cat("\n")
    ## print methods return their argument invisibly.
    invisible(x)
}

##Maybe include these functions to get summary information about a
## set of linked sequences?
#summarize_nucleotide_data <- function(id_vector){
#    summs <- entrez_summary(db="nuccore", id=id_vector)
#    interesting <- extract_from_esummary(summs, c("uid", "title", "slen", "organism", "completeness"), simplify=FALSE)
#    do.call(rbind.data.frame, interesting)
#}
#
#summarize_popset_data <- function(id_vector){
#    summs <- entrez_summary(db="popset", id=id_vector)
#    interesting <- extract_from_esummary(summs, c("uid", "title"), simplify=FALSE)
#    do.call(rbind.data.frame, interesting)
#}
#
#Un-exported function to convert doi->pmid. Also takes study_id as an argument in
#order to provide a helpful error message when 0 or >1 pmids are returned.
## Returns the PMID as returned by the pubmed search, or NULL (with a
## warning) when the search finds no match or more than one.
get_pmid <- function(doi, study_id){
    pubmed_search <- entrez_search(db="pubmed", term=paste0(doi, "[DOI]"))
    n_ids <- length(pubmed_search[["ids"]])
    if (n_ids == 0) {
        warning("Could not find PMID for study '", study_id, "', skipping NCBI data")
        return(NULL)
    }
    if (n_ids > 1) {
        warning("Found more than one PMID matching study '", study_id, "', skipping NCBI data")
        return(NULL)
    }
    pubmed_search[["ids"]]
}
## [archive member header] rotl/R/tol.R0000644000177500001440000006162513056043307012707 0ustar deepayanusers

## Build a data frame (columns study_id, tree_id, git_sha) with one row
## per entry of the `source_id_map` element of `tax`.
.source_list <- function(tax, ...) {
    if (!("source_id_map" %in% names(tax))) {
        ## it should only be missing with tol_about when using
        ## include_source_list=FALSE
        stop("Make sure that your object has been created using ",
             sQuote("tol_about(include_source_list = TRUE)"))
    }
    tt <- lapply(tax[["source_id_map"]], function(x) {
        c(x[["study_id"]], x[["tree_id"]], x[["git_sha"]])
    })
    tt <- do.call("rbind", tt)
    setNames(as.data.frame(tt, stringsAsFactors=FALSE),
             c("study_id", "tree_id", "git_sha"))
}

##' Basic information about the Open Tree of Life (the synthetic tree)
##'
##' @title Information about the Tree of Life
##'
##' @details Summary information about the current draft tree of life,
##'     including information about the list of trees and the taxonomy
##'     used to build it. The object returned by \code{tol_about} can
##'     be passed to the taxonomy methods (\code{tax_name()},
##'     \code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to
##'     extract relevant taxonomic information for the root of the
##'     synthetic tree.
##'
##' @param include_source_list Logical (default =
##'     \code{FALSE}). Return an ordered list of source trees.
##' @param tax an object created with a call to \code{tol_about}.
##' @param ... additional arguments to customize the API call (see
##'     \code{\link{rotl}} for more information).
##'
##' @return An invisible list of synthetic tree summary statistics:
##'
##' \itemize{
##'
##'     \item {date_created} {String. The creation date of the tree.}
##'
##'     \item {num_source_studies} {Integer.
##'     The number of studies
##'     (publications) used as sources.}
##'
##'     \item {num_source_trees} {The number of trees used as sources
##'     (may be >1 tree per study).}
##'
##'     \item {taxonomy_version} {The Open Tree Taxonomy version used
##'     as a source.}
##'
##'     \item {filtered_flags} {List. Taxa with these taxonomy flags were
##'     not used in construction of the tree.}
##'
##'     \item {root} {List. Describes the root node:}
##'         \itemize{
##'             \item {node_id} {String. The canonical identifier of the node.}
##'
##'             \item {num_tips} {Numeric. The number of descendent tips.}
##'
##'             \item {taxon} {A list of taxonomic properties:}
##'             \itemize{
##'                 \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).}
##'
##'                 \item {name} {String. The taxonomic name of the queried node.}
##'
##'                 \item {unique_name} {String. The string that uniquely
##'                 identifies the taxon in OTT.}
##'
##'                 \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##'                 \item {tax_sources} {List. A list of identifiers for taxonomic
##'                 sources, such as other taxonomies, that define taxa judged
##'                 equivalent to this taxon.}
##'             }
##'         }
##'
##'     \item {source_list} {List. Present only if
##'     \code{include_source_list} is \code{TRUE}. The sourceid
##'     ordering is the precedence order for synthesis, with
##'     relationships from earlier trees in the list having priority
##'     over those from later trees in the list. See
##'     \code{source_id_map} below for study details.}
##'
##'     \item {source_id_map} {Named list of lists. Present only if
##'     \code{include_source_list} is \code{TRUE}. Names correspond to
##'     the \sQuote{sourceids} used in \code{source_list}
##'     above. Source trees will have the following properties:}
##'
##'     \itemize{
##'         \item {git_sha} {String. The git SHA identifying a particular source
##'         version.}
##'
##'         \item {tree_id} {String. The tree id associated with the study id used.}
##'
##'         \item {study_id} {String. The study identifier. Will typically include
##'         a prefix ("pg_" or "ot_").}
##'     }
##'
##'     \item {synth_id} {The unique string for this version of the tree.}
##' }
##' @seealso \code{\link{source_list}} to explore the list of studies
##'     used in the synthetic tree (see example).
##'
##' @examples
##' \dontrun{
##' res <- tol_about()
##' tax_sources(res)
##' ott_id(res)
##' studies <- source_list(tol_about(include_source_list=TRUE))}
##' @rdname tol_about
##' @export
tol_about <- function(include_source_list=FALSE, ...) {
    res <- .tol_about(include_source_list=include_source_list, ...)
    ## Tag the reply so the print and taxonomy methods below dispatch on it.
    class(res) <- c("tol_summary", class(res))
    res
}

##' @export
print.tol_summary <- function(x, ...) {
    root <- x[["root"]]
    ## The original code read `x$taxonomy`, which only reaches the
    ## `taxonomy_version` element through partial matching of `$`. Look
    ## the value up explicitly: an element named exactly "taxonomy" wins
    ## (as it would with `$`), otherwise "taxonomy_version" is used.
    taxonomy <- x[["taxonomy"]]
    if (is.null(taxonomy)) {
        taxonomy <- x[["taxonomy_version"]]
    }
    has_source_list <- if ("source_list" %in% names(x)) "true" else "false"

    cat("\nOpenTree Synthetic Tree of Life.\n\n")
    cat("Tree version: ", x[["synth_id"]], "\n", sep="")
    cat("Taxonomy version: ", taxonomy, "\n", sep="")
    cat("Constructed on: ", x[["date_created"]], "\n", sep="")
    cat("Number of terminal taxa: ", root[["num_tips"]], "\n", sep="")
    cat("Number of source trees: ", x[["num_source_trees"]], "\n", sep="")
    cat("Number of source studies: ", x[["num_source_studies"]], "\n", sep = "")
    cat("Source list present: ", has_source_list, "\n", sep="")
    cat("Root taxon: ", root[["taxon"]][["name"]], "\n", sep="")
    cat("Root ott_id: ", root[["taxon"]][["ott_id"]], "\n", sep="")
    cat("Root node_id: ", root[["node_id"]], "\n", sep="")
    ## print methods return their argument invisibly.
    invisible(x)
}

## Build a taxonomy method for `tol_summary` objects: the returned
## function applies `.f` to the root taxon, names the result with the
## taxon's unique name and adds the class chosen by add_otl_class().
## NOTE(review): `.f` is evaluated lazily (on the first call of the
## returned method), so the .tax_* helpers do not need to exist yet when
## this file is sourced -- do not force() it here without checking the
## package collation order.
tol_about_method_factory <- function(.f) {
    function(tax, ...) {
        root_taxon <- tax[["root"]][["taxon"]]
        res <- list(.f(root_taxon))
        names(res) <- .tax_unique_name(root_taxon)
        add_otl_class(res, .f)
    }
}

##' @export
##' @rdname tol_about
tax_rank.tol_summary <- tol_about_method_factory(.tax_rank)

##' @export
##' @rdname tol_about
tax_sources.tol_summary <- tol_about_method_factory(.tax_sources)

##' @export
##' @rdname tol_about
unique_name.tol_summary <- tol_about_method_factory(.tax_unique_name)

##' @export
##' @rdname tol_about
tax_name.tol_summary <- tol_about_method_factory(.tax_name)

##' @export
##' @rdname tol_about
ott_id.tol_summary <- tol_about_method_factory(.tax_ott_id)

##' @export
##' @rdname source_list
source_list.tol_summary <- .source_list

##' Most Recent Common Ancestor for a set of nodes
##'
##' @title MRCA of taxa from the synthetic tree
##'
##' @details Get the MRCA of a set of nodes on the current synthetic
##'     tree. Accepts any combination of node ids and ott ids as
##'     input. Returns information about the most recent common
##'     ancestor (MRCA) node as well as the most recent taxonomic
##'     ancestor (MRTA) node (the closest taxonomic node to the MRCA
##'     node in the synthetic tree; the MRCA and MRTA may be the same
##'     node). If they are the same, the taxonomic information will be
##'     in the \code{mrca} slot, otherwise they will be in the
##'     \code{nearest_taxon} slot of the list. If any of the specified
##'     nodes is not in the synthetic tree an error will be returned.
##'
##'     Taxonomic methods (\code{tax_sources()}, \code{ott_id()},
##'     \code{unique_name()}, ...) are available on the objects
##'     returned by \code{tol_mrca()}. If the MRCA node is MRTA, the
##'     name of the object returned by these methods will start with
##'     \sQuote{ott}, otherwise it will start with \sQuote{mrca}.
##'
##' @param ott_ids Numeric vector. The ott ids for which the MRCA is desired.
##' @param node_ids Character vector. The node ids for which the MRCA is desired.
##' @param tax an object returned by \code{tol_mrca()}.
##' @param ... additional arguments to customize the API call (see ##' \code{\link{rotl}} for more information). ##' ##' @return A list of the MRCA node properties: ##' ##' \itemize{ ##' ##' \item {mrca} {List of node properties.} ##' ##' \itemize{ ##' \item {node_id} {String. The canonical identifier of the node.} ##' ##' \item {num_tips} {Numeric. The number of descendent tips.} ##' ##' \item {taxon} {A list of taxonomic properties. Only returned if ##' the queried node is a taxon. (If the node is not a taxon, a ##' \code{nearest_taxon} list is returned (see below)).} ##' ##' \itemize{ ##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).} ##' ##' \item {name} {String. The taxonomic name of the queried node.} ##' ##' \item {unique_name} {String. The string that uniquely ##' identifies the taxon in OTT.} ##' ##' \item {rank} {String. The taxonomic rank of the taxon in OTT.} ##' ##' \item {tax_sources} {List. A list of identifiers for taxonomic ##' sources, such as other taxonomies, that define taxa judged ##' equivalent to this taxon.} ##' } ##' ##' The following properties list support/conflict for the node across ##' synthesis source trees. All properties involve sourceid keys and ##' nodeid values (see \code{source_id_map} below). Not all properties ##' are present for every node. ##' ##' \item {partial_path_of} {List. The edge below this synthetic tree node ##' is compatible with the edge below each of these input tree nodes (one ##' per tree). Each returned element is reported as sourceid:nodeid.} ##' ##' \item {supported_by} {List. Input tree nodes (one per tree) that support ##' this synthetic tree node. Each returned element is reported as ##' sourceid:nodeid.} ##' ##' \item {terminal} {List. Input tree nodes (one per tree) that are equivalent ##' to this synthetic tree node (via an exact mapping, or the input tree ##' terminal may be the only terminal descended from this synthetic tree node).
##' Each returned element is reported as sourceid:nodeid.}
##'
##' \item {conflicts_with} {Named list of lists. Names correspond to
##' sourceid keys. Each list contains input tree node ids (one or more per tree)
##' that conflict with this synthetic node.}
##' }
##'
##' \item {nearest_taxon} {A list of taxonomic properties of the nearest rootward
##' taxon node to the MRCA node. Only returned if the MRCA node is not a taxon
##' (otherwise the \code{taxon} list above is returned).}
##'
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
##'
##' \item {name} {String. The taxonomic name of the queried node.}
##'
##' \item {unique_name} {String. The string that uniquely
##' identifies the taxon in OTT.}
##'
##' \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##' \item {tax_sources} {List. A list of identifiers for taxonomic
##' sources, such as other taxonomies, that define taxa judged
##' equivalent to this taxon.}
##' }
##'
##' \item {source_id_map} {Named list of lists. Names correspond to the
##' sourceid keys used in the support/conflict properties of the \code{mrca}
##' list above. Source trees will have the following properties:}
##'
##' \itemize{
##' \item {git_sha} {The git SHA identifying a particular source
##' version.}
##'
##' \item {tree_id} {The tree id associated with the study id used.}
##'
##' \item {study_id} {The study identifier. Will typically include
##' a prefix ("pg_" or "ot_").}
##' }
##' The only sourceid that does not correspond to a source tree is the taxonomy,
##' which will have the name "ott"+`taxonomy_version`, and the value is the
##' ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
##' appear in \code{supported_by}.
##'
##' }
##'
##' @examples
##' \dontrun{
##' birds_mrca <- tol_mrca(ott_ids=c(412129, 536234))
##' ott_id(birds_mrca)
##' tax_sources(birds_mrca)}
##' @rdname tol_mrca
##' @export
tol_mrca <- function(ott_ids=NULL, node_ids=NULL, ...) {
    ## Tag the API response so that the S3 methods defined below
    ## (print, tax_rank, ott_id, ...) dispatch on it.
    res <- .tol_mrca(ott_ids=ott_ids, node_ids=node_ids, ...)
    class(res) <- c("tol_mrca", class(res))
    res
}

## Print a short, human-readable summary of a `tol_mrca` object.
## Follows the convention for print methods and returns `x` invisibly.
##' @export
print.tol_mrca <- function(x, ...) {
    cat("\nOpenTree MRCA node.\n\n")
    cat("Node id: ", x$mrca$node_id, "\n", sep="")
    cat("Number of terminal descendants: ", x$mrca$num_tips, "\n", sep="")
    if (is_taxon(x[["mrca"]][["taxon"]])) {
        cat("Is taxon: TRUE\n")
        cat("Name: ", x$mrca$taxon$name, "\n", sep="")
        cat("ott id: ", x$mrca$taxon$ott_id, "\n", sep="")
    } else {
        ## The MRCA node is not a taxon: report the nearest taxon instead.
        cat("Is taxon: FALSE\n")
        cat("Nearest taxon:\n")
        cat(" Name: ", x$nearest_taxon$name, "\n", sep="")
        cat(" ott id: ", x$nearest_taxon$ott_id, "\n", sep="")
    }
    invisible(x)
}

## Build an S3 method for `tol_mrca` objects from a taxon accessor `.f`.
## The generated method applies `.f` to the MRCA's own taxon when
## is_taxon() accepts it, and to `nearest_taxon` otherwise. The result is a
## one-element list named after the taxon's unique name, with a
## "taxon_type" attribute ("mrca" or "nearest_taxon") recording which of
## the two was used, passed through add_otl_class().
tol_mrca_method_factory <- function(.f) {
    function(tax, ...) {
        if (is_taxon(tax[["mrca"]][["taxon"]])) {
            taxon <- tax[["mrca"]][["taxon"]]
            taxon_type <- "mrca"
        } else {
            taxon <- tax[["nearest_taxon"]]
            taxon_type <- "nearest_taxon"
        }
        res <- list(.f(taxon))
        names(res) <- .tax_unique_name(taxon)
        attr(res, "taxon_type") <- taxon_type
        add_otl_class(res, .f)
    }
}

##' @export
##' @rdname tol_mrca
tax_sources.tol_mrca <- tol_mrca_method_factory(.tax_sources)

##' @export
##' @rdname tol_mrca
unique_name.tol_mrca <- tol_mrca_method_factory(.tax_unique_name)

##' @export
##' @rdname tol_mrca
tax_name.tol_mrca <- tol_mrca_method_factory(.tax_name)

##' @export
##' @rdname tol_mrca
tax_rank.tol_mrca <- tol_mrca_method_factory(.tax_rank)

##' @export
##' @rdname tol_mrca
ott_id.tol_mrca <- tol_mrca_method_factory(.tax_ott_id)

##' @export
##' @rdname tol_mrca
source_list.tol_mrca <- .source_list

##' Extract a subtree from the synthetic tree from an Open Tree node id.
##'
##' @title Extract a subtree from the synthetic tree
##'
##' @details Given a node, return the subtree of the synthetic tree descended
##' from that node. The start node may be specified using either a node id
##' or an ott id, but not both.
##' If the specified node is not in the
##' synthetic tree an error will be returned. There is a size limit of
##' 25000 tips for this method.
##'
##' @param ott_id Numeric. The ott id of the node in the tree that should
##' serve as the root of the tree returned.
##' @param node_id Character. The node id of the node in the tree that should
##' serve as the root of the tree returned.
##' @param label_format Character. Defines the label type; one of
##' \dQuote{\code{name}}, \dQuote{\code{id}}, or
##' \dQuote{\code{name_and_id}} (the default).
##' @param file If specified, the function will write the subtree to a
##' file in newick format.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return If no value is specified to the \code{file} argument
##' (default), a phylogenetic tree of class \code{phylo}.
##' Otherwise, the function returns invisibly a logical indicating
##' whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' res <- tol_subtree(ott_id=241841)}
##' @export
tol_subtree <- function(ott_id=NULL, node_id=NULL, label_format=NULL, file, ...) {
    res <- .tol_subtree(ott_id=ott_id, node_id=node_id,
                        label_format=label_format, ...)
    if (!missing(file)) {
        ## Replace any existing file with the newick string from the API.
        unlink(file)
        cat(res$newick, file=file)
        return(invisible(file.exists(file)))
    }
    phylo_from_otl(res)
}

##' Return the induced subtree on the synthetic tree that relates a list of nodes.
##'
##' @title Subtree from the Open Tree of Life
##'
##' @details Return a tree with tips corresponding to the nodes identified in
##' the input set that is consistent with the topology of the current
##' synthetic tree. This tree is equivalent to the minimal subtree
##' induced on the draft tree by the set of identified nodes.
##'
##' @param ott_ids Numeric vector. OTT ids indicating nodes to be used
##' as tips in the induced tree.
##' @param node_ids Character vector. Node ids indicating nodes to be used
##' as tips in the induced tree.
##' @param label_format Character. Defines the label type; one of
##' \dQuote{\code{name}}, \dQuote{\code{id}}, or
##' \dQuote{\code{name_and_id}} (the default).
##' @param file If specified, the function will write the subtree to a
##' file in newick format.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return If no value is specified to the \code{file} argument
##' (default), a phylogenetic tree of class \code{phylo}.
##'
##' Otherwise, the function returns invisibly a logical indicating
##' whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710))
##' tree_file <- tempfile(fileext=".tre")
##' tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710),
##' file=tree_file)}
##' @export
tol_induced_subtree <- function(ott_ids=NULL, node_ids=NULL, label_format=NULL, file, ...) {
    res <- .tol_induced_subtree(ott_ids=ott_ids, node_ids=node_ids,
                                label_format=label_format, ...)
    if (!missing(file)) {
        ## Replace any existing file with the newick string from the API.
        unlink(file)
        cat(res$newick, file=file)
        ## Returned invisibly, as documented above and as tol_subtree() does.
        return(invisible(file.exists(file)))
    }
    phylo_from_otl(res)
}

##' Strip OTT ids from tip labels
##' @param tip_labels a character vector containing tip labels (most
##' likely the \code{tip.label} element from a tree returned by
##' \code{\link{tol_induced_subtree}})
##' @param remove_underscores logical (defaults to FALSE). If set to
##' TRUE underscores in tip labels are converted to spaces
##' @return A character vector containing the contents of
##' \code{tip_labels} with any OTT ids removed.
##'
##' @examples
##' \dontrun{
##' genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio")
##' tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710))
##' tr$tip.label %in% genera
##' tr$tip.label <- strip_ott_ids(tr$tip.label)
##' tr$tip.label %in% genera}
##'@export
strip_ott_ids <- function(tip_labels, remove_underscores=FALSE) {
    ## Drop a trailing "_ott<digits>" suffix from every label.
    without_ids <- sub("_ott\\d+$", "", tip_labels)
    if (remove_underscores) {
        gsub("_", " ", without_ids)
    } else {
        without_ids
    }
}

##' Get summary information about a node in the synthetic tree
##'
##' @title Node info
##'
##' @details Returns summary information about a node in the graph. The
##' node of interest may be specified using either a node id or a
##' taxon id, but not both. If the specified node or OTT id is not
##' in the graph, an error will be returned.
##'
##' If the argument \code{include_lineage=TRUE} is used, you can
##' use \code{tax_lineage()} or \code{tol_lineage} to return the
##' taxonomic information or the node information for all the
##' ancestors to this node, down to the root of the tree.
##'
##'
##' @param ott_id Numeric. The OpenTree taxonomic identifier.
##' @param node_id Character. The OpenTree node identifier.
##' @param include_lineage Logical (default = FALSE). Whether to return the
##' lineage of the node from the synthetic tree.
##' @param ... additional arguments to customize the API call (see
##' ?rotl for more information)
##'
##' @return \code{tol_node_info} returns a list of summary
##' information about the queried node:
##'
##' \itemize{
##'
##' \item {node_id} {String. The canonical identifier of the node.}
##'
##' \item {num_tips} {Numeric. The number of descendent tips.}
##'
##' \item {taxon} {A list of taxonomic properties. Only returned if
##' the queried node is a taxon. Each source has:}
##'
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
##'
##' \item {name} {String.
The taxonomic name of the queried node.} ##' ##' \item {unique_name} {String. The string that uniquely ##' identifies the taxon in OTT.} ##' ##' \item {rank} {String. The taxonomic rank of the taxon in OTT.} ##' ##' \item {tax_sources} {List. A list of identifiers for taxonomic ##' sources, such as other taxonomies, that define taxa judged ##' equivalent to this taxon.} ##' } ##' ##' The following properties list support/conflict for the node across ##' synthesis source trees. All properties involve sourceid keys and ##' nodeid values (see \code{source_id_map} below). ##' ##' \item {partial_path_of} {List. The edge below this synthetic tree node ##' is compatible with the edge below each of these input tree nodes (one ##' per tree). Each returned element is reported as sourceid:nodeid.} ##' ##' \item {supported_by} {List. Input tree nodes (one per tree) that support ##' this synthetic tree node. Each returned element is reported as ##' sourceid:nodeid.} ##' ##' \item {terminal} {List. Input tree nodes (one per tree) that are equivalent ##' to this synthetic tree node (via an exact mapping, or the input tree ##' terminal may be the only terminal descended from this synthetic tree node). ##' Each returned element is reported as sourceid:nodeid.} ##' ##' \item {conflicts_with} {Named list of lists. Names correspond to ##' sourceid keys. Each list contains input tree node ids (one or more per tree) ##' that conflict with this synthetic node.} ##' ##' \item {source_id_map} {Named list of lists. Names correspond to the ##' sourceid keys used in the 4 properties above. Source trees will have the ##' following properties:} ##' ##' \itemize{ ##' \item {git_sha} {The git SHA identifying a particular source ##' version.} ##' ##' \item {tree_id} {The tree id associated with the study id used.} ##' ##' \item {study_id} {The study identifier.
##' Will typically include
##' a prefix ("pg_" or "ot_").}
##' }
##' The only sourceid that does not correspond to a source tree is the taxonomy,
##' which will have the name "ott"+`taxonomy_version`, and the value is the
##' ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
##' appear in \code{supported_by}.
##'
##' }
##'
##' \code{tol_lineage} and \code{tax_lineage} return data
##' frames. \code{tol_lineage} indicates for each ancestor its
##' node identifier, the number of tips descending from that
##' node, and whether it corresponds to a taxonomic level.
##'
##' @examples
##' \dontrun{
##' birds <- tol_node_info(ott_id=81461, include_lineage=TRUE)
##' source_list(birds)
##' tax_rank(birds)
##' ott_id(birds)
##' tax_lineage(birds)
##' tol_lineage(birds)}
##' @export
tol_node_info <- function(ott_id=NULL, node_id=NULL, include_lineage=FALSE, ...) {
    ## Tag the API response so that the S3 methods defined below
    ## (print, tax_rank, tax_lineage, ...) dispatch on it.
    res <- .tol_node_info(ott_id=ott_id, node_id=node_id,
                          include_lineage=include_lineage, ...)
    class(res) <- c("tol_node", class(res))
    res
}

## Build an S3 method for `tol_node` objects from a taxon accessor `.f`.
## The generated method applies `.f` to the node's `taxon` element, wraps
## the result in a one-element list named after the taxon's unique name,
## and passes it through add_otl_class().
tol_node_method_factory <- function(.f) {
    function(tax, ...) {
        taxon <- tax[["taxon"]]
        res <- setNames(list(.f(taxon)), .tax_unique_name(taxon))
        add_otl_class(res, .f)
    }
}

## Print a short, human-readable summary of a `tol_node` object.
## Follows the convention for print methods and returns `x` invisibly.
##' @export
print.tol_node <- function(x, ...) {
    cat("\nOpenTree node.\n\n")
    cat("Node id: ", x$node_id, "\n", sep="")
    cat("Number of terminal descendants: ", x$num_tips, "\n", sep="")
    if (is_taxon(x[["taxon"]])) {
        cat("Is taxon: TRUE\n")
        cat("Name: ", x$taxon$name, "\n", sep="")
        cat("Rank: ", x$taxon$rank, "\n", sep="")
        cat("ott id: ", x$taxon$ott_id, "\n", sep="")
    } else {
        cat("Is taxon: FALSE\n")
    }
    invisible(x)
}

##' @export
##' @param tax an object returned by \code{tol_node_info}.
##' @rdname tol_node_info
tax_rank.tol_node <- tol_node_method_factory(.tax_rank)

##' @export
##' @rdname tol_node_info
tax_sources.tol_node <- tol_node_method_factory(.tax_sources)

##' @export
##' @rdname tol_node_info
unique_name.tol_node <- tol_node_method_factory(.tax_unique_name)

##' @export
##' @rdname tol_node_info
tax_name.tol_node <- tol_node_method_factory(.tax_name)

##' @export
##' @rdname tol_node_info
ott_id.tol_node <- tol_node_method_factory(.tax_ott_id)

##' @export
##' @rdname tol_node_info
source_list.tol_node <- .source_list

##' @export
##' @rdname tol_node_info
tax_lineage.tol_node <- function(tax, ...) {
    check_lineage(tax)
    lg <- lapply(tax[["lineage"]], function(x) {
        ## Each lineage entry is a list: test for the element by name
        ## rather than coercing the list to an environment with exists().
        if ("taxon" %in% names(x)) {
            build_lineage(x[["taxon"]])
        } else {
            ## Ancestors without a taxon contribute no row (rbind drops NULL).
            NULL
        }
    })
    lg <- do.call("rbind", lg)
    as.data.frame(lg, stringsAsFactors = FALSE)
}

##' @export
##' @rdname tol_node_info
tol_lineage.tol_node <- function(tax, ...) {
    check_lineage(tax)
    lg <- lapply(tax[["lineage"]], function(x) {
        ## NOTE(review): c() coerces its arguments to a common type, so if
        ## node_id is a string the other two fields become strings too --
        ## confirm that callers expect character columns.
        c("node_id" = x[["node_id"]],
          "num_tips" = x[["num_tips"]],
          "is_taxon" = "taxon" %in% names(x))
    })
    lg <- do.call("rbind", lg)
    as.data.frame(lg, stringsAsFactors = FALSE)
}

## Archive member header embedded in this file, kept verbatim as a comment:
## rotl/R/studies-methods.R0000644000177500001440000000520312602532772015225 0ustar deepayanusers

### list_trees -----------------------------------------------------------------
##' List trees ids in objects returned by
##' \code{\link{studies_find_studies}} and
##' \code{\link{studies_find_trees}}.
##'
##' \code{list_trees} returns all trees associated with a particular
##' study when used on an object returned by
##' \code{\link{studies_find_studies}}, but only the trees that match
##' the search criteria when used on objects returned by
##' \code{\link{studies_find_trees}}.
##'
##' @param matched_studies an object created by
##' \code{studies_find_trees} or \code{studies_find_studies}.
##' @param study_id a \code{study_id} listed in the object returned by
##' \code{studies_find_trees}
##' @param ...
##' Currently unused
##' @return \code{list_trees} returns a list of the tree_ids for each
##' study that match the requested criteria. If a \code{study_id}
##' is provided, then only the trees for this study are returned
##' as a vector.
##' @seealso \code{\link{studies_find_studies}} and
##' \code{\link{studies_find_trees}}. The help for these functions
##' has examples demonstrating the use of \code{list_trees}.
##' @export
list_trees <- function(matched_studies, ...) {
    UseMethod("list_trees")
}

##' @rdname list_trees
##' @export
list_trees.matched_studies <- function(matched_studies, study_id, ...) {
    ## The trees found for each study are stored in the "found_trees"
    ## attribute and indexed here by study id.
    trees_by_study <- attr(matched_studies, "found_trees")
    if (missing(study_id)) {
        return(trees_by_study)
    }
    if (!(study_id %in% names(trees_by_study))) {
        stop(sQuote(study_id), " isn't a valid id.")
    }
    trees_by_study[[study_id]]
}

##' @export
##' @rdname get_study_meta
get_tree_ids <- function(sm) {
    UseMethod("get_tree_ids")
}

##' @export
##' @rdname get_study_meta
get_publication <- function(sm) {
    UseMethod("get_publication")
}

##' @export
##' @rdname get_study_meta
candidate_for_synth <- function(sm) {
    UseMethod("candidate_for_synth")
}

##' @export
##' @rdname get_study_meta
get_study_year <- function(sm) {
    UseMethod("get_study_year")
}

##' @export
##' @rdname get_study_meta
get_tree_ids.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    ## The first entry of "^ot:treesElementOrder" names the element of
    ## "treesById" whose "^ot:treeElementOrder" is returned.
    trees_key <- nexml[["^ot:treesElementOrder"]][[1]]
    unlist(nexml[["treesById"]][[trees_key]][["^ot:treeElementOrder"]])
}

##' @export
##' @rdname get_study_meta
get_publication.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    reference <- nexml[["^ot:studyPublicationReference"]]
    ## The publication link is attached to the reference as its "DOI" attribute.
    attr(reference, "DOI") <- nexml[["^ot:studyPublication"]][["@href"]]
    reference
}

##' @export
##' @rdname get_study_meta
candidate_for_synth.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    unlist(nexml[["^ot:candidateTreeForSynthesis"]])
}

##' @export
##' @rdname get_study_meta
get_study_year.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    nexml[["^ot:studyYear"]]
}

## Archive member header embedded in this file, kept verbatim as a comment:
## rotl/R/rotl-package.R0000644000177500001440000000374012554042726014462 0ustar deepayanusers
##' An Interface to the Open Tree
of Life API ##' ##' The Open Tree of Life is an NSF funded project that is generating ##' an online, comprehensive phylogenetic tree for 1.8 million ##' species. \code{rotl} provides an interface that allows you to ##' query and retrieve the parts of the tree of life that are of ##' interest to you. ##' ##' \code{rotl} provides functions for most of the end points the API ##' provides. The documentation of the API is available at: ##' \url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs} ##' ##' @section Customizing API calls: ##' ##' All functions that use API end points can take 2 arguments to ##' customize the API call and are passed as \code{...} arguments. ##' ##' \itemize{ ##' ##' \item{ \code{otl_v} } { This argument controls which version ##' of the API your call is using. The default value for this ##' argument is a call to the non-exported function ##' \code{otl_version()} which returns the current version of the ##' Open Tree of Life APIs (v2).} ##' ##' \item{ \code{dev_url} } { This argument controls whether to use ##' the development version of the API. By default, \code{dev_url} ##' is set to \code{FALSE}; using \code{dev_url = TRUE} in your ##' function calls will use the development version.} ##' ##' } ##' ##' For example, to use the development version of the API, you ##' could use: \code{tnrs_match_names("anas", dev_url=TRUE)} ##' ##' Additional arguments can also be passed to the ##' \code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods. ##' ##' ##' @section Acknowledgments: ##' ##' This package was started during the Open Tree of Life ##' \href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon} ##' organized by OpenTree, the NESCent Hackathon Interoperability ##' Phylogenetic group, and Arbor.
##' ##' @name rotl ##' @docType package ##' @import ape NULL rotl/vignettes/0000755000177500001440000000000013056407503013565 5ustar deepayanusersrotl/vignettes/data_mashups.Rmd0000644000177500001440000001764312706240532016713 0ustar deepayanusers--- title: "Connecting data to Open Tree trees" author: "David Winter" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Connecting data to Open Tree trees} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Combining data from OToL and other sources. One of the major goals of `rotl` is to help users combine data from other sources with the phylogenetic trees in the Open Tree database. This example document describes some of the ways in which a user might connect data to trees from Open Tree. ## Get Open Tree IDs to match your data. Let's say you have a dataset where each row represents a measurement taken from one species, and your goal is to put these measurements in some phylogenetic context. Here's a small example: the best estimate of the mutation rate for a set of unicellular Eukaryotes along with some other property of those species which might explain the mutation rate: ```{r, data} csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl") mu <- read.csv(csv_path, stringsAsFactors=FALSE) mu ``` If we want to get a tree for these species we need to start by finding the unique ID for each of these species in the Open Tree database. We can use the Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do that we should see if any of the taxonomic contexts, which can be used to narrow a search and avoid conflicts between different codes, apply to our group of species: ```{r, context} library(rotl) tnrs_contexts() ``` Hmm, none of those groups contain all of our species.
In this case we can search using the `All life` context and the function `tnrs_match_names`: ```{r, match} taxon_search <- tnrs_match_names(names=mu$species, context_name="All life") knitr::kable(taxon_search) ``` Good, all of our species are known to Open Tree. Note, though, that one of the names is a synonym. _Saccharomyces pombe_ is an older name for what is now called _Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name Resolution Service is designed to deal with these problems (and similar ones like misspellings), but it is always a good idea to check the results of `tnrs_match_names` closely to ensure the results are what you expect. In this case we have a good ID for each of our species so we can move on. Before we do that, let's ensure we can match up our original data to the Open Tree names and IDs by adding them to our `data.frame`: ```{r, munge} mu$ott_name <- taxon_search$unique_name mu$ott_id <- taxon_search$ott_id ``` ## Find a tree with your taxa Now let's find a tree. There are two possible options here: we can search for published studies that include our taxa or we can use the 'synthetic tree' from Open Tree. We can try both approaches. ### Published trees Before we can search for published studies or trees, we should check out the list of properties we can use to perform such searches: ```{r, properties} studies_properties() ``` We have `ottIds` for our taxa, so let's use those IDs to search for trees that contain them. Starting with our first species _Tetrahymena thermophila_ we can use `studies_find_trees` to do this search. ```{r taxon_count} studies_find_trees(property="ot:ottId", value="180195") ``` Well... that's not very promising. We can repeat that process for all of the IDs to see if the other species are better represented.
```{r, all_taxa_count} hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE) sapply(hits, function(x) sum(x[["n_matched_trees"]])) ``` OK, most of our species are not in any of the published trees available. You can help fix this sort of problem by [making sure you submit your published trees to Open Tree](https://tree.opentreeoflife.org/curator). ### A part of the synthesis tree Thankfully, we can still use the complete Tree of Life made from the combined results of all of the published trees and taxonomies that go into Open Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs. Using the default arguments you can get a tree object into your R session: ```{r subtree, fig.width=7, fig.height=4} tr <- tol_induced_subtree(ott_ids=mu$ott_id) plot(tr) ``` ### Connect your data to the tips of your tree Now we have a tree for our species, how can we use the tree and the data together? The package `phylobase` provides an object class called `phylo4d`, which is designed to represent a phylogeny and data associated with its tips. In order to get our tree and data into one of these objects we have to make sure the labels in the tree and in our data match exactly. That's not quite the case at the moment (tree labels have underscores and IDs appended): ```{r, match_names} mu$ott_name[1] tr$tip.label[4] ``` `rotl` provides a convenience function `strip_ott_ids` to deal with these. ```{r, sub} tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label %in% mu$ott_name ``` Ok, now the tips are together we can make a new dataset.
The `phylo4d()` function matches tip labels to the row names of a `data.frame`, so let's make a new dataset that contains just the relevant data and has row names to match the tree ```{r phylobase} library(phylobase) mu_numeric <- mu[,c("mu", "pop.size", "genome.size")] rownames(mu_numeric) <- mu$ott_name tree_data <- phylo4d(tr, mu_numeric) ``` And now we can plot the data and the tree together ```{r, fig.width=7, fig.height=5} plot(tree_data) ``` ## Find external data associated with studies, trees and taxa from Open Tree In the above example we looked for a tree that related species in another dataset. Now we will go the other way, and try to find data associated with Open Tree records in other databases. ### Get external data from a study Let's imagine you were interested in extending or reproducing the results of a published study. If that study is included in Open Tree you can find it via `studies_find_studies` or `studies_find_trees` and retrieve the published trees with `get_study`. `rotl` will also help you find external data. The function `study_external_IDs` retrieves the DOI for a given study, and uses that to gather some more data: ```{r} extra_data <- study_external_IDs("pg_1980") extra_data ``` Here the returned object contains an `external_data_url` (in this case a link to the study in Treebase), a pubmed ID for the paper and a vector of IDs for the NCBI's nucleotide database. The packages `treebase` and `rentrez` provide functions to make use of these IDs within R. As an example, let's use `rentrez` to download the first two DNA sequences and print them. ```{r} library(rentrez) seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta") cat(seqs) ``` You could further process these sequences in R with the function `read.dna` from `ape` or save them to disk by specifying a file name with `cat`. ### Find an OTT taxon in another taxonomic database It is also possible to map an Open Tree taxon to a record in another taxonomic database.
For instance, if we wanted to search for data about one of the tips of the sub-tree we fetched in the example above we could do so using `taxon_external_IDs`: ```{r} Tt_ids <- taxon_external_IDs(mu$ott_id[2]) Tt_ids ``` A user could then use `rgbif` to find locality records using the gbif ID or `rentrez` to get genetic or bibliometric data about it from the NCBI's databases. ## What next The demonstration gets you to the point of visualizing your data in a phylogenetic context. But there's a lot more you can do with this sort of data in R. For instance, you could use packages like `ape`, `caper`, `phytools` and `MCMCglmm` to perform phylogenetic comparative analyses of your data. You could gather more data on your species using packages that connect to trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from the US National Phenology Network. You could also use `rentrez` to find genetic data for each of your species, and use that data to generate branch lengths for the phylogeny. rotl/vignettes/meta-analysis.Rmd0000644000177500001440000002240713056077352017012 0ustar deepayanusers--- title: "Using the Open Tree synthesis in a comparative analysis" author: "David Winter" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Phylogenetic Comparative Methods The development of phylogenetic comparative methods has made phylogenies an important source of data in fields as diverse as ecology, genomics and medicine. Comparative methods can be used to investigate patterns in the evolution of traits or the diversification of lineages. In other cases a phylogeny is treated as a "nuisance parameter", allowing the autocorrelation created by the shared evolutionary history of the different species included to be controlled for.
In many cases finding a tree that relates the species for which trait data are available is a rate-limiting step in such comparative analyses. Here we show how the synthetic tree provided by Open Tree of Life (and made available in R via `rotl`) can help to fill this gap. ## A phylogenetic meta-analysis To demonstrate the use of `rotl` in a comparative analysis, we will partially reproduce the results of [Rutkowska _et al_ 2014](https://dx.doi.org/10.1111/jeb.12282). Very briefly, this study is a meta-analysis summarising the results of multiple studies testing for systematic differences in the size of eggs which contain male and female offspring. Such a difference might mean that birds invest more heavily in one sex than the other. Because this study involves data from 51 different species, Rutkowska _et al_ used a phylogenetic comparative approach to account for the shared evolutionary history among some of the studied species. ### Gather the data If we are going to reproduce this analysis, we will first need to gather the data. Thankfully, the data is available as supplementary material from the publisher's website. We can collect the data using `fulltext` (with the paper's DOI as input) and read it into memory with `readxl`: ```{r egg_data, cache=TRUE} library(rotl) if (require(readxl) && require(fulltext)) { doi <- "10.1111/jeb.12282" xl_file <- ft_get_si(doi, 1, save.name="egg.xls") egg_data <- read_excel(xl_file) } else { egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl")) } head(egg_data) ``` The most important variable in this dataset is `Zr`, which is a [normalized effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference in size between eggs that contain males and females. Values close to zero come from studies that found the sex of an egg's inhabitant had little effect in its size, while large positive or negative values correspond to studies with substantial sex biases (towards males and females respectively).
Since this is a meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot) with effect size on the y-axis and precision (the inverse of the sample standard error) on the x-axis. Here we calculate precision from the sample variance (`VZr`): ```{r eggs_in_a_funnel, fig.width=6, fig.height=3} plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16, ylab="Effect size (Zr)", xlab="Precision (1/SE)", main="Effect sizes for sex bias in egg size among 51 brid species" ) ``` In order to use this data later on we need to first convert it to a standard `data.frame`. We can also convert the `animal` column (the species names) to lower case which will make it easier to match names later on: ```{r, clean_eggs} egg_data <- as.data.frame(egg_data) egg_data$animal <- tolower(egg_data$animal) ``` ### Find the species in OTT We can use the OTL synthesis tree to relate these species. To do so we first need to find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the Taxonomic Name Resolution Service function `tnrs_match_names`: ```{r, birds, cache=TRUE} taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals") head(taxa) ``` All of these species are in OTT, but a few of them go by different names in the Open Tree than we have in our data set. Because the tree `rotl` fetches will have Open Tree names, we need to create a named vector that maps the names we have for each species to the names Open Tree uses for them: ```{r bird_map} taxon_map <- structure(taxa$search_string, names=taxa$unique_name) ``` Now we can use this map to retrieve "data set names" from "OTT names": ```{r odd_duck} taxon_map["Anser caerulescens"] ``` ### Get a tree Now we can get the tree.
There are really too many tips here to show nicely, so we will leave them out of this plot: ```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'} tr <- tol_induced_subtree(taxa$ott_id) plot(tr, show.tip.label=FALSE) ``` There are a few things to note here. First, the tree has no branch lengths. At present this is true for the whole of the Open Tree synthetic tree. Some comparative methods require either branch lengths or an ultrametric tree. Before you can use one of those methods you will need to get a tree with branch lengths. You could try looking for published trees made available by the Open Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths from the topology of a phylogeny returned by `tol_induced_subtree`, perhaps by downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on nodes of known age using the penalized likelihood method in `ape::chronos`. In this case, we will use only the topology of the tree as input to our comparative analysis, so we can skip these steps. Second, the tip labels contain OTT IDs, which means they will not perfectly match the species names in our dataset or the taxon map that we created earlier: ```{r tip_lab} tr$tip.label[1:4] ``` Finally, the tree contains node labels for those nodes that match a higher taxonomic group, and empty character vectors (`""`) for all other nodes. Some comparative methods either do not expect node labels at all, or require all labeled nodes to have a unique name (meaning multiple "empty" labels will cause an error). We can deal with all these details easily. `rotl` provides the convenience function `strip_ott_ids` to remove the extra information from the tip labels. With the IDs removed, we can use our taxon map to replace the tip labels in the tree with the species names from our dataset.
```{r clean_tips} otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE) tr$tip.label <- taxon_map[ otl_tips ] ``` Finally, we can remove the node labels by setting the `node.label` attribute of the tree to `NULL`. ```{r remove_nodes} tr$node.label <- NULL ``` ### Perform the meta-analysis Now we have data and a tree, and we know the names in the tree match the ones in the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis, and we will do the same. Of course, to properly analyse these data you would take some care in deciding on the appropriate priors to use and inspect the results carefully. In this case, we are really interested in using this as a demonstration, so we will just run a simple model. Specifically we will fit a model where the only variable that might explain the values of `Zr` is the random factor `animal`, which corresponds to the phylogenetic relationships among species. We also provide `VZr` as the measurement error variance, effectively adding extra weight to the results of more powerful studies. Here's how we specify and fit that model with `MCMCglmm`: ```{r model} library(MCMCglmm, quiet=TRUE) set.seed(123) pr<-list(R=list(V=1,nu=0.002), G=list(G1=list(V=1,nu=0.002)) ) model <- MCMCglmm(Zr~1,random=~animal, pedigree=tr, mev=egg_data$VZr, prior=pr, data=egg_data, verbose=FALSE) ``` Now that we have a result we can find out how much phylogenetic signal exists for sex-biased differences in egg-size. In a multi-level model we can use variance components to look at this: specifically, the proportion of the total variance that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's calculate the _H_ for this model: ```{r PhyH} var_comps <- colMeans(model$VCV ) var_comps["animal"] / sum(var_comps) ``` It appears there is almost no phylogenetic signal in the data.
The relationships among species explain much less than one percent of the total variance in the data. If you were wondering, Rutkowska _et al_. report a similar result: even after adding more predictors to their model, most of the variance in `Zr` was left unexplained. ## What other comparative methods can I use in R? Here we have demonstrated just one comparative analysis that you might do in R. There are an ever-growing number of packages that allow an ever-growing number of analyses to be performed in R. Some "classics" like ancestral state reconstruction, phylogenetic independent contrasts and lineage through time plots are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree` provide extensions to these methods. The [CRAN Phylogenetics Taskview](https://CRAN.R-project.org/view=Phylogenetics) gives a good idea of the diversity of packages and analyses that can be completed in R. rotl/vignettes/how-to-use-rotl.Rmd0000644000177500001440000002553113056073735017232 0ustar deepayanusers--- title: "How to use rotl?" author: "François Michonneau" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: css: vignette.css vignette: > %\VignetteIndexEntry{How to use rotl?} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- `rotl` provides an interface to the Open Tree of Life (OTL) API and allows users to query the API, retrieve parts of the Tree of Life and integrate these parts with other R packages. The OTL API provides services to access: * the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is a combination of **the OTL taxonomy** and the **source trees** (studies) * the **Taxonomic name resolution services** a.k.a. TNRS: the methods for resolving taxonomic names to the internal identifiers used by the TOL and the GOL (the `ott ids`). * the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the synthesis of the different taxonomies used as a backbone of the TOL when no studies are available.
* the **Studies** containing the source trees used to build the TOL, and extracted from the scientific literature. In `rotl`, each of these services correspond to functions with different prefixes: | Service | `rotl` prefix | |---------------|---------------| | Tree of Life | `tol_` | | TNRS | `tnrs_` | | Taxonomy | `taxonomy_` | | Studies | `studies_` | `rotl` also provides a few other functions and methods that can be used to extract relevant information from the objects returned by these functions. ## Demonstration of a basic workflow The most common use for `rotl` is probably to start from a list of species and get the relevant parts of the tree for these species. This is a two step process: 1. the species names need to be matched to their `ott_id` (the Open Tree Taxonomy identifiers) using the Taxonomic name resolution services (TNRS) 1. these `ott_id` will then be used to retrieve the relevant parts of the Tree of Life. ### Step 1: Matching taxonomy to the `ott_id` Let's start by doing a search on a diverse group of taxa: a tree frog (genus _Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus (genus _Nautilus_). ```{r} library(rotl) taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) ``` It's always a good idea to check that the resolved names match what you intended: `r knitr::kable(resolved_names)` The column `unique_name` sometimes indicates the higher taxonomic level associated with the name. The column `number_matches` indicates the number of `ott_id` that corresponds to a given name. In this example, our search on _Diadema_ returns 2 matches, and the one returned by default is indeed the sea urchin that we want for our query. The argument `context_name` allows you to limit the taxonomic scope of your search. _Diadema_ is also the genus name of a fungus. 
To ensure that our search is limited to animal names, we could do: ```{r} resolved_names <- tnrs_match_names(taxa, context_name = "Animals") ``` If you are trying to build a tree with deeply divergent taxa that the argument `context_name` cannot fix, see "How to change the ott ids assigned to my taxa?" in the FAQ below. ### Step 2: Getting the tree corresponding to our taxa Now that we have the correct `ott_id` for our taxa, we can ask for the tree using the `tol_induced_subtree()` function. By default, the object returned by `tol_induced_subtree` is a phylo object (from the [ape](https://cran.r-project.org/package=ape) package), so we can plot it directly. ```{r, fig.width=7, fig.height=4} my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id) plot(my_tree, no.margin=TRUE) ``` ## FAQ ### How to change the ott ids assigned to my taxa? If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to your name (e.g., because of synonymy) and changing the `context_name` does not help, you can use the function `inspect`. This function takes the object resulting from `tnrs_match_names()`, and either the row number, the taxon name (you used in your search in lowercase), or the `ott_id` returned by the initial query. To illustrate this, let's re-use the previous query but this time pretending that we are interested in the fungus _Diadema_ and not the sea urchin: ```{r} taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus") resolved_names <- tnrs_match_names(taxa) resolved_names inspect(resolved_names, taxon_name = "diadema") ``` In our case, we want the second row in this data frame to replace the information that initially matched for _Diadema_. 
We can now use the `update()` function to change to the correct taxon (the fungus not the sea urchin): ```{r} resolved_names <- update(resolved_names, taxon_name = "diadema", new_row_number = 2) ## we could also have used the ott_id to replace this taxon: ## resolved_names <- update(resolved_names, taxon_name = "diadema", ## new_ott_id = 4930522) ``` And now our `resolved_names` data frame includes the taxon we want: `r knitr::kable(resolved_names)` ### How do I know that the taxa I'm asking for is the correct one? The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns taxonomic information about the taxa. This output can be passed to some helper functions to extract the relevant information. Let's illustrate this with our _Diadema_ example: ```{r} diadema_info <- taxonomy_taxon_info(631176) tax_rank(diadema_info) synonyms(diadema_info) tax_name(diadema_info) ``` In some cases, it might also be useful to investigate the taxonomic tree descending from an `ott_id` to check that it's the correct taxon and to determine the species included in the Open Tree Taxonomy: ```{r} diadema_tax_tree <- taxonomy_subtree(631176) diadema_tax_tree ``` By default, this function returns all taxa (including self, and internal) descending from this `ott_id` but it is also possible to return a `phylo` object. ### How do I get the tree for a particular taxonomic group? If you are looking to get the tree for a particular taxonomic group, you need to first identify it by its node id or ott id, and then use the `tol_subtree()` function: ```{r, fig.width=7, fig.height=4} mono_id <- tnrs_match_names("Monotremata") mono_tree <- tol_subtree(ott_id = ott_id(mono_id)) plot(mono_tree) ``` ### How do I find trees from studies focused on my favourite taxa? The function `studies_find_studies()` allows the user to search for studies matching specific criteria. The function `studies_properties()` returns the list of properties that can be used in the search.
```{r} furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia") furry_ids <- furry_studies$study_ids ``` Now that we know the `study_id`, we can ask for the metadata associated with this study: ```{r} furry_meta <- get_study_meta("pg_2550") get_publication(furry_meta) ## The citation for the source of the study get_tree_ids(furry_meta) ## This study has 10 trees associated with it candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL ``` Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with all the trees associated with this particular study, while `get_study_tree("pg_2550", "tree5513")` would return one of these trees. ### The tree returned by the API has duplicated tip labels, how can I work around it? You may encounter the following error message: ``` Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia angulata) has already been encountered in this tree. Duplication of taxa in a tree is prohibited. ``` This message occurs as duplicate labels are not allowed in the NEXUS format and it is strictly enforced by the part of the code used by `rotl` to import the trees in memory. If you use a version of `rotl` more recent than 0.4.1, this should not happen by default for the function `get_study_tree`. If it happens with another function, please [let us know](https://github.com/ropensci/rotl/issues). The easiest way to work around this is to save the tree in a file, and use APE to read it in memory: ```{r, eval=FALSE} get_study_tree(study_id="pg_710", tree_id="tree1277", tip_label='ott_taxon_name', file = "/tmp/tree.tre", file_format = "newick") tr <- ape::read.tree(file = "/tmp/tree.tre") ``` ### How do I get the higher taxonomy for a given taxa? If you encounter a taxon name you are not familiar with, it might be useful to obtain its higher taxonomy to see where it fits in the tree of life.
We can combine several taxonomy methods to extract this information easily. ```{r} giant_squid <- tnrs_match_names("Architeuthis") tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE)) ``` ### Why are OTT IDs discovered with `rotl` missing from an induced subtree? Some taxonomic names that can be retrieved through the taxonomic name resolution service are not part of the Open Tree's synthesis tree. These are usually traditional higher-level taxa that have been found to be paraphyletic. For instance, if you wanted to fetch a tree relating the three birds that go into a [Turducken](https://en.wikipedia.org/wiki/Turducken) as well as the pork used for stuffing, you might search for the turkey, duck, chicken, and pork genera: ```{r} turducken <- c("Meleagris", "Anas", "Gallus", "Sus") taxa <- tnrs_match_names(turducken, context="Animals") taxa ``` We have the OTT ids for each genus; however, if we tried to get the induced subtree from these results, we would get an error: ```{r, error=TRUE} tr <- tol_induced_subtree(ott_id(taxa)) ``` As the error message suggests, some of the taxa are not found in the synthetic tree. This occurs for 2 main reasons: either the taxon is invalid, or it is part of a group that is not monophyletic in the synthetic tree. There are two ways to get around this issue: (1) removing the taxa that are not part of the Open Tree; (2) using the complete species name. #### Removing the taxa missing from the synthetic tree To help with this situation, `rotl` provides a way to identify the OTT ids that are not part of the synthetic tree. The function `is_in_tree()` takes the output of the `ott_id()` function and returns a logical vector indicating whether the taxa are part of the synthetic tree.
We can then use it to keep only the taxa that appear in the synthetic tree: ```{r} in_tree <- is_in_tree(ott_id(taxa)) in_tree tr <- tol_induced_subtree(ott_id(taxa)[in_tree]) ``` #### Using the full taxonomic names The best way to avoid these problems is to specify complete species names (species being the lowest level of classification in the Open Tree taxonomy, they are guaranteed to be monophyletic): ```{r, fig.width=7, fig.height=4} turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus", "Sus scrofa") taxa <- tnrs_match_names(turducken_spp, context="Animals") tr <- tol_induced_subtree(ott_id(taxa)) plot(tr) ``` rotl/vignettes/vignette.css0000644000177500001440000000726612647504570016146 0ustar deepayanusersbody { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; } #header { text-align: center; } #TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; } #TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; } #TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; } #TOC ul ul { margin-left: -2em; } #TOC li { line-height: 16px; } table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; } table th { border-width: 2px; padding: 5px; border-style: inset; } table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; } table, table th, table td { border-left-style: none; border-right-style: none; } table thead, table tr.even { background-color: #f7f7f7; } p { margin: 0.5em 0; } blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; } hr { border-style: solid; border: none; border-top: 1px solid #777;
margin: 28px 0; } dl { margin-left: 0; } dl dd { margin-bottom: 13px; margin-left: 13px; } dl dt { font-weight: bold; } ul { margin-top: 0; } ul li { list-style: circle outside; } ul ul { margin-bottom: 0; } pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; } pre { /*white-space: pre-wrap; /* Wrap long lines */ border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #f7f7f7; } code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; } p > code, li > code { padding: 2px 0px; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; } h1 { margin-top: 0; font-size: 35px; line-height: 40px; } h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; } a { color: #0033dd; text-decoration: none; } a:hover { color: #6666ff; } a:visited { color: #800080; } a:visited:hover { color: #BB00BB; } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } /* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css Colours from https://gist.github.com/robsimmons/1172277 */ code > span.kw { color: #555; font-weight: bold; } /* Keyword */ code > span.dt { color: #902000; } /* DataType */ code > span.dv { color: #40a070; } /* DecVal (decimal values) */ code > span.bn { color: #d14; } /* BaseN */ code > span.fl { color: #d14; } /* Float */ code > span.ch { color: #d14; } /* Char */ code > span.st { color: #d14; } /* String */ code > span.co { color: #888888; font-style: italic; } /* Comment */ code > span.ot { color: #007020; } /* OtherToken */ code > 
span.al { color: #ff0000; font-weight: bold; } /* AlertToken */ code > span.fu { color: #900; font-weight: bold; } /* Function calls */ code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */ rotl/README.md0000644000177500001440000001263213056116500013032 0ustar deepayanusers [![Build Status](https://travis-ci.org/ropensci/rotl.svg?branch=master)](https://travis-ci.org/ropensci/rotl) [![Build status](https://ci.appveyor.com/api/projects/status/jwvl84e6m36bqwga?svg=true)](https://ci.appveyor.com/project/fmichonneau/rotl) [![codecov.io](https://codecov.io/github/ropensci/rotl/coverage.svg?branch=master)](https://codecov.io/github/ropensci/rotl?branch=master) [![](http://www.r-pkg.org/badges/version/rotl)](http://www.r-pkg.org/pkg/rotl) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/rotl)](http://www.r-pkg.org/pkg/rotl) [![Research software impact](http://depsy.org/api/package/cran/rotl/badge.svg)](http://depsy.org/package/r/rotl) An R interface to Open Tree API =============================== `rotl` is an R package to interact with the Open Tree of Life data APIs. It was initially developed as part of the [NESCENT/OpenTree/Arbor hackathon](http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/). Client libraries to interact with the Open Tree of Life API also exists for [Python](https://github.com/OpenTreeOfLife/pyopentree) and [Ruby](https://github.com/SpeciesFileGroup/bark). Installation ------------ The current stable version is available from CRAN, and can be installed by typing the following at the prompt in R: ``` r install.packages("rotl") ``` If you want to test the development version, you first need to install [ghit](https://github.com/cloudyr/ghit) (`ghit` is a more lightweight version of [devtools](https://github.com/hadley/devtools) if your sole purpose is to install packages that are hosted on GitHub). 
``` r install.packages("ghit") ``` Then you can install `rotl` using: ``` r library(ghit) # or library(devtools) install_github("ropensci/rotl") ``` Vignettes --------- There are three vignettes: - Start by checking out the "How to use `rotl`?" vignette by typing: `vignette("how-to-use-rotl", package="rotl")` after installing the package. - Then explore how you can use `rotl` with other packages to combine your data with trees from the Open Tree of Life project by typing: `vignette("data_mashups", package="rotl")`. - The vignette "Using the Open Tree synthesis in a comparative analysis" demonstrates how you can reproduce an analysis of a published paper by downloading the tree they used, and data from the supplementary material: `vignette("meta-analysis", package="rotl")`. The vignettes are also available from CRAN: [How to use `rotl`?](https://cran.r-project.org/package=rotl/vignettes/how-to-use-rotl.html), [Data mashups](https://cran.r-project.org/package=rotl/vignettes/data_mashups.html), and [Using the Open Tree synthesis in a comparative analysis](https://cran.r-project.org/package=rotl/vignettes/meta-analysis.html). Quick start ----------- ### Get a little bit of the big Open Tree tree Taxonomic names are represented in the Open Tree by numeric identifiers, the `ott_ids` (Open Tree Taxonomy identifiers).
To extract a portion of a tree from the Open Tree, you first need to find `ott_ids` for a set of names using the `tnrs_match_names` function: ``` r library(rotl) ``` ## Warning: package 'rotl' was built under R version 3.4.0 ``` r apes <- c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo") (resolved_names <- tnrs_match_names(apes)) ``` ## search_string unique_name approximate_match ott_id is_synonym flags ## 1 pan Pan FALSE 417957 FALSE ## 2 pongo Pongo FALSE 417949 FALSE ## 3 pan Pan FALSE 417957 FALSE ## 4 gorilla Gorilla FALSE 417969 FALSE ## 5 hoolock Hoolock FALSE 712902 FALSE ## 6 homo Homo FALSE 770309 FALSE ## number_matches ## 1 2 ## 2 2 ## 3 2 ## 4 1 ## 5 1 ## 6 1 Now we can get the tree with just those tips: ``` r tr <- tol_induced_subtree(ott_ids=ott_id(resolved_names)) plot(tr) ``` ![](http://i.imgur.com/u0JYjjN.png) The code above can be summarized in a single pipe: ``` r library(magrittr) ## or expressed as a pipe: c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo") %>% tnrs_match_names %>% ott_id %>% tol_induced_subtree %>% plot ``` ![](http://i.imgur.com/1wjfPsN.png) Versioning ---------- Starting with v3.0.0 of the package, the major and minor version numbers (the first 2 digits of the version number) will be matched to those of the API. The patch number (the 3rd digit of the version number) will be used to reflect bug fixes and other changes that are independent from changes to the API. `rotl` can be used to access other versions of the API (if they are available) but most likely the high level functions will not work. Instead, you will need to parse the output yourself using the "raw" returns from the unexported low-level functions (all prefixed with a `.`). For instance to use the `tnrs/match_names` endpoint for `v2` of the API: ``` r rotl:::.tnrs_match_names(c("pan", "pango", "gorilla", "hoolock", "homo"), otl_v="v2") ``` ### Code of Conduct Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). 
By participating in this project you agree to abide by its terms. [![](http://ropensci.org/public_images/github_footer.png)](http://ropensci.org) rotl/MD50000644000177500001440000001157513056457107012103 0ustar deepayanusersbef13507a27f905c474b5ac48e21dc51 *DESCRIPTION 41ba68f62ff5f58f05f96a0dbd08b282 *LICENSE 1b572ebec7a85682af97cf46a8954c85 *NAMESPACE f3ff4e9fdc6e6eaf6b8b101d5f9dec82 *NEWS.md 9a7c4a54d4f95926b66ed6e06476df86 *R/api-collections.R 9bd92e4fd5729c1c914772ea6051c0d5 *R/api-studies.R cf7e3dc77115551f7a666294970d090c *R/api-taxonomy.R 08064573a536c31daf9a479eec383e25 *R/api-tnrs.R 9049ace110a1de5d23d81cc09e1022b5 *R/api-tol.R f1d6f185eb969bcb30e8fa01828389b9 *R/base.R 6286c2d7b82011d166cf7e0b68391d82 *R/deduplicate_labels.R 71769e61f96290be93a642ed5e7e1bf1 *R/external_data.R 67afa5276ae39353db1be4c9748202e4 *R/is_in_tree.R 8f6a526c44396e5afe7a980ea0116d39 *R/match_names.R dc83309cfcec969f308dc5ef128b24c8 *R/methods.R 885012a0fef7e432f1e3ab49ac05a621 *R/rotl-package.R ecec63dfc2513887fcc35f62ad8bbf10 *R/studies-methods.R e3689dd313ea6b632450c24f11ec56e8 *R/studies-utils.R c2833f9705fb7e369e1678a37d87250b *R/studies.R f1f397f5739142664c28e4068893be34 *R/tax_utils.R 3c36224fa41a50053831efb15ff2675c *R/taxonomy.R 2b2d1c28d1e4b229a59780ba94059a47 *R/tnrs.R a884ca1e7339f1d913dc058a4d4a3653 *R/tol.R cadefaffbca9725850fbc2218753179a *R/tree_to_labels.R 41bc3a21bc29333d534950c8e8ac0084 *README.md f9b8ccf74106a49197ffc1dbc4e04e75 *build/vignette.rds f0fd4ea89eb96fe8746602e268fbf502 *inst/CITATION 319582efa0a202fed973883555cf0e51 *inst/doc/data_mashups.R 64c62d69bc9cec722c7f26e6d025ef35 *inst/doc/data_mashups.Rmd 11af7fe6047e439efb39b4eaace09c9b *inst/doc/data_mashups.html 4f8c602b56730ad3c3a97caaf33d4719 *inst/doc/how-to-use-rotl.R 8001eb5bddaaf93285424d383c3195f3 *inst/doc/how-to-use-rotl.Rmd 1b8e7ff8dc6a4dd14cab710f443bc71d *inst/doc/how-to-use-rotl.html b5e8ba63b31d0baf609e763d168b8b14 *inst/doc/meta-analysis.R ef518f65da0362f9bf5b5fefa5329cd0 
*inst/doc/meta-analysis.Rmd 171d4496da0b82d1f9dbf22220fd2d91 *inst/doc/meta-analysis.html 09d93b49b2b38f2495761ea86e134006 *inst/extdata/egg.csv 5e5eb20ee387ba7192c62e629a3383f2 *inst/extdata/protist_mutation_rates.csv f30954a01d7420beb303c1916ea08971 *man/get_study.Rd d261a5dcd926401050929be39c1ed44d *man/get_study_meta.Rd 9297cffa642515239d15c2efebdce3a2 *man/get_study_subtree.Rd f60b580ce8808e50d0479e4d22c6779d *man/get_study_tree.Rd 7376b3c8fbc5c5d83c06e9c2756fbb35 *man/is_in_tree.Rd 8572338a55159b4b45eea44191cbc82f *man/list_trees.Rd e0386442684f2019f009dbd9dc75b9b3 *man/match_names-methods.Rd ce1a6b4c35d7ca5209b664938b081d7b *man/match_names.Rd 9f9c58fcedf44fecf5cfc9741ef54a3d *man/rotl.Rd ea8938c2220f0b358930fbaa9b9af738 *man/source_list.Rd 6310137f447dbe11a8bda6f2a1c1ed40 *man/strip_ott_ids.Rd 5b3f7b8629b906dc79ac5d1de9e11255 *man/studies_find_studies.Rd ddf9261b772dbcf6846955b1d34b1683 *man/studies_find_trees.Rd b52b8d1f1701bf1d7d1b4549d56ccc90 *man/studies_properties.Rd 76ee971ef937ff9fe23ceca701b35ce6 *man/study_external_IDs.Rd 39e2c7bacba5befea2f3d8bc6be223a9 *man/synonyms.match_names.Rd ccb54bd7485bddbe4ee59b4e117a0b21 *man/tax_lineage.Rd 0c0a51d7f1b9365e27f9c84b8095a0da *man/taxon_external_IDs.Rd 5f58b0e589b08000432acb5656fc2be2 *man/taxonomy-methods.Rd f72255eacbb9eadb1fa2eb513557afaa *man/taxonomy_about.Rd 5fa0e3a349e2d596e24ce621c567497b *man/taxonomy_mrca.Rd 5b43cd3d8577fc8f9df28a7f1276972c *man/taxonomy_subtree.Rd 40ef74245553816685e5c7b07096ec8b *man/taxonomy_taxon_info.Rd c01395cc8ca421b4e607c110fd714a53 *man/tnrs_contexts.Rd a16c0de937a7de364fbff3a7e1581c44 *man/tnrs_infer_context.Rd a3e1aa967d58fadf26e62d458ed075e5 *man/tnrs_match_names.Rd 0399a2876a415a962c1c35f52ad418b6 *man/tol_about.Rd 34b418796c5f85fbfecbe1b663e86197 *man/tol_induced_subtree.Rd 69cc652562e542dc2572068edbe28d6c *man/tol_mrca.Rd 16bd284ab4cf10574bcd7700d074266d *man/tol_node_info.Rd a520ea68a2b1b4943a0941832761beae *man/tol_subtree.Rd 43c8ae96b072c5ffa02ee1476ab517af 
*tests/test-all.R 8aa44593dfd95bba9fc05fe726562a4f *tests/testthat/test-API.R 0978d0c6f16459de48230f0ca7242888 *tests/testthat/test-api-studies.R 35dc779abfea3d163251ad3ba9681b06 *tests/testthat/test-api-taxonomy.R 620a4b277bc83dc51d84c7dc4416b32d *tests/testthat/test-api-tnrs.R eec189b1b610d1fab12164c96ba55ff5 *tests/testthat/test-api-tol.R 9a25dd598f4420dc68f2280ce323c367 *tests/testthat/test-base.R 91e80d240d100527a3496b9c2040c771 *tests/testthat/test-deduplicate_labels.R 51c9a683157c0ca97ca7fa7656dbf4a2 *tests/testthat/test-external.R 47dd8941c039e76be1a91266434680b4 *tests/testthat/test-match_names.R 4ca3762a30b73adc31d149a81e15c3a4 *tests/testthat/test-studies.R d4eb07768af70ce855fbad1323a0994e *tests/testthat/test-taxonomy.R 57bcbfabee0c58d895dce12a81166027 *tests/testthat/test-tnrs.R da7d30c00521dcf6ebaa16a1ddd5dc50 *tests/testthat/test-tol.R 85a49232f1097e80d7f059bea79d885d *tests/testthat/test-tree_to_labels.R fb4b19651907e0fdb26e6f4c0581af83 *tests/tree_of_life.json 64c62d69bc9cec722c7f26e6d025ef35 *vignettes/data_mashups.Rmd 8001eb5bddaaf93285424d383c3195f3 *vignettes/how-to-use-rotl.Rmd ef518f65da0362f9bf5b5fefa5329cd0 *vignettes/meta-analysis.Rmd da8cb33974fa673158fea8ccf2d8bbac *vignettes/vignette.css rotl/build/0000755000177500001440000000000013056407503012654 5ustar deepayanusersrotl/build/vignette.rds0000644000177500001440000000047313056407503015217 0ustar deepayanusersR=O0uPh%>J{22PUH T$6d%ۑeC86C.{^<'D$#xل$df33]n=՗lS#ӭTISR&Xjr^H! 
7xItl-*Ԫ\ՓͦAPV2K0L3d,W^;b;!!ING~Ywwl{V<0L`SWLU Me~avljphrtTB3%̋ۻDyʹִUa?f_brotl/DESCRIPTION0000644000177500001440000000241513056457107013272 0ustar deepayanusersPackage: rotl Title: Interface to the 'Open Tree of Life' API Version: 3.0.3 Authors@R: c( person("Francois", "Michonneau", role=c("aut", "cre"), email="francois.michonneau@gmail.com"), person("Joseph", "Brown", role="aut"), person("David", "Winter", role="aut")) Description: An interface to the 'Open Tree of Life' API to retrieve phylogenetic trees, information about studies used to assemble the synthetic tree, and utilities to match taxonomic names to 'Open Tree identifiers'. The 'Open Tree of Life' aims at assembling a comprehensive phylogenetic tree for all named species. URL: https://github.com/ropensci/rotl BugReports: https://github.com/ropensci/rotl/issues Depends: R (>= 3.1.1) Imports: httr, jsonlite, assertthat (>= 0.1), rncl (>= 0.6.0), ape, rentrez License: BSD_2_clause + file LICENSE Suggests: knitr (>= 1.12), rmarkdown (>= 0.7), testthat, RNeXML, phylobase, MCMCglmm, fulltext (>= 0.1.6), readxl VignetteBuilder: knitr LazyData: true RoxygenNote: 6.0.1 NeedsCompilation: no Packaged: 2017-03-04 00:50:11 UTC; francois Author: Francois Michonneau [aut, cre], Joseph Brown [aut], David Winter [aut] Maintainer: Francois Michonneau Repository: CRAN Date/Publication: 2017-03-04 07:27:19 rotl/man/0000755000177500001440000000000013056407503012330 5ustar deepayanusersrotl/man/rotl.Rd0000644000177500001440000000370212670356150013603 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rotl-package.R \docType{package} \name{rotl} \alias{rotl} \alias{rotl-package} \title{An Interface to the Open Tree of Life API} \description{ The Open Tree of Life is an NSF funded project that is generating an online, comprehensive phylogenetic tree for 1.8 million species. 
\code{rotl} provides an interface that allows you to query and retrieve the parts of the tree of life that are of interest to you. } \details{ \code{rotl} provides functions for most of the end points the API provides. The documentation of the API is available at: \url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs} } \section{Customizing API calls}{ All functions that use API end points can take 2 arguments to customize the API call and are passed as \code{...} arguments. \itemize{ \item{ \code{otl_v} } { This argument controls which version of the API your call is using. The default value for this argument is a call to the non-exported function \code{otl_version()} which returns the current version of the Open Tree of Life APIs (v2).} \item{ \code{dev_url} } { This argument controls whether to use the development version of the API. By default, \code{dev_url} is set to \code{FALSE}; using \code{dev_url = TRUE} in your function calls will use the development version.} } For example, to use the development version of the API, you could use: \code{tnrs_match_names("anas", dev_url=TRUE)} Additional arguments can also be passed to the \code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods. } \section{Acknowledgments}{ This package was started during the Open Tree of Life \href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon} organized by OpenTree, the NESCent Hackathon Interoperability Phylogenetic group, and Arbor. } rotl/man/tnrs_match_names.Rd0000644000177500001440000000606413055075704016155 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tnrs.R \name{tnrs_match_names} \alias{tnrs_match_names} \title{Match names to the Open Tree Taxonomy} \usage{ tnrs_match_names(names = NULL, context_name = NULL, do_approximate_matching = TRUE, ids = NULL, include_suppressed = FALSE, ...)
} \arguments{ \item{names}{taxon names to be queried. Currently limited to 10,000 names for exact matches and 2,500 names for approximate matches (character vector)} \item{context_name}{name of the taxonomic context to be searched (length-one character vector). Must match (case sensitive) one of the values returned by \code{\link{tnrs_contexts}}.} \item{do_approximate_matching}{A logical indicating whether or not to perform approximate string (a.k.a. \dQuote{fuzzy}) matching. Using \code{FALSE} will greatly improve speed. Default, however, is \code{TRUE}.} \item{ids}{A vector of ids to use for identifying names. These will be assigned to each name in the names array. If ids is provided, then ids and names must be identical in length.} \item{include_suppressed}{Ordinarily, some quasi-taxa, such as incertae sedis buckets and other non-OTUs, are suppressed from TNRS results. If this parameter is true, these quasi-taxa are allowed as possible TNRS results.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A data frame summarizing the results of the query. The original query output is appended as an attribute to the returned object (and can be obtained using \code{attr(object, "original_response")}). } \description{ Match taxonomic names to the Open Tree Taxonomy. } \details{ Accepts one or more taxonomic names and returns information about potential matches for these names to known taxa in the Open Tree Taxonomy. This service uses taxonomic contexts to disambiguate homonyms and misspelled names; a context may be specified using the \code{context_name} argument. If no context is specified, then the context will be inferred (i.e., the shallowest taxonomic context that contains all unambiguous names in the input). Taxonomic contexts are uncontested higher taxa that have been selected to allow limits to be applied to the scope of TNRS searches (e.g. 'match names only within flowering plants').
Once a context has been identified (either user-specified or inferred), all taxon name matches will be performed only against taxa within that context. For a list of available taxonomic contexts, see \code{\link{tnrs_contexts}}. A name is considered unambiguous if it is not a synonym and has only one exact match to any taxon name in the entire taxonomy. Several functions listed in the \sQuote{See also} section can be used to inspect and manipulate the object generated by this function. } \examples{ \dontrun{ deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha", "chordata", "hemichordata")) } } \seealso{ \code{\link{inspect.match_names}}, \code{\link{update.match_names}}, \code{\link{synonyms.match_names}}. } rotl/man/taxonomy_taxon_info.Rd0000644000177500001440000000444513055075704016733 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/taxonomy.R \name{taxonomy_taxon_info} \alias{taxonomy_taxon_info} \alias{tax_rank.taxon_info} \alias{tax_name.taxon_info} \alias{unique_name.taxon_info} \alias{synonyms.taxon_info} \alias{ott_id.taxon_info} \alias{tax_sources.taxon_info} \alias{is_suppressed.taxon_info} \alias{flags.taxon_info} \title{Taxon information} \usage{ taxonomy_taxon_info(ott_ids, include_children = FALSE, include_lineage = FALSE, include_terminal_descendants = FALSE, ...) \method{tax_rank}{taxon_info}(tax, ...) \method{tax_name}{taxon_info}(tax, ...) \method{unique_name}{taxon_info}(tax, ...) \method{synonyms}{taxon_info}(tax, ...) \method{ott_id}{taxon_info}(tax, ...) \method{tax_sources}{taxon_info}(tax, ...) \method{is_suppressed}{taxon_info}(tax, ...) \method{flags}{taxon_info}(tax, ...) } \arguments{ \item{ott_ids}{the ott ids of the taxon of interest (numeric or character containing only numbers)} \item{include_children}{whether to include information about all the children of this taxon.
Default \code{FALSE}.} \item{include_lineage}{whether to include information about all the higher level taxa that include the \code{ott_ids}. Default \code{FALSE}.} \item{include_terminal_descendants}{whether to include the list of terminal \code{ott_ids} contained in the \code{ott_ids} provided.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} \item{tax}{an object generated by the \code{taxonomy_taxon_info} function} } \value{ \code{taxonomy_taxon_info} returns a list detailing information about the taxa. \code{tax_rank} and \code{tax_name} return a vector. \code{synonyms} returns a list whose elements are the synonyms for each of the \code{ott_id} requested. } \description{ Information about taxa. } \details{ Given a vector of ott ids, \code{taxonomy_taxon_info} returns information about the specified taxa. The functions \code{tax_rank}, \code{tax_name}, and \code{synonyms} can extract this information from an object created by \code{taxonomy_taxon_info()}. } \examples{ \dontrun{ req <- taxonomy_taxon_info(ott_id=515698) tax_rank(req) tax_name(req) synonyms(req) } } \seealso{ \code{\link{tnrs_match_names}} to obtain \code{ott_id} from a taxonomic name. } rotl/man/tnrs_contexts.Rd0000644000177500001440000000200513055075704015534 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tnrs.R \name{tnrs_contexts} \alias{tnrs_contexts} \title{TNRS contexts} \usage{ tnrs_contexts(...) } \arguments{ \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ Returns invisibly a list for each major clade (e.g., animals, microbes, plants, fungi, life) whose elements contain the possible contexts. } \description{ This function returns a list of pre-defined taxonomic contexts (i.e. clades) which can be used to limit the scope of tnrs queries.
} \details{ Taxonomic contexts are available to limit the scope of TNRS searches. These contexts correspond to uncontested higher taxa such as 'Animals' or 'Land plants'. This service returns a list containing all available taxonomic context names, which may be used as input (via the \code{context_name} argument in other functions) to limit the search scope of other services including \code{\link{tnrs_match_names}}. } rotl/man/tol_about.Rd0000644000177500001440000000766013055075704014623 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tol.R \name{tol_about} \alias{tol_about} \alias{tax_rank.tol_summary} \alias{tax_sources.tol_summary} \alias{unique_name.tol_summary} \alias{tax_name.tol_summary} \alias{ott_id.tol_summary} \title{Information about the Tree of Life} \usage{ tol_about(include_source_list = FALSE, ...) \method{tax_rank}{tol_summary}(tax, ...) \method{tax_sources}{tol_summary}(tax, ...) \method{unique_name}{tol_summary}(tax, ...) \method{tax_name}{tol_summary}(tax, ...) \method{ott_id}{tol_summary}(tax, ...) } \arguments{ \item{include_source_list}{Logical (default = \code{FALSE}). Return an ordered list of source trees.} \item{...}{additional arguments to customize the API call (see \code{\link{rotl}} for more information).} \item{tax}{an object created with a call to \code{tol_about}.} } \value{ An invisible list of synthetic tree summary statistics: \itemize{ \item {date_created} {String. The creation date of the tree.} \item {num_source_studies} {Integer. The number of studies (publications)used as sources.} \item {num_source_trees} {The number of trees used as sources (may be >1 tree per study).} \item {taxonomy_version} {The Open Tree Taxonomy version used as a source.} \item {filtered_flags} {List. Taxa with these taxonomy flags were not used in construction of the tree.} \item {root} {List. Describes the root node:} \itemize{ \item {node_id} {String. 
The canonical identifier of the node.} \item {num_tips} {Numeric. The number of descendent tips.} \item {taxon} {A list of taxonomic properties:} \itemize{ \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).} \item {name} {String. The taxonomic name of the queried node.} \item {unique_name} {String. The string that uniquely identifies the taxon in OTT.} \item {rank} {String. The taxonomic rank of the taxon in OTT.} \item {tax_sources} {List. A list of identifiers for taxonomic sources, such as other taxonomies, that define taxa judged equivalent to this taxon.} } } \item {source_list} {List. Present only if \code{include_source_list} is \code{TRUE}. The sourceid ordering is the precedence order for synthesis, with relationships from earlier trees in the list having priority over those from later trees in the list. See \code{source_id_map} below for study details.} \item {source_id_map} {Named list of lists. Present only if \code{include_source_list} is \code{TRUE}. Names correspond to the \sQuote{sourceids} used in \code{source_list} above. Source trees will have the following properties:} \itemize{ \item {git_sha} {String. The git SHA identifying a particular source version.} \item {tree_id} {String. The tree id associated with the study id used.} \item {study_id} {String. The study identifier. Will typically include a prefix ("pg_" or "ot_").} } \item {synth_id} {The unique string for this version of the tree.} } } \description{ Basic information about the Open Tree of Life (the synthetic tree) } \details{ Summary information about the current draft tree of life, including information about the list of trees and the taxonomy used to build it. The object returned by \code{tol_about} can be passed to the taxonomy methods (\code{tax_name()}, \code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to extract relevant taxonomic information for the root of the synthetic tree. 
} \examples{ \dontrun{ res <- tol_about() tax_sources(res) ott_id(res) studies <- source_list(tol_about(include_source_list=TRUE))} } \seealso{ \code{\link{source_list}} to explore the list of studies used in the synthetic tree (see example). } rotl/man/get_study_subtree.Rd0000644000177500001440000000430313055075704016362 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{get_study_subtree} \alias{get_study_subtree} \title{Study Subtree} \usage{ get_study_subtree(study_id, tree_id, subtree_id, object_format = c("phylo"), tip_label = c("original_label", "ott_id", "ott_taxon_name"), file_format, file, deduplicate = TRUE, ...) } \arguments{ \item{study_id}{the study identifier (character)} \item{tree_id}{the tree identifier (character)} \item{subtree_id, }{either a node id that specifies a subtree or \dQuote{ingroup} which returns the ingroup for this subtree.} \item{object_format}{the class of the object returned by the function (default, and currently only possibility \code{phylo} from the \code{\link[ape]{ape}} package)} \item{tip_label}{the format of the tip labels. \dQuote{\code{original_label}} (default) returns the original labels as provided in the study, \dQuote{\code{ott_id}} labels are replaced by their ott IDs, \dQuote{\code{ott_taxon_name}} labels are replaced by their Open Tree Taxonomy taxon name.} \item{file_format}{character, the file format to use to save the results of the query (possible values, \sQuote{newick} or \sQuote{nexus}).} \item{file}{character, the path and file name where the output should be written.} \item{deduplicate}{logical (default \code{TRUE}). If the tree returned by the study contains duplicated taxon names, should they be made unique? It is normally illegal for NEXUS/Newick tree strings to contain duplicated tip names. This is a workaround to circumvent this requirement. 
If \code{TRUE}, duplicated tip labels will be appended \code{_1}, \code{_2}, etc.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \description{ Retrieve subtree from a specific tree in the Open Tree of Life data store } \examples{ \dontrun{ small_tr <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="node991044") ingroup <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup") nexus_file <- tempfile(fileext=".nex") get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup", file=nexus_file, file_format="nexus") } } rotl/man/tol_induced_subtree.Rd0000644000177500001440000000322513055075704016646 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tol.R \name{tol_induced_subtree} \alias{tol_induced_subtree} \title{Subtree from the Open Tree of Life} \usage{ tol_induced_subtree(ott_ids = NULL, node_ids = NULL, label_format = NULL, file, ...) } \arguments{ \item{ott_ids}{Numeric vector. OTT ids indicating nodes to be used as tips in the induced tree.} \item{node_ids}{Character vector. Node ids indicating nodes to be used as tips in the induced tree.} \item{label_format}{Character. Defines the label type; one of \dQuote{\code{name}}, \dQuote{\code{id}}, or \dQuote{\code{name_and_id}} (the default).} \item{file}{If specified, the function will write the subtree to a file in newick format.} \item{...}{additional arguments to customize the API call (see \code{\link{rotl}} for more information).} } \value{ If no value is specified to the \code{file} argument (default), a phylogenetic tree of class \code{phylo}. Otherwise, the function returns invisibly a logical indicating whether the file was successfully created. } \description{ Return the induced subtree on the synthetic tree that relates a list of nodes.
} \details{ Return a tree with tips corresponding to the nodes identified in the input set that is consistent with the topology of the current synthetic tree. This tree is equivalent to the minimal subtree induced on the draft tree by the set of identified nodes. } \examples{ \dontrun{ res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710)) tree_file <- tempfile(fileext=".tre") tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710), file=tree_file)} } rotl/man/get_study_meta.Rd0000644000177500001440000000373613055075704015650 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies-methods.R, R/studies.R \name{get_tree_ids} \alias{get_tree_ids} \alias{get_publication} \alias{candidate_for_synth} \alias{get_study_year} \alias{get_tree_ids.study_meta} \alias{get_publication.study_meta} \alias{candidate_for_synth.study_meta} \alias{get_study_year.study_meta} \alias{get_study_meta} \title{Study Metadata} \usage{ get_tree_ids(sm) get_publication(sm) candidate_for_synth(sm) get_study_year(sm) \method{get_tree_ids}{study_meta}(sm) \method{get_publication}{study_meta}(sm) \method{candidate_for_synth}{study_meta}(sm) \method{get_study_year}{study_meta}(sm) get_study_meta(study_id, ...) } \arguments{ \item{sm}{an object created by \code{get_study_meta}} \item{study_id}{the study identifier (character)} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ named-list containing the metadata associated with the study requested } \description{ Retrieve metadata about a study in the Open Tree of Life datastore. } \details{ \code{get_study_meta} returns a long list of attributes for the studies that are contributing to the synthetic tree. 
To help with the extraction of relevant information from this list, several helper functions exist: \itemize{ \item {get_tree_ids} { The identifiers of the trees associated with the study } \item {get_publication} { The citation information of the publication for the study. The DOI (or URL) for the study is available as an attribute to the returned object (i.e., \code{attr(object, "DOI")} ). } \item {candidate_for_synth} { The identifier of the tree(s) from the study used in the synthetic tree. This is a subset of the result of \code{get_tree_ids}. } \item {get_study_year} { The year of publication of the study. } } } \examples{ \dontrun{ req <- get_study_meta("pg_719") get_tree_ids(req) candidate_for_synth(req) get_publication(req) get_study_year(req) } } rotl/man/match_names-methods.Rd0000644000177500001440000000355613055075704016553 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match_names.R, R/methods.R \name{ott_id.match_names} \alias{ott_id.match_names} \alias{flags.match_names} \alias{flags} \title{\code{ott_id} and \code{flags} for taxonomic names matched by \code{tnrs_match_names}} \usage{ \method{ott_id}{match_names}(tax, row_number, taxon_name, ott_id, ...) \method{flags}{match_names}(tax, row_number, taxon_name, ott_id, ...) flags(tax, ...) } \arguments{ \item{tax}{an object returned by \code{\link{tnrs_match_names}}} \item{row_number}{the row number corresponding to the name for which to list the synonyms} \item{taxon_name}{the taxon name corresponding to the name for which to list the synonyms} \item{ott_id}{the ott id corresponding to the name for which to list the synonyms} \item{...}{currently ignored} } \value{ A list of the ott ids or flags for the taxonomic names matched with \code{\link{tnrs_match_names}}, for either one or all the names.
} \description{ \code{rotl} provides a collection of functions that allows users to extract relevant information from an object generated by \code{\link{tnrs_match_names}} function. } \details{ These methods optionally accept one of the arguments \code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve the corresponding information for one of the matches in the object returned by the \code{\link{tnrs_match_names}} function. If these arguments are not provided, these methods can return information for the matches currently listed in the object returned by \code{\link{tnrs_match_names}}. } \examples{ \dontrun{ rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus")) rsp$ott_id # ott id for match currently in use ott_id(rsp) # similar as above but elements are named ## flags() is useful for instance to determine if a taxon is extinct flags(rsp, taxon_name="Tyrannosaurus") } } rotl/man/taxonomy-methods.Rd0000644000177500001440000000165713055075704016152 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{tax_rank} \alias{tax_rank} \alias{ott_id} \alias{synonyms} \alias{tax_sources} \alias{is_suppressed} \alias{unique_name} \alias{tax_name} \title{Methods for Taxonomy} \usage{ tax_rank(tax, ...) ott_id(tax, ...) synonyms(tax, ...) tax_sources(tax, ...) is_suppressed(tax, ...) unique_name(tax, ...) tax_name(tax, ...) } \arguments{ \item{tax}{an object returned by \code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}} \item{...}{additional arguments (see \code{\link{tnrs_match_names}})} } \description{ Methods for dealing with objects containing taxonomic information (Taxonomy, TNRS endpoints) } \details{ This is the page for the generic methods. See the help pages for \code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and \code{\link{tnrs_match_names}} for more information. 
} rotl/man/tax_lineage.Rd0000644000177500001440000000163313055075704015105 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R, R/taxonomy.R \name{tax_lineage} \alias{tax_lineage} \alias{tax_lineage.taxon_info} \title{Lineage of a taxon} \usage{ tax_lineage(tax, ...) \method{tax_lineage}{taxon_info}(tax, ...) } \arguments{ \item{tax}{an object created by \code{\link{taxonomy_taxon_info}} using the argument \code{include_lineage=TRUE}.} \item{...}{additional arguments (currently unused).} } \value{ A list with one slot per taxon that contains a data frame with 3 columns: the taxonomy rank, the name, and unique name for all taxa included in the lineage of the taxon up to the root of the tree. } \description{ Extract the lineage information (higher taxonomy) from an object returned by \code{\link{taxonomy_taxon_info}}. } \details{ The object passed to this function must have been created using the argument \code{include_lineage=TRUE}. } rotl/man/list_trees.Rd0000644000177500001440000000244113055075704015000 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies-methods.R \name{list_trees} \alias{list_trees} \alias{list_trees.matched_studies} \title{List trees ids in objects returned by \code{\link{studies_find_studies}} and \code{\link{studies_find_trees}}.} \usage{ list_trees(matched_studies, ...) \method{list_trees}{matched_studies}(matched_studies, study_id, ...) } \arguments{ \item{matched_studies}{an object created by \code{studies_find_trees} or \code{studies_find_studies}.} \item{...}{Currently unused} \item{study_id}{a \code{study_id} listed in the object returned by \code{studies_find_trees}} } \value{ \code{list_trees} returns a list of the tree_ids for each study that match the requested criteria. If a \code{study_id} is provided, then only the trees for this study are returned as a vector. 
} \description{ \code{list_trees} returns all trees associated with a particular study when used on an object returned by \code{\link{studies_find_studies}}, but only the trees that match the search criteria when used on objects returned by \code{\link{studies_find_trees}}. } \seealso{ \code{\link{studies_find_studies}} and \code{\link{studies_find_trees}}. The help for these functions have examples demonstrating the use of \code{list_trees}. } rotl/man/study_external_IDs.Rd0000644000177500001440000000256413055075704016442 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/external_data.R \name{study_external_IDs} \alias{study_external_IDs} \title{Get external identifiers for data associated with an Open Tree study} \usage{ study_external_IDs(study_id) } \arguments{ \item{study_id}{An open tree study ID} } \value{ A study_external_data object (which inherits from a list) which contains some of the following. doi, character, the DOI for the paper describing this study external_data_url, character, a URL to an external data repository (e.g. a treebase entry) if one exists. pubmed_id character, the unique ID for this study in the NCBI's pubmed database popset_ids character, vector of IDs for the NCBI's popset database nucleotide_ids character, vector of IDs for the NCBI's nucleotide database } \description{ Data associated with studies contributing to the Open Tree synthesis may be available from other databases. In particular, trees and alignments may be available from treebase and DNA sequences and bibliographic information associated with a given study may be available from the NCBI. This function retrieves that information for a given study. 
} \examples{ \dontrun{ flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae") study_external_IDs(flies[2,]$study_ids) } } \seealso{ studies_find_studies (used to discover study IDs) } rotl/man/taxonomy_subtree.Rd0000644000177500001440000000436613055075704016242 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/taxonomy.R \name{taxonomy_subtree} \alias{taxonomy_subtree} \title{Taxonomy subtree} \usage{ taxonomy_subtree(ott_id = NULL, output_format = c("taxa", "newick", "phylo", "raw"), label_format = NULL, file, ...) } \arguments{ \item{ott_id}{The ott id of the taxon of interest.} \item{output_format}{the format of the object to be returned. See the \sQuote{Return} section.} \item{label_format}{Character. Defines the label type; one of \dQuote{\code{name}}, \dQuote{\code{id}}, or \dQuote{\code{name_and_id}} (the default).} \item{file}{the file name where to save the output of the function. Ignored unless \code{output_format} is set to \dQuote{\code{phylo}}.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ If the \code{file} argument is missing: \itemize{ \item{\dQuote{\code{taxa}}} { a list of the taxa names (species) in slot \code{tip_label}, and higher-level taxonomy (e.g., families, genera) in slot \code{edge_label}, descending from the taxa corresponding to the \code{ott_id} provided. } \item{\dQuote{\code{newick}}} { a character vector containing the newick formatted string corresponding to the taxonomic subtree for the \code{ott_id} provided. } \item{\dQuote{\code{phylo}}} { an object of the class \code{phylo} from the \code{\link[ape]{ape}} package. } \item{\dQuote{\code{raw}}} { the direct output from the API, i.e., a list with an element named \sQuote{newick} that contains the subtree as a newick formatted string.
} } If a \code{file} argument is provided (and \code{output_format} is set to \dQuote{\code{phylo}}), a logical indicating whether the file was successfully created. } \description{ Given an ott id, return the inclusive taxonomic subtree descended from the specified taxon. } \details{ If the output of this function is exported to a file, the only possible value for the \code{output_format} argument is \dQuote{\code{newick}}. If the file provided already exists, it will be silently overwritten. } \examples{ \dontrun{ req <- taxonomy_subtree(ott_id=515698) plot(taxonomy_subtree(ott_id=515698, output_format="phylo")) } } rotl/man/tol_node_info.Rd0000644000177500001440000001157213055075704015446 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R, R/tol.R \name{tol_lineage} \alias{tol_lineage} \alias{tol_node_info} \alias{tax_rank.tol_node} \alias{tax_sources.tol_node} \alias{unique_name.tol_node} \alias{tax_name.tol_node} \alias{ott_id.tol_node} \alias{source_list.tol_node} \alias{tax_lineage.tol_node} \alias{tol_lineage.tol_node} \title{Node info} \usage{ tol_lineage(tax, ...) tol_node_info(ott_id = NULL, node_id = NULL, include_lineage = FALSE, ...) \method{tax_rank}{tol_node}(tax, ...) \method{tax_sources}{tol_node}(tax, ...) \method{unique_name}{tol_node}(tax, ...) \method{tax_name}{tol_node}(tax, ...) \method{ott_id}{tol_node}(tax, ...) \method{source_list}{tol_node}(tax, ...) \method{tax_lineage}{tol_node}(tax, ...) \method{tol_lineage}{tol_node}(tax, ...) } \arguments{ \item{tax}{an object returned by \code{tol_node_info}.} \item{...}{additional arguments to customize the API call (see ?rotl for more information)} \item{ott_id}{Numeric. The OpenTree taxonomic identifier.} \item{node_id}{Character. The OpenTree node identifier.} \item{include_lineage}{Logical (default = FALSE). 
Whether to return the lineage of the node from the synthetic tree.} } \value{ \code{tol_node_info} returns an invisible list of summary information about the queried node: \itemize{ \item {node_id} {String. The canonical identifier of the node.} \item {num_tips} {Numeric. The number of descendent tips.} \item {taxon} {A list of taxonomic properties. Only returned if the queried node is a taxon. Each source has:} \itemize{ \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).} \item {name} {String. The taxonomic name of the queried node.} \item {unique_name} {String. The string that uniquely identifies the taxon in OTT.} \item {rank} {String. The taxonomic rank of the taxon in OTT.} \item {tax_sources} {List. A list of identifiers for taxonomic sources, such as other taxonomies, that define taxa judged equivalent to this taxon.} } The following properties list support/conflict for the node across synthesis source trees. All properties involve sourceid keys and nodeid values (see \code{source_id_map} below). \item {partial_path_of} {List. The edge below this synthetic tree node is compatible with the edge below each of these input tree nodes (one per tree). Each returned element is reported as sourceid:nodeid.} \item {supported_by} {List. Input tree nodes (one per tree) that support this synthetic tree node. Each returned element is reported as sourceid:nodeid.} \item {terminal} {List. Input tree nodes (one per tree) that are equivalent to this synthetic tree node (via an exact mapping, or the input tree terminal may be the only terminal descended from this synthetic tree node). Each returned element is reported as sourceid:nodeid.} \item {conflicts_with} {Named list of lists. Names correspond to sourceid keys. Each list contains input tree node ids (one or more per tree) that conflict with this synthetic node.} \item {source_id_map} {Named list of lists. Names correspond to the sourceid keys used in the 4 properties above.
Source trees will have the following properties:} \itemize{ \item {git_sha} {The git SHA identifying a particular source version.} \item {tree_id} {The tree id associated with the study id used.} \item {study_id} {The study identifier. Will typically include a prefix ("pg_" or "ot_").} } The only sourceid that does not correspond to a source tree is the taxonomy, which will have the name "ott"+`taxonomy_version`, and the value is the ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever appear in \code{supported_by}. } \code{tol_lineage} and \code{tax_lineage} return data frames. \code{tol_lineage} indicates for each ancestor its node identifier, the number of tips descending from that node, and whether it corresponds to a taxonomic level. } \description{ Get summary information about a node in the synthetic tree } \details{ Returns summary information about a node in the graph. The node of interest may be specified using either a node id or a taxon id, but not both. If the specified node or OTT id is not in the graph, an error will be returned. If the argument \code{include_lineage=TRUE} is used, you can use \code{tax_lineage()} or \code{tol_lineage} to return the taxonomic information or the node information for all the ancestors to this node, down to the root of the tree. } \examples{ \dontrun{ birds <- tol_node_info(ott_id=81461, include_lineage=TRUE) source_list(birds) tax_rank(birds) ott_id(birds) tax_lineage(birds) tol_lineage(birds)} } rotl/man/tnrs_infer_context.Rd0000644000177500001440000000177113055075704016545 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tnrs.R \name{tnrs_infer_context} \alias{tnrs_infer_context} \title{Infer the taxonomic context from a list of names} \usage{ tnrs_infer_context(names = NULL, ...)
} \arguments{ \item{names}{Vector of taxon names.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A list including the context name, the context ott id and possibly the names in the query that have an ambiguous taxonomic meaning in the query. } \description{ Return a taxonomic context given a list of taxonomic names } \details{ Find the least inclusive taxonomic context that includes all the unambiguous names in the input set. Unambiguous names are names with exact matches to non-homonym taxa. Ambiguous names (those without exact matches to non-homonym taxa) are indicated in results. } \examples{ \dontrun{ res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus")) } } rotl/man/source_list.Rd0000644000177500001440000000151113055075704015153 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R, R/tol.R \name{source_list} \alias{source_list} \alias{source_list.tol_summary} \title{List of studies used in the Tree of Life} \usage{ source_list(tax, ...) \method{source_list}{tol_summary}(tax, ...) } \arguments{ \item{tax}{a list containing a \code{source_id_map} slot.} \item{...}{additional arguments (currently unused)} } \value{ a data frame } \description{ Retrieve the detailed information for the list of studies used in the Tree of Life. } \details{ This function takes the object resulting from \code{tol_about(study_list = TRUE)}, \code{tol_mrca()}, \code{tol_node_info()}, and returns a data frame listing the \code{tree_id}, \code{study_id} and \code{git_sha} for the studies currently included in the Tree of Life. } rotl/man/taxonomy_about.Rd0000644000177500001440000000212513055075704015672 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/taxonomy.R \name{taxonomy_about} \alias{taxonomy_about} \title{Information about the Open Tree Taxonomy} \usage{ taxonomy_about(...) 
} \arguments{ \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A list with the following properties: \itemize{ \item {weburl} {String. The release page for this version of the taxonomy.} \item {author} {String. The author string.} \item {name} {String. The name of the taxonomy.} \item {source} {String. The full identifying information for this version of the taxonomy.} \item {version} {String. The version number of the taxonomy.} } } \description{ Summary information about the Open Tree Taxonomy (OTT) } \details{ Return metadata and information about the taxonomy itself. Currently, the available metadata is fairly sparse, but includes (at least) the version, and the location from which the complete taxonomy source files can be downloaded. } \examples{ \dontrun{ taxonomy_about() } } rotl/man/studies_find_trees.Rd0000644000177500001440000000561513055075704016513 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{studies_find_trees} \alias{studies_find_trees} \title{Find Trees} \usage{ studies_find_trees(property = NULL, value = NULL, verbose = FALSE, exact = FALSE, detailed = TRUE, ...) } \arguments{ \item{property}{The property to be searched on (character)} \item{value}{The property-value to be searched on (character)} \item{verbose}{Should the output include all metadata? (logical, default \code{FALSE})} \item{exact}{Should exact matching be used for the value? (logical, default \code{FALSE})} \item{detailed}{Should a detailed report be provided? If \code{TRUE} (default), the output will include metadata about the studies that include trees matching the property.
Otherwise, only information about the trees will be provided.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A data frame that summarizes the trees found (and their associated studies) for the requested criteria. If a study has more than 5 trees, the \code{tree_ids} of the first ones will be shown, followed by \code{...} to indicate that more are present. If \code{detailed=FALSE}, the data frame will include the study ids of the study (\code{study_ids}), the number of trees in this study that match the search criteria (\code{n_matched_trees}), the tree ids that match the search criteria (\code{match_tree_ids}). If \code{detailed=TRUE}, in addition to the fields listed above, the data frame will also contain the total number of trees associated with the study (\code{n_trees}), all the tree ids associated with the study (\code{tree_ids}), the tree id that is a potential candidate for inclusion in the synthetic tree (if any) (\code{candidate}), the year the study was published (\code{study_year}), the title of the study (\code{title}), the DOI for the study (\code{study_doi}). } \description{ Return a list of studies for which trees match a given set of properties } \details{ The list of possible values to be used as values for the argument \code{property} can be found using the function \code{\link{studies_properties}}. } \examples{ \dontrun{ res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia", detailed=FALSE) ## summary of the trees and associated studies that match this criterion res ## With metadata about the studies (default) res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia", detailed=TRUE) ## The list of trees for each study that match the search criteria list_trees(res) ## the trees for a given study list_trees(res, study_id = "pg_2769") } } \seealso{ \code{\link{studies_properties}} which lists properties the studies can be searched on.
\code{\link{list_trees}} for listing the trees that match the query. } rotl/man/match_names.Rd0000644000177500001440000000500213055075704015076 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match_names.R \name{inspect.match_names} \alias{inspect.match_names} \alias{inspect} \alias{update.match_names} \title{Inspect and Update alternative matches for a name returned by tnrs_match_names} \usage{ \method{inspect}{match_names}(response, row_number, taxon_name, ott_id, ...) inspect(response, ...) \method{update}{match_names}(object, row_number, taxon_name, ott_id, new_row_number, new_ott_id, ...) } \arguments{ \item{response}{an object generated by the \code{\link{tnrs_match_names}} function} \item{row_number}{the row number corresponding to the name to inspect} \item{taxon_name}{the taxon name corresponding to the name to inspect} \item{ott_id}{the ott id corresponding to the name to inspect} \item{...}{currently ignored} \item{object}{an object created by \code{\link{tnrs_match_names}}} \item{new_row_number}{the row number in the output of \code{\link{inspect}} to replace the taxa specified by \code{row_number}, \code{taxon_name}, or \code{ott_id}.} \item{new_ott_id}{the ott id of the taxon to replace the taxa specified by \code{row_number}, \code{taxon_name}, or \code{ott_id}.} } \value{ a data frame } \description{ Taxonomic names may have different meanings in different taxonomic contexts, as the same genus name can be applied to animals and plants for instance. Additionally, the meaning of a taxonomic name may have changed throughout its history, and may have referred to a different taxon in the past. In such cases, a given name might have multiple matches in the Open Tree Taxonomy. These functions allow users to inspect (and update) alternative meanings of a given name and its current taxonomic status according to the Open Tree Taxonomy.
} \details{ To inspect alternative taxonomic meanings of a given name, you need to provide the object resulting from a call to the tnrs_match_names function, as well as one of either the row number corresponding to the name in this object, the name itself (as used in the original query), or the ott_id listed for this name. To update one of the names, you also need to provide the row number in which the name to be replaced appears or its ott id. } \examples{ \dontrun{ matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus")) inspect(matched_names, taxon_name="diadema") new_matched_names <- update(matched_names, taxon_name="diadema", new_ott_id = 631176) new_matched_names } } \seealso{ \code{\link{tnrs_match_names}} } rotl/man/studies_properties.Rd0000644000177500001440000000220313055075704016553 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{studies_properties} \alias{studies_properties} \title{Properties of the Studies} \usage{ studies_properties(...) } \arguments{ \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A list of the study properties that can be used to find studies and trees that are contributing to the synthetic tree. } \description{ Return the list of study properties that can be used to search studies and trees used in the synthetic tree. } \details{ The list returned has 2 elements \code{tree_properties} and \code{studies_properties}. Each of these elements lists the properties that can be used to search for trees and studies that are contributing to the synthetic tree.
The definitions of these properties are available from \url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON} } \examples{ \dontrun{ all_the_properties <- studies_properties() unlist(all_the_properties$tree_properties) } } \seealso{ \code{\link{studies_find_trees}} } rotl/man/get_study.Rd0000644000177500001440000000374413055075704014641 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{get_study} \alias{get_study} \title{Get all the trees associated with a particular study} \usage{ get_study(study_id = NULL, object_format = c("phylo", "nexml"), file_format, file, ...) } \arguments{ \item{study_id}{the study ID for the study of interest (character)} \item{object_format}{the class of the object the query should return (either \code{phylo} or \code{nexml}). Ignored if \code{file_format} is specified.} \item{file_format}{the format of the file to be generated (\code{newick}, \code{nexus}, \code{nexml} or \code{json}).} \item{file}{the file name where the output of the function will be saved.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ if \code{file_format} is missing, an object of class \code{phylo} or \code{nexml}, otherwise a logical indicating whether the file was successfully created. } \description{ Returns the trees associated with a given study } \details{ If \code{file_format} is missing, the function returns an object of the class \code{phylo} from the \code{\link[ape]{ape}} package (default), or an object of the class \code{nexml} from the \code{RNeXML} package. Otherwise \code{file_format} can be either \code{newick}, \code{nexus}, \code{nexml} or \code{json}, and the function will generate a file of the selected format. In this case, a file name needs to be provided using the argument \code{file}. If a file with the same name already exists, it will be silently overwritten. 
} \examples{ \dontrun{ that_one_study <- get_study(study_id="pg_719", object_format="phylo") if (require(RNeXML)) { ## if RNeXML is installed get the object directly nexml_study <- get_study(study_id="pg_719", object_format="nexml") } else { ## otherwise write it to a file get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml")) } } } \seealso{ \code{\link{get_study_meta}} } rotl/man/tol_mrca.Rd0000644000177500001440000001343413055075704014427 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tol.R \name{tol_mrca} \alias{tol_mrca} \alias{tax_sources.tol_mrca} \alias{unique_name.tol_mrca} \alias{tax_name.tol_mrca} \alias{tax_rank.tol_mrca} \alias{ott_id.tol_mrca} \alias{source_list.tol_mrca} \title{MRCA of taxa from the synthetic tree} \usage{ tol_mrca(ott_ids = NULL, node_ids = NULL, ...) \method{tax_sources}{tol_mrca}(tax, ...) \method{unique_name}{tol_mrca}(tax, ...) \method{tax_name}{tol_mrca}(tax, ...) \method{tax_rank}{tol_mrca}(tax, ...) \method{ott_id}{tol_mrca}(tax, ...) \method{source_list}{tol_mrca}(tax, ...) } \arguments{ \item{ott_ids}{Numeric vector. The ott ids for which the MRCA is desired.} \item{node_ids}{Character vector. The node ids for which the MRCA is desired.} \item{...}{additional arguments to customize the API call (see \code{\link{rotl}} for more information).} \item{tax}{an object returned by \code{tol_mrca()}.} } \value{ An invisible list of the MRCA node properties: \itemize{ \item {mrca} {List of node properties.} \itemize{ \item {node_id} {String. The canonical identifier of the node.} \item {num_tips} {Numeric. The number of descendent tips.} \item {taxon} {A list of taxonomic properties. Only returned if the queried node is a taxon. (If the node is not a taxon, a \code{nearest_taxon} list is returned (see below)).} \itemize{ \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).} \item {name} {String. 
The taxonomic name of the queried node.} \item {unique_name} {String. The string that uniquely identifies the taxon in OTT.} \item {rank} {String. The taxonomic rank of the taxon in OTT.} \item {tax_sources} {List. A list of identifiers for taxonomic sources, such as other taxonomies, that define taxa judged equivalent to this taxon.} } The following properties list support/conflict for the node across synthesis source trees. All properties involve sourceid keys and nodeid values (see \code{source_id_map} below). Not all properties are present for every node. \item {partial_path_of} {List. The edge below this synthetic tree node is compatible with the edge below each of these input tree nodes (one per tree). Each returned element is reported as sourceid:nodeid.} \item {supported_by} {List. Input tree nodes (one per tree) that support this synthetic tree node. Each returned element is reported as sourceid:nodeid.} \item {terminal} {List. Input tree nodes (one per tree) that are equivalent to this synthetic tree node (via an exact mapping, or the input tree terminal may be the only terminal descended from this synthetic tree node). Each returned element is reported as sourceid:nodeid.} \item {conflicts_with} {Named list of lists. Names correspond to sourceid keys. Each list contains input tree node ids (one or more per tree) that conflict with this synthetic node.} } \item {nearest_taxon} {A list of taxonomic properties of the nearest rootward taxon node to the MRCA node. Only returned if the MRCA node is not a taxon (otherwise the \code{taxon} list above is returned).} \itemize{ \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).} \item {name} {String. The taxonomic name of the queried node.} \item {unique_name} {String. The string that uniquely identifies the taxon in OTT.} \item {rank} {String. The taxonomic rank of the taxon in OTT.} \item {tax_sources} {List.
A list of identifiers for taxonomic sources, such as other taxonomies, that define taxa judged equivalent to this taxon.} } \item {source_id_map} {Named list of lists. Names correspond to the sourceid keys used in the support/conflict properties of the \code{mrca} list above. Source trees will have the following properties:} \itemize{ \item {git_sha} {The git SHA identifying a particular source version.} \item {tree_id} {The tree id associated with the study id used.} \item {study_id} {The study identifier. Will typically include a prefix ("pg_" or "ot_").} } The only sourceid that does not correspond to a source tree is the taxonomy, which will have the name "ott"+`taxonomy_version`, and the value is the ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever appear in \code{supported_by}. } } \description{ Most Recent Common Ancestor for a set of nodes } \details{ Get the MRCA of a set of nodes on the current synthetic tree. Accepts any combination of node ids and ott ids as input. Returns information about the most recent common ancestor (MRCA) node as well as the most recent taxonomic ancestor (MRTA) node (the closest taxonomic node to the MRCA node in the synthetic tree; the MRCA and MRTA may be the same node). If they are the same, the taxonomic information will be in the \code{mrca} slot, otherwise they will be in the \code{nearest_taxon} slot of the list. If any of the specified nodes is not in the synthetic tree an error will be returned. Taxonomic methods (\code{tax_sources()}, \code{ott_id()}, \code{unique_name()}, ...) are available on the objects returned by \code{tol_mrca()}. If the MRCA node is MRTA, the name of the object returned by these methods will start with \sQuote{ott}, otherwise it will start with \sQuote{mrca}.
} \examples{ \dontrun{ birds_mrca <- tol_mrca(ott_ids=c(412129, 536234)) ott_id(birds_mrca) tax_sources(birds_mrca)} } rotl/man/synonyms.match_names.Rd0000644000177500001440000000346713055075704017011 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match_names.R \name{synonyms.match_names} \alias{synonyms.match_names} \title{List the synonyms for a given name} \usage{ \method{synonyms}{match_names}(tax, row_number, taxon_name, ott_id, ...) } \arguments{ \item{tax}{a data frame generated by the \code{\link{tnrs_match_names}} function} \item{row_number}{the row number corresponding to the name for which to list the synonyms} \item{taxon_name}{the taxon name corresponding to the name for which to list the synonyms} \item{ott_id}{the ott id corresponding to the name for which to list the synonyms} \item{...}{currently ignored} } \value{ a list whose elements are all synonym names (as vectors of character) for the taxonomic names that match the query (the names of the elements of the list). } \description{ When querying the Taxonomic Name Resolution Services for a particular taxonomic name, the API returns as possible matches all names that include the queried name as a possible synonym. This function allows you to explore other synonyms for an accepted name, and allows you to determine why the name you queried is returning an accepted synonym. } \details{ To list synonyms for a given taxonomic name, you need to provide the object resulting from a call to the \code{\link{tnrs_match_names}} function, as well as one of either the row number corresponding to the name in this object, the name itself (as used in the original query), or the ott_id listed for this name. Otherwise, the synonyms for all the currently matched names are returned.
} \examples{ \dontrun{ echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia")) ## These 3 calls are identical synonyms(echino, taxon_name="Acanthaster") synonyms(echino, row_number=2) synonyms(echino, ott_id=337928) } } rotl/man/tol_subtree.Rd0000644000177500001440000000301713055075704015152 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tol.R \name{tol_subtree} \alias{tol_subtree} \title{Extract a subtree from the synthetic tree} \usage{ tol_subtree(ott_id = NULL, node_id = NULL, label_format = NULL, file, ...) } \arguments{ \item{ott_id}{Numeric. The ott id of the node in the tree that should serve as the root of the tree returned.} \item{node_id}{Character. The node id of the node in the tree that should serve as the root of the tree returned.} \item{label_format}{Character. Defines the label type; one of \dQuote{\code{name}}, \dQuote{\code{id}}, or \dQuote{\code{name_and_id}} (the default).} \item{file}{If specified, the function will write the subtree to a file in newick format.} \item{...}{additional arguments to customize the API call (see \code{\link{rotl}} for more information).} } \value{ If no value is specified to the \code{file} argument (default), a phylogenetic tree of class \code{phylo}. Otherwise, the function returns invisibly a logical indicating whether the file was successfully created. } \description{ Extract a subtree from the synthetic tree from an Open Tree node id. } \details{ Given a node, return the subtree of the synthetic tree descended from that node. The start node may be specified using either a node id or an ott id, but not both. If the specified node is not in the synthetic tree an error will be returned. There is a size limit of 25000 tips for this method.
} \examples{ \dontrun{ res <- tol_subtree(ott_id=241841)} } rotl/man/taxon_external_IDs.Rd0000644000177500001440000000167613055075704016426 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/external_data.R \name{taxon_external_IDs} \alias{taxon_external_IDs} \title{Get external identifiers for data associated with an Open Tree taxon} \usage{ taxon_external_IDs(taxon_id) } \arguments{ \item{taxon_id}{An Open Tree taxon ID (OTT id)} } \value{ a data.frame in which each row represents a unique record in an external database. The column "source" provides an abbreviated name for the database, and "id" the unique ID for the record. } \description{ The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This function retrieves identifiers to external taxonomic records that have contributed the rank, position and definition of a given Open Tree taxon. } \examples{ \dontrun{ gibbon_IDs <- taxon_external_IDs(712902) } } \seealso{ tnrs_match_names, which can be used to search for taxa by name. taxonomy_taxon, for more information about a given taxon. } rotl/man/studies_find_studies.Rd0000644000177500001440000000476213055075704017053 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{studies_find_studies} \alias{studies_find_studies} \title{Find a Study} \usage{ studies_find_studies(property = NULL, value = NULL, verbose = FALSE, exact = FALSE, detailed = TRUE, ...) } \arguments{ \item{property}{The property to be searched on (character)} \item{value}{The property value to be searched on (character)} \item{verbose}{Should the output include all metadata (logical default \code{FALSE})} \item{exact}{Should exact matching be used? (logical, default \code{FALSE})} \item{detailed}{If \code{TRUE} (default), the function will return a data frame that summarizes information about the study (see \sQuote{Value}).
Otherwise, it only returns the study identifiers.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ If \code{detailed=TRUE}, the function returns a data frame listing the study id (\code{study_ids}), the number of trees associated with this study (\code{n_trees}), the tree ids (at most 5) associated with the studies (\code{tree_ids}), the tree id that is a candidate for the synthetic tree if any (\code{candidate}), the year of publication of the study (\code{study_year}), the title of the publication for the study (\code{title}), and the DOI (Digital Object Identifier) for the study (\code{study_doi}). If \code{detailed=FALSE}, the function returns a data frame with a single column containing the study identifiers. } \description{ Return the identifiers of studies that match given properties } \examples{ \dontrun{ ## To match a study for which the identifier is already known one_study <- studies_find_studies(property="ot:studyId", value="pg_719") list_trees(one_study) ## To find studies pertaining to Mammals mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="mammalia") ## To extract the tree identifiers for each of the studies list_trees(mammals) ## ... or for a given study list_trees(mammals, "ot_308") ## Just the identifiers without other information about the studies mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="mammalia", detailed=FALSE) } } \seealso{ \code{\link{studies_properties}} which lists properties against which the studies can be searched. \code{\link{list_trees}} that returns a list for all tree ids associated with a study. 
} rotl/man/strip_ott_ids.Rd0000644000177500001440000000165313055075704015515 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tol.R \name{strip_ott_ids} \alias{strip_ott_ids} \title{Strip OTT ids from tip labels} \usage{ strip_ott_ids(tip_labels, remove_underscores = FALSE) } \arguments{ \item{tip_labels}{a character vector containing tip labels (most likely the \code{tip.label} element from a tree returned by \code{\link{tol_induced_subtree}})} \item{remove_underscores}{logical (defaults to FALSE). If set to TRUE underscores in tip labels are converted to spaces} } \value{ A character vector containing the contents of \code{tip_labels} with any OTT ids removed. } \description{ Strip OTT ids from tip labels } \examples{ \dontrun{ genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio") tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710)) tr$tip.label \%in\% genera tr$tip.label <- strip_ott_ids(tr$tip.label) tr$tip.label \%in\% genera} } rotl/man/taxonomy_mrca.Rd0000644000177500001440000000331613055075704015505 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/taxonomy.R \name{taxonomy_mrca} \alias{taxonomy_mrca} \alias{tax_rank.taxon_mrca} \alias{tax_name.taxon_mrca} \alias{ott_id.taxon_mrca} \alias{unique_name.taxon_mrca} \alias{tax_sources.taxon_mrca} \alias{flags.taxon_mrca} \alias{is_suppressed.taxon_mrca} \title{Taxonomic MRCA} \usage{ taxonomy_mrca(ott_ids = NULL, ...) \method{tax_rank}{taxon_mrca}(tax, ...) \method{tax_name}{taxon_mrca}(tax, ...) \method{ott_id}{taxon_mrca}(tax, ...) \method{unique_name}{taxon_mrca}(tax, ...) \method{tax_sources}{taxon_mrca}(tax, ...) \method{flags}{taxon_mrca}(tax, ...) \method{is_suppressed}{taxon_mrca}(tax, ...)
} \arguments{ \item{ott_ids}{a vector of ott ids for the taxa whose MRCA is to be found (numeric).} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} \item{tax}{an object generated by the \code{taxonomy_mrca} function} } \value{ \itemize{ \item{\code{taxonomy_mrca}} { returns a list about the taxonomic information relating to the MRCA for the ott_ids provided. } \item{\code{tax_rank}} { returns a character vector of the taxonomic rank for the MRCA. } \item{\code{tax_name}} { returns a character vector of the Open Tree Taxonomy name for the MRCA. } \item{\code{ott_id}} { returns a numeric vector of the ott id for the MRCA. } } } \description{ Taxonomic Least Inclusive Common Ancestor (MRCA) } \details{ Given a set of OTT ids, get the taxon that is the most recent common ancestor (the MRCA) of all the identified taxa. } \examples{ \dontrun{ req <- taxonomy_mrca(ott_ids=c(515698,590452,643717)) tax_rank(req) tax_name(req) ott_id(req) } } rotl/man/is_in_tree.Rd0000644000177500001440000000263113056074002014733 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is_in_tree.R \name{is_in_tree} \alias{is_in_tree} \title{Check that OTT ids occur in the Synthetic Tree} \usage{ is_in_tree(ott_ids, ...) } \arguments{ \item{ott_ids}{a vector of Open Tree Taxonomy identifiers} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ A named logical vector. \code{TRUE} indicates that the OTT id is in the synthetic tree, and \code{FALSE} that it is not. } \description{ Some valid taxonomic names do not occur in the Synthetic Tree. This convenience function allows you to check whether a given Open Tree Taxonomy identifier (OTT id) is in the tree. A taxonomic name may not occur in the synthetic tree because (1) it is an extinct or invalid taxon, or (2) it is part of a group that is not monophyletic in the tree.
} \examples{ \dontrun{ plant_families <- c("Asteraceae", "Solanaceae", "Poaceae", "Amaranthaceae", "Zamiaceae", "Araceae", "Juncaceae") matched_names <- tnrs_match_names(plant_families) ## This fails because some ott ids are not in the tree ## plant_tree <- tol_induced_subtree(ott_id(matched_names)) ## So let's check which ones are actually in the tree first: in_tree <- is_in_tree(ott_id(matched_names)) ## This now works: plant_tree <- tol_induced_subtree(ott_id(matched_names)[in_tree]) } } rotl/man/get_study_tree.Rd0000644000177500001440000000434313055075704015654 0ustar deepayanusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/studies.R \name{get_study_tree} \alias{get_study_tree} \title{Study Tree} \usage{ get_study_tree(study_id = NULL, tree_id = NULL, object_format = c("phylo"), tip_label = c("original_label", "ott_id", "ott_taxon_name"), file_format, file, deduplicate = TRUE, ...) } \arguments{ \item{study_id}{the identifier of a study (character)} \item{tree_id}{the identifier of a tree within the study} \item{object_format}{the class of the object to be returned (default and currently only possible value \code{phylo} from the \code{\link[ape]{ape}} package).} \item{tip_label}{the format of the tip labels. \dQuote{\code{original_label}} (default) returns the original labels as provided in the study, \dQuote{\code{ott_id}} labels are replaced by their ott IDs, \dQuote{\code{ott_taxon_name}} labels are replaced by their Open Tree Taxonomy taxon name.} \item{file_format}{the format of the file to be generated (\code{newick} default, \code{nexus}, or \code{json}).} \item{file}{the file name where the output of the function will be saved.} \item{deduplicate}{logical (default \code{TRUE}). If the tree returned by the study contains duplicated taxon names, should they be made unique? It is normally illegal for NEXUS/Newick tree strings to contain duplicated tip names. This is a workaround to circumvent this requirement. 
If \code{TRUE}, duplicated tip labels will be appended \code{_1}, \code{_2}, etc.} \item{...}{additional arguments to customize the API request (see \code{\link{rotl}} package documentation).} } \value{ if \code{file_format} is missing, an object of class \code{phylo}, otherwise a logical indicating whether the file was successfully created. } \description{ Returns a specific tree from within a study } \examples{ \dontrun{ tree <- get_study_tree(study_id="pg_1144", tree_id="tree2324") ## comparison of the first few tip labels depending on the options used head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="original_label")$tip.label) head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_id")$tip.label) head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_taxon_name")$tip.label) } } rotl/LICENSE0000644000177500001440000000011713056156657012574 0ustar deepayanusersYEAR: 2017 COPYRIGHT HOLDER: Francois Michonneau, Joseph W. Brown, David Winter