rotl/ 0000755 0001775 0000144 00000000000 13056457107 011562 5 ustar deepayan users rotl/inst/ 0000755 0001775 0000144 00000000000 13056407503 012532 5 ustar deepayan users rotl/inst/CITATION 0000644 0001775 0000144 00000004761 13055321701 013671 0 ustar deepayan users c(
## Primary citation: the rotl paper in Methods in Ecology and Evolution.
bibentry(
  header = "To cite rotl in publications use:",
  bibtype = "Article",
  title = "{rotl}: an R package to interact with the Open Tree of Life data",
  ## c() on person objects replaces the deprecated personList().
  author = c(as.person("Francois Michonneau"),
             as.person("Joseph W. Brown"),
             as.person("David J. Winter")),
  journal = "Methods in Ecology and Evolution",
  year = "2016",
  volume = "7",
  number = "12",
  pages = "1476-1481",
  doi = "10.1111/2041-210X.12593",
  ## The continuation lines of this string are kept flush left so that the
  ## text itself is unchanged.
  textVersion =
    paste("Michonneau, F., Brown, J. W. and Winter, D. J. (2016),
rotl: an R package to interact with the Open Tree of Life data.
Methods Ecol Evol. 7(12):1476-1481. doi:10.1111/2041-210X.12593")
),
## Secondary citation: the Open Tree of Life synthesis paper (PNAS 2015).
bibentry(
  header = "You may also want to cite the paper for the Open Tree of Life",
  bibtype = "Article",
  ## A stray second comma used to follow this field; it passed an empty
  ## argument into bibentry(...), so it has been removed.
  title = "Synthesis of phylogeny and taxonomy into a comprehensive tree of life",
  ## c() on person objects replaces the deprecated personList().
  author = c(
    as.person("Cody E. Hinchliff"),
    as.person("Stephen A. Smith"),
    as.person("James F. Allman"),
    as.person("J. Gordon Burleigh"),
    as.person("Ruchi Chaudhary"),
    as.person("Lyndon M. Coghill"),
    as.person("Keith A. Crandall"),
    as.person("Jiabin Deng"),
    as.person("Bryan T. Drew"),
    as.person("Romina Gazis"),
    as.person("Karl Gude"),
    as.person("David S. Hibbett"),
    as.person("Laura A. Katz"),
    as.person("H. Dail Laughinghouse IV"),
    as.person("Emily Jane McTavish"),
    as.person("Peter E. Midford"),
    as.person("Christopher L. Owen"),
    as.person("Richard H. Ree"),
    as.person("Jonathan A. Rees"),
    as.person("Douglas E. Soltis"),
    as.person("Tiffani Williams"),
    as.person("Karen A. Cranston")),
  journal = "Proceedings of the National Academy of Sciences",
  year = "2015",
  volume = "112",
  number = "41",
  pages = "12764-12769",
  doi = "10.1073/pnas.1423041112",
  ## The continuation lines of this string are kept flush left so that the
  ## text itself is unchanged.
  textVersion = c("Hinchliff, C. E., et al. (2015). Synthesis of phylogeny and
taxonomy into a comprehensive tree of life. Proceedings of
the National Academy of Sciences 112.41 (2015): 12764-12769")
)
)
rotl/inst/extdata/ 0000755 0001775 0000144 00000000000 12707532206 014165 5 ustar deepayan users rotl/inst/extdata/egg.csv 0000644 0001775 0000144 00000021115 12706456642 015454 0 ustar deepayan users animal,Spp,Lndim,Measure,Neggs,Nclutches,ESr,Type,StudyID,Year,D,EN,Zr,VZr
Zonotrichia_leucophrys,White-crowned sparrow,0,volume,294,73,0.140045943,stat,Mead1987,1987,3.421917808,85.91673339,0.140972438,0.012060292
Passer_domesticus,House sparrow,0.009407469,volume,149,31,0.11175203,stat,Cordero2000,2000,4.04516129,36.83413078,0.112220753,0.029555954
Serinus_canaria,Canary,0,volume,52,21,0.4967914,stat,Leitner2006,2006,2.180952381,23.84279476,0.545037117,0.047978211
Turdus_merula,European blackbird,0.021189299,volume,82,54,0.3859854,stat,Martyka2010,2010,1.414814815,57.95811518,0.40707397,0.018195675
Agelaius_phoeniceus,Red-winged blackbird,0.218316086,volume,394,106,0.07410136,raw,Weatherhead1985,1985,3.173584906,124.1498216,0.074237439,0.008254242
Quiscalus_mexicanus,Great-tailed grackle,0.281894985,mass,822,205,0.051788336,raw,Teather1989,1989,3.407804878,241.2109934,0.05183471,0.004197959
Taeniopygia_guttata,Zebra finch,-0.010812869,mass,116,24,-0.05636213,stat,Rutkowska2005,2005,4.066666667,28.52459016,-0.056421926,0.039177906
Taeniopygia_guttata,Zebra finch,-0.010812869,mass,90,20,0,stat,Rutkowska2002,2002,3.8,23.68421053,0,0.048346056
Vanellus_vanellus,Northern lapwing,-0.029825984,volume,114,32,0.03014961,stat,Lislevand2005,2005,3.05,37.37704918,0.03015875,0.029089175
Philomachus_pugnax,Ruff,0.22184875,volume,120,30,0.03462025,stat,Thuman2003,2003,3.4,35.29411765,0.034634091,0.030965392
Luscinia_svecica,Bluethroat,0,volume,102,18,-0.1468127,stat,Lifjeld2005,2005,4.733333333,21.54929577,-0.147881353,0.053910402
Sturnus_unicolor,Spotless starling,0.025305865,mass,153,34,-0.133824538,stat,Cordero2001,2001,3.8,40.26315789,-0.134632122,0.026836158
Branta_canadensis,Canada goose,0.061028185,mass,242,44,-0.006674089,stat,Leblanc1987,1987,4.6,52.60869565,-0.006674188,0.020157756
Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,132,33,-0.1910071,stat,Martinez-Padilla2007,2007,3.4,38.82352941,-0.193382195,0.027914614
Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,108,28,0.2165249,stat,Blanco2003,2003,3.285714286,32.86956522,0.220007175,0.033478894
Larus_michahellis,Yellow-legged gull,0.091409863,mass,1068,669,-0.06718163,stat,Rubolini2009,2009,1.477130045,723.0236794,-0.067282976,0.001388843
Cincloramphus_cruralis,Brown songlark,0.363356588,mass,44,17,0.1439293,stat,Isaksson2010,2010,2.270588235,19.37823834,0.144935702,0.061056628
Pavo_cristatus,Peafowl,0.119205592,mass,3313,205,0.005428755,stat,Petrie2001,2001,13.12878049,252.3463625,0.005428808,0.004010486
Parus_caeruleus,Blue tit,0.010299957,mass,192,21,0.07261821,stat,Cichon2003,2003,7.514285714,25.5513308,0.072746264,0.044343281
Larus_argentatus,Herring gull,0.049687784,mass,79,30,0.1160999,stat,Bogdanova2006,2006,2.306666667,34.24855491,0.116625804,0.03200148
Larus_fuscus,Lesser black-backed gull,0.064503231,mass,60,22,0.124354,stat,Bogdanova2005,2005,2.381818182,25.19083969,0.125001014,0.045063639
Corvus_monedula,Jackdaw,0.041392685,mass,226,70,0.2033713,stat,Arnold2003,2003,2.782857143,81.21149897,0.206246808,0.012785844
Carpodacus_mexicanus,House finch,0.002809678,volume,378,84,-0.075561895,raw,Badyaev2006,2006,3.8,99.47368421,-0.075706199,0.010365521
Carpodacus_mexicanus,House finch,0,volume,304,64,-0.072529696,raw,Badyaev2006,2006,4,76,-0.072657281,0.01369863
Sterna_hirundo,Common Tern,0.008600172,volume,427,158,0,raw,Fletcher2004,2004,2.362025316,180.7770632,0,0.005625023
Lonchura_striata,Bengalese finch,-0.021189299,mass,116,34,0.09239892,stat,Soma2007,2007,2.929411765,39.59839357,0.092663229,0.027323604
Molothrus_bonariensis,Ring-billed gull,0.078874433,mass,90,30,-0.000377536,stat,Chin2012,2012,2.6,34.61538462,-0.000377536,0.03163017
Pica_pica,Magpie,0.063358906,volume,43,8,-0.036738337,raw,Slagsvold1992,1992,4.5,9.555555556,-0.036754879,0.152542373
Corvus_corone,Hooded crow,0.049218023,volume,103,31,0.073794034,raw,Slagsvold1992,1992,2.858064516,36.03837472,0.073928423,0.030267833
Taeniopygia_guttata,Zebra finch,-0.010812869,mass,133,22,-0.03994883,stat,Pariser2012,2012,5.036363636,26.40794224,-0.039970102,0.042720543
Delichon_urbicum,House martin,0,mass,90,35,0.03076685,stat,Gil2006,2006,2.257142857,39.87341772,0.030776563,0.027119808
Xanthocephalus_xanthocephalus,Yellow-headed blackbird,0.247321812,mass,90,23,-0.088294337,raw,Richter1983,1983,3.330434783,27.02349869,-0.088524861,0.04162591
Larus_delawarensis,Ring-billed gull,0.06069784,mass,110,37,0.053696485,raw,Meathrel1987,1987,2.578378378,42.66247379,0.053748182,0.025212749
Cincloramphus_cruralis,Brown songlark,0.363356588,volume,95,40,-0.3293106,stat,Magrath2003,2003,2.1,45.23809524,-0.342054801,0.02367531
Larus_ridibundus,Black-headed gull,0,mass,60,20,0.1391671,stat,Groothuis2006,2006,2.6,23.07692308,0.140076126,0.049808429
Anseranas_semipalmata,Magpie goose,0.125672077,mass,60,16,-0.110406595,raw,Whitehead1990,1990,3.2,18.75,-0.11085851,0.063492063
Sterna_hirundo,Common Tern,0,volume,108,40,0.073872636,raw,Gonzalez-Solis2005,2005,2.36,45.76271186,0.074007456,0.023384859
Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,80,16,0.097815848,raw,Wu2010,2010,4.2,19.04761905,0.098129617,0.06231454
Larus_delawarensis,Shiny cowbird,0.035472318,volume,90,90,-0.07294093,stat,Tuero2012,2012,1,90,-0.073070702,0.011494253
Pygoscelis_antarcticus,Chinstrap penguin,0.058509856,volume,264,132,0.03094903,stat,Fargallo2006,2006,1.8,146.6666667,0.030958917,0.006960557
Phoebastria_irrorata,Waved albatross,0.091157684,volume,224,224,-0.088469783,stat,Awkerman2007,2007,1,224,-0.088701688,0.004524887
Passer_domesticus,House sparrow,0.009407469,volume,77,19,0.05470045,stat,Wetzel2012,2012,3.442105263,22.37003058,0.054755105,0.051626145
Sula_nebouxii,Blue-footed booby,-0.147287056,mass,76,56,-0.2611557,stat,D'Alba2007,2007,1.285714286,59.11111111,-0.267348297,0.017821782
Stercorarius_parasiticus,Parasitic jaeger,-0.063477845,volume,534,267,0.1397023,stat,Janssen2006,2006,1.8,296.6666667,0.140621937,0.003405221
Ficedula_albicollis,Collared flycatcher,0.013679697,volume,1162,198,0.04804496,stat,Bowers2013,2013,4.894949495,237.3875361,0.048081979,0.004266438
Sterna_dougallii,Roseate tern,0,mass,440,146,-0.062868359,raw,Szczys2005,2005,2.610958904,168.5204617,-0.062951384,0.006041549
Centrocercus_urophasianus,Greater sage-grouse,0.261995252,volume,146,20,0.06614164,stat,Atamian2010,2010,6.04,24.17218543,0.066238344,0.04723178
Phasianus_colchicus,Ring-necked pheasant,0.140492874,mass,106,15,0.4531655,stat,Rubolini2007,2007,5.853333333,18.10933941,0.488676681,0.06618423
Taeniopygia_guttata,Zebra finch,-0.010812869,mass,43,15,0,raw,Clotfelter1996,1996,2.493333333,17.2459893,0,0.070195195
Larus_fuscus,Lesser black-backed gull,-0.010812869,volume,304,101,-0.00990453,raw,Bradbury1999,1999,2.607920792,116.5679575,-0.009904854,0.008805301
Sturnus_vulgaris,European starling,0.01616166,mass,354,69,0.04996099,stat,Love2011,2011,4.304347826,82.24242424,0.050002622,0.012619503
Erythrura_gouldiae,Gouldian finch,0,volume,1473,324,-0.019559388,stat,Pryke2009,2009,3.837037037,383.8899614,-0.019561883,0.00262543
Larus_ridibundus,Black-headed gull,0,volume,147,49,0.05453834,stat,Lezalova2005,2005,2.6,56.53846154,0.05459251,0.018678161
Aythya_ferina,Common pochard,0,volume,185,26,0.1957707,stat,Lezalova2013a,2014,5.892307692,31.39686684,0.198330883,0.035215153
Aythya_fuligula,Tufted duck,0.026629385,volume,46,7,-0.04923335,stat,Lezalova2013b,2014,5.457142857,8.429319372,-0.049273187,0.184185149
Aythya_affinis,Lesser scaup,0.031791834,volume,38,5,-0.143657133,raw,Dawson1996,1996,6.28,6.050955414,-0.144657788,0.32776618
Chen_caerulescens,Lesser snow goose,0.037500891,mass,85,22,-0.089455106,raw,Ankney1980,1980,3.290909091,25.82872928,-0.089694871,0.043804453
Falco_sparverius,American kestrels,-0.036212173,volume,170,34,0.093153037,raw,Anderson1997,1997,4.2,40.47619048,0.093423893,0.026683609
Columba_livia,Domestic pigeon,0,mass,63,16,0.3301501,stat,Pike2005,2005,3.35,18.80597015,0.342996707,0.063267233
Zonotrichia_leucophrys,White-crowned sparrow,0,mass,38,11,0.06348392,stat,Bonier2007,2007,2.963636364,12.82208589,0.063569411,0.101811368
Quiscalus_major,Boat-tailed grackle,0.268544242,mass,122,41,-0.056059589,raw,Bancroft1984,1984,2.580487805,47.2778828,-0.056118426,0.022584639
Cuculus_canorus,Common cuckoo,0.042879996,volume,71,38,0.1527316,stat,Fossoy2012,2012,1.694736842,41.89440994,0.153936091,0.025710636
Eudyptes_chrysocome,Rockhopper penguin,0.037301411,mass,213,194,0.1191936,stat,Poisbleau2010,2010,1.078350515,197.5239006,0.119762927,0.005140756
Larus_michahellis,Yellow-legged gull,0.091409863,volume,124,48,-0.003408011,stat,Perez2006,2006,2.266666667,54.70588235,-0.003408024,0.019340159
Gallus_gallus,Red Junglefowl,0.080943092,volume,220,71,-0.08467843,stat,Parker2005,2005,2.678873239,82.12407992,-0.084881699,0.012638378
rotl/inst/extdata/protist_mutation_rates.csv 0000644 0001775 0000144 00000000536 12547574666 021552 0 ustar deepayan users species,mu,pop.size,genome.size
Tetrahymena thermophila,7.61E-012,1.12E+008,1.04E+008
Paramecium tetraurelia,1.94E-011,1.24E+008,7.20E+007
Chlamydomonas reinhardtii,2.08E-010,1.00E+008,1.12E+008
Dictyostelium discoideum,2.9E-011,7.40E+006,3.40E+007
Saccharomyces cerevisiae,3.3E-010,1.00E+008,1.25E+008
Saccharomyces pombe,2E-010,1.00E+007,1.25E+008
rotl/inst/doc/ 0000755 0001775 0000144 00000000000 13056407503 013277 5 ustar deepayan users rotl/inst/doc/how-to-use-rotl.R 0000644 0001775 0000144 00000006543 13056407503 016417 0 ustar deepayan users ## ------------------------------------------------------------------------
## Code chunks from the "how-to-use-rotl" vignette: match taxon names, fetch
## trees for the matched taxa, and look up studies. The "## ----" lines are
## the chunk separators and are kept as they were.
library(rotl)
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
## ------------------------------------------------------------------------
resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
## ---- fig.width=7, fig.height=4------------------------------------------
my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
plot(my_tree, no.margin = TRUE)
## ------------------------------------------------------------------------
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
resolved_names
inspect(resolved_names, taxon_name = "diadema")
## ------------------------------------------------------------------------
resolved_names <- update(resolved_names, taxon_name = "diadema",
                         new_row_number = 2)
## we could also have used the ott_id to replace this taxon:
## resolved_names <- update(resolved_names, taxon_name = "diadema",
## new_ott_id = 4930522)
## ------------------------------------------------------------------------
diadema_info <- taxonomy_taxon_info(631176)
tax_rank(diadema_info)
synonyms(diadema_info)
tax_name(diadema_info)
## ------------------------------------------------------------------------
diadema_tax_tree <- taxonomy_subtree(631176)
diadema_tax_tree
## ---- fig.width=7, fig.height=4------------------------------------------
mono_id <- tnrs_match_names("Monotremata")
mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
plot(mono_tree)
## ------------------------------------------------------------------------
furry_studies <- studies_find_studies(property = "ot:focalCladeOTTTaxonName",
                                      value = "Mammalia")
furry_ids <- furry_studies$study_ids
## ------------------------------------------------------------------------
furry_meta <- get_study_meta("pg_2550")
get_publication(furry_meta) ## The citation for the source of the study
get_tree_ids(furry_meta) ## This study has 10 trees associated with it
candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
## ---- eval=FALSE---------------------------------------------------------
# get_study_tree(study_id="pg_710", tree_id="tree1277",
# tip_label='ott_taxon_name', file = "/tmp/tree.tre",
# file_format = "newick")
# tr <- ape::read.tree(file = "/tmp/tree.tre")
## ------------------------------------------------------------------------
giant_squid <- tnrs_match_names("Architeuthis")
tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
## ------------------------------------------------------------------------
turducken <- c("Meleagris", "Anas", "Gallus", "Sus")
## The argument is spelled out as `context_name` (as in the earlier call)
## instead of relying on partial matching of `context`.
taxa <- tnrs_match_names(turducken, context_name = "Animals")
taxa
## ---- error=TRUE---------------------------------------------------------
## This chunk is flagged error=TRUE, i.e. the call is allowed to fail.
tr <- tol_induced_subtree(ott_id(taxa))
## ------------------------------------------------------------------------
## Keep only the ids that is_in_tree() reports as TRUE before asking again.
in_tree <- is_in_tree(ott_id(taxa))
in_tree
tr <- tol_induced_subtree(ott_id(taxa)[in_tree])
## ---- fig.width=7, fig.height=4------------------------------------------
turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos",
                   "Gallus gallus", "Sus scrofa")
taxa <- tnrs_match_names(turducken_spp, context_name = "Animals")
tr <- tol_induced_subtree(ott_id(taxa))
plot(tr)
rotl/inst/doc/meta-analysis.R 0000644 0001775 0000144 00000004350 13056407503 016173 0 ustar deepayan users ## ----egg_data, cache=TRUE------------------------------------------------
## Code chunks from the "meta-analysis" vignette: load the egg-size data,
## match the species names, fetch a tree for them, and fit an MCMCglmm model
## that uses the tree as the pedigree.
library(rotl)
## readxl and fulltext are optional. requireNamespace() tests for them
## without attaching them; if either is missing, the CSV copy shipped in
## rotl's extdata directory is read instead.
if (requireNamespace("readxl", quietly = TRUE) &&
    requireNamespace("fulltext", quietly = TRUE)) {
  doi <- "10.1111/jeb.12282"
  xl_file <- fulltext::ft_get_si(doi, 1, save.name = "egg.xls")
  egg_data <- readxl::read_excel(xl_file)
} else {
  egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
}
head(egg_data)
## ----eggs_in_a_funnel, fig.width=6, fig.height=3-------------------------
## Funnel plot: effect size (Zr) against precision (1 / sqrt of VZr).
plot(1 / sqrt(egg_data$VZr), egg_data$Zr, pch = 16,
     ylab = "Effect size (Zr)",
     xlab = "Precision (1/SE)",
     main = "Effect sizes for sex bias in egg size among 51 bird species")
## ---- clean_eggs---------------------------------------------------------
egg_data <- as.data.frame(egg_data)
egg_data$animal <- tolower(egg_data$animal)
## ---- birds, cache=TRUE--------------------------------------------------
## `context_name` is spelled out rather than partially matched via `context`.
taxa <- tnrs_match_names(unique(egg_data$animal), context_name = "Animals")
head(taxa)
## ----bird_map------------------------------------------------------------
## Named vector: names are the matched unique names, values the search strings.
taxon_map <- structure(taxa$search_string, names = taxa$unique_name)
## ----odd_duck------------------------------------------------------------
taxon_map["Anser caerulescens"]
## ----birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'------
tr <- tol_induced_subtree(taxa$ott_id)
plot(tr, show.tip.label = FALSE)
## ----tip_lab-------------------------------------------------------------
tr$tip.label[1:4]
## ----clean_tips----------------------------------------------------------
## Relabel the tips with the search strings via taxon_map.
otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores = TRUE)
tr$tip.label <- taxon_map[otl_tips]
## ----remove_nodes--------------------------------------------------------
tr$node.label <- NULL
## ----model---------------------------------------------------------------
## `quietly` is the full argument name; `quiet` only worked by partial matching.
library(MCMCglmm, quietly = TRUE)
set.seed(123)
pr <- list(R = list(V = 1, nu = 0.002),
           G = list(G1 = list(V = 1, nu = 0.002)))
model <- MCMCglmm(Zr ~ 1, random = ~animal,
                  pedigree = tr,
                  mev = egg_data$VZr,
                  prior = pr,
                  data = egg_data,
                  verbose = FALSE)
## ----PhyH----------------------------------------------------------------
## Share of the summed mean variance components attributed to "animal".
var_comps <- colMeans(model$VCV)
var_comps["animal"] / sum(var_comps)
rotl/inst/doc/data_mashups.R 0000644 0001775 0000144 00000004350 13056407503 016075 0 ustar deepayan users ## ---- data---------------------------------------------------------------
## Code chunks from the "data_mashups" vignette: read the protist mutation-rate
## table, match its species names, look for trees containing them, and join
## the table to the tips of an induced subtree.
csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
mu <- read.csv(csv_path, stringsAsFactors = FALSE)
mu
## ---- context------------------------------------------------------------
library(rotl)
tnrs_contexts()
## ---- match--------------------------------------------------------------
taxon_search <- tnrs_match_names(names = mu$species, context_name = "All life")
knitr::kable(taxon_search)
## ---- munge--------------------------------------------------------------
## Keep the matched names and ids next to the original rows.
mu$ott_name <- taxon_search$unique_name
mu$ott_id <- taxon_search$ott_id
## ---- properties---------------------------------------------------------
studies_properties()
## ----taxon_count---------------------------------------------------------
studies_find_trees(property = "ot:ottId", value = "180195")
## ---- all_taxa_count-----------------------------------------------------
hits <- lapply(mu$ott_id, studies_find_trees, property = "ot:ottId", detailed = FALSE)
## vapply() guarantees one number per taxon; sapply() would silently change
## its return type if `hits` were empty or ragged.
vapply(hits, function(x) sum(x[["n_matched_trees"]]), numeric(1))
## ----subtree, fig.width=7, fig.height=4---------------------------------
tr <- tol_induced_subtree(ott_ids = mu$ott_id)
plot(tr)
## ---- match_names--------------------------------------------------------
mu$ott_name[1]
tr$tip.label[4]
## ---- sub----------------------------------------------------------------
tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores = TRUE)
tr$tip.label %in% mu$ott_name
## ----phylobase-----------------------------------------------------------
library(phylobase)
mu_numeric <- mu[, c("mu", "pop.size", "genome.size")]
## Row names are set to the matched names so they line up with the tip labels.
rownames(mu_numeric) <- mu$ott_name
tree_data <- phylo4d(tr, mu_numeric)
## ---- fig.width=7, fig.height=5-----------------------------------------
plot(tree_data)
## ------------------------------------------------------------------------
extra_data <- study_external_IDs("pg_1980")
extra_data
## ------------------------------------------------------------------------
library(rentrez)
seqs <- entrez_fetch(db = "nucleotide", id = extra_data$nucleotide_ids[1:2],
                     rettype = "fasta")
cat(seqs)
## ------------------------------------------------------------------------
## NOTE(review): this uses row 2 of `mu`, but the name `Tt_ids` suggests
## Tetrahymena thermophila, which is row 1 of the CSV -- confirm which taxon
## is intended.
Tt_ids <- taxon_external_IDs(mu$ott_id[2])
Tt_ids
rotl/inst/doc/data_mashups.Rmd 0000644 0001775 0000144 00000017643 13056407503 016427 0 ustar deepayan users ---
title: "Connecting data to Open Tree trees"
author: "David Winter"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Connecting data to Open Tree trees}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
## Combining data from OToL and other sources.
One of the major goals of `rotl` is to help users combine data from other
sources with the phylogenetic trees in the Open Tree database. This example
document describes some of the ways in which a user might connect data to trees
from Open Tree.
## Get Open Tree IDs to match your data.
Let's say you have a dataset where each row represents a measurement taken from
one species, and your goal is to put these measurements in some phylogenetic
context. Here's a small example: the best estimate of the mutation rate for a
set of unicellular Eukaryotes along with some other property of those species
which might explain the mutation rate:
```{r, data}
csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
mu <- read.csv(csv_path, stringsAsFactors=FALSE)
mu
```
If we want to get a tree for these species we need to start by finding the
unique ID for each of these species in the Open Tree database. We can use the
Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do
that we should see if any of the taxonomic contexts, which can be used to narrow
a search and avoid conflicts between different codes, apply to our group of species:
```{r, context}
library(rotl)
tnrs_contexts()
```
Hmm, none of those groups contain all of our species. In this case we can
search using the `All life` context and the function `tnrs_match_names`:
```{r, match}
taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
knitr::kable(taxon_search)
```
Good, all of our species are known to Open Tree. Note, though, that one of the names
is a synonym. _Saccharomyces pombe_ is an older name for what is now called
_Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name
Resolution Service is designed to deal with these problems (and similar ones
like misspellings), but it is always a good idea to check the results of
`tnrs_match_names` closely to ensure the results are what you expect.
In this case we have a good ID for each of our species so we can move on. Before
we do that, let's ensure we can match up our original data to the Open Tree
names and IDs by adding them to our `data.frame`:
```{r, munge}
mu$ott_name <- taxon_search$unique_name
mu$ott_id <- taxon_search$ott_id
```
## Find a tree with your taxa
Now let's find a tree. There are two possible options here: we can search for
published studies that include our taxa or we can use the 'synthetic tree' from
Open Tree. We can try both approaches.
### Published trees
Before we can search for published studies or trees, we should check out the
list of properties we can use to perform such searches:
```{r, properties}
studies_properties()
```
We have `ottIds` for our taxa, so let's use those IDs to search for trees that
contain them. Starting with our first species _Tetrahymena thermophila_ we can
use `studies_find_trees` to do this search.
```{r taxon_count}
studies_find_trees(property="ot:ottId", value="180195")
```
Well... that's not very promising. We can repeat that process for all of the IDs
to see if the other species are better represented.
```{r, all_taxa_count}
hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE)
## vapply() guarantees one number per taxon, whatever `hits` contains
vapply(hits, function(x) sum(x[["n_matched_trees"]]), numeric(1))
```
OK, most of our species are not in any of the published trees available. You can
help fix this sort of problem by [making sure you submit your published trees to
Open Tree](https://tree.opentreeoflife.org/curator).
### A part of the synthesis tree
Thankfully, we can still use the complete Tree of Life made from the
combined results of all of the published trees and taxonomies that go into Open
Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs.
Using the default arguments you can get a tree object into your R session:
```{r subtree, fig.width=7, fig.height=4}
tr <- tol_induced_subtree(ott_ids=mu$ott_id)
plot(tr)
```
### Connect your data to the tips of your tree
Now that we have a tree for our species, how can we use the tree and the data
together?
The package `phylobase` provides an object class called `phylo4d`, which is
designed to represent a phylogeny and data associated with its tips. In order to
get our tree and data into one of these objects we have to make sure the labels
in the tree and in our data match exactly. That's not quite the case at the
moment (tree labels have underscores and IDs appended):
```{r, match_names}
mu$ott_name[1]
tr$tip.label[4]
```
`rotl` provides a convenience function `strip_ott_ids` to deal with these.
```{r, sub}
tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
tr$tip.label %in% mu$ott_name
```
OK, now that the tip labels match, we can make a new dataset. The `phylo4d()`
function matches tip labels to the row names of a `data.frame`, so let's make
a new dataset that contains just the relevant data and has row names to match
the tree:
```{r phylobase}
library(phylobase)
mu_numeric <- mu[,c("mu", "pop.size", "genome.size")]
rownames(mu_numeric) <- mu$ott_name
tree_data <- phylo4d(tr, mu_numeric)
```
And now we can plot the data and the tree together
```{r, fig.width=7, fig.height=5}
plot(tree_data)
```
## Find external data associated with studies, trees and taxa from Open Tree
In the above example we looked for a tree that related species in another dataset.
Now we will go the other way, and try to find data associated with Open Tree records
in other databases.
### Get external data from a study
Let's imagine you were interested in extending or reproducing the results of a
published study. If that study is included in Open Tree you can find it via
`studies_find_studies` or `studies_find_trees` and retrieve the published trees
with `get_study`. `rotl` will also help you find external data. The function
`study_external_IDs` retrieves the DOI for a given study, and uses that to
gather some more data:
```{r}
extra_data <- study_external_IDs("pg_1980")
extra_data
```
Here the returned object contains an `external_data_url` (in this case a link to
the study in Treebase), a pubmed ID for the paper and a vector of IDs for the
NCBI's nucleotide database. The packages `treebase` and `rentrez` provide
functions to make use of these IDs within R.
As an example, let's use `rentrez` to download the first two DNA sequences and
print them.
```{r}
library(rentrez)
seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
cat(seqs)
```
You could further process these sequences in R with the function `read.dna` from
`ape` or save them to disk by specifying a file name with `cat`.
### Find an OTT taxon in another taxonomic database
It is also possible to map an Open Tree taxon to a record in another taxonomic
database. For instance, if we wanted to search for data about one of the tips of
the sub-tree we fetched in the example above we could do so using
`taxon_external_IDs`:
```{r}
Tt_ids <- taxon_external_IDs(mu$ott_id[2])
Tt_ids
```
A user could then use `rgbif` to find locality records using the gbif ID or
`rentrez` to get genetic or bibliometric data about it from the NCBI's databases.
## What next
The demonstration gets you to the point of visualizing your data in a
phylogenetic context. But there's a lot more you can do with this sort of data in R.
For instance, you could use packages like `ape`, `caper`, `phytools` and
`MCMCglmm` to perform phylogenetic comparative analyses of your data. You could
gather more data on your species using packages that connect to
trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from
the US National Phenology Network. You could also use `rentrez` to find genetic
data for each of your species, and use that data to generate branch lengths for
the phylogeny.
rotl/inst/doc/meta-analysis.html 0000644 0001775 0000144 00000121075 13056407503 016742 0 ustar deepayan users
Using the Open Tree synthesis in a comparative analysis
Using the Open Tree synthesis in a comparative analysis
David Winter
2017-03-03
Phylogenetic Comparative Methods
The development of phylogenetic comparative methods has made phylogenies an important source of data in fields as diverse as ecology, genomics and medicine. Comparative methods can be used to investigate patterns in the evolution of traits or the diversification of lineages. In other cases a phylogeny is treated as a “nuisance parameter”, allowing the autocorrelation created by the shared evolutionary history of the different species included to be controlled for.
In many cases finding a tree that relates the species for which trait data are available is a rate-limiting step in such comparative analyses. Here we show how the synthetic tree provided by Open Tree of Life (and made available in R via rotl) can help to fill this gap.
A phylogenetic meta-analysis
To demonstrate the use of rotl in a comparative analysis, we will partially reproduce the results of Rutkowska et al 2014. Very briefly, this study is a meta-analysis summarising the results of multiple studies testing for systematic differences in the size of eggs which contain male and female offspring. Such a difference might mean that birds invest more heavily in one sex than the other.
Because this study involves data from 51 different species, Rutkowska et al used a phylogenetic comparative approach to account for the shared evolutionary history among some of the studied-species.
Gather the data
If we are going to reproduce this analysis, we will first need to gather the data. Thankfully, the data is available as supplementary material from the publisher’s website. We can collect the data using fulltext (with the paper’s DOI as input) and read it into memory with readxl:
## # A tibble: 6 × 14
## animal Spp Lndim Measure Neggs
## <chr> <chr> <dbl> <chr> <dbl>
## 1 Zonotrichia_leucophrys White-crowned sparrow 0.000000000 volume 294
## 2 Passer_domesticus House sparrow 0.009407469 volume 149
## 3 Serinus_canaria Canary 0.000000000 volume 52
## 4 Turdus_merula European blackbird 0.021189299 volume 82
## 5 Agelaius_phoeniceus Red-winged blackbird 0.218316086 volume 394
## 6 Quiscalus_mexicanus Great-tailed grackle 0.281894985 mass 822
## # ... with 9 more variables: Nclutches <dbl>, ESr <dbl>, Type <chr>,
## # StudyID <chr>, Year <dbl>, D <dbl>, EN <dbl>, Zr <dbl>, VZr <dbl>
The most important variable in this dataset is Zr, which is a normalized effect size for difference in size between eggs that contain males and females. Values close to zero come from studies that found the sex of an egg’s inhabitant had little effect in its size, while large positive or negative values correspond to studies with substantial sex biases (towards males and females respectively). Since this is a meta-analysis we should produce the classic funnel plot with effects-size on the y-axis and precision (the inverse of the sample standard error) on the x-axis. Here we calculate precision from the sample variance (Vzr):
plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16,
ylab="Effect size (Zr)",
xlab="Precision (1/SE)",
main="Effect sizes for sex bias in egg size among 51 brid species" )
In order to use this data later on we need to first convert it to a standard data.frame. We can also convert the animal column (the species names) to lower case which will make it easier to match names later on:
We can use the OTL synthesis tree to relate these species. To do so we first need to find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the Taxonomic Name Resolution Service function tnrs_match_names:
All of these species are in OTT, but a few of them go by different names in the Open Tree than we have in our data set. Because the tree rotl fetches will have Open Tree names, we need to create a named vector that maps the names we have for each species to the names Open Tree uses for them:
There are a few things to note here. First, the tree has not branch lengths. At present this is true for the whole of the Open Tree synthetic tree. Some comparative methods require either branch lengths or an ultrametric tree. Before you can use one of those methods you will need to get a tree with branch lengths. You could try looking for published trees made available by the Open Tree with studies_find_trees. Alternatively, you could estimate branch lengths from the toplogy of a phylogeny returned by tol_induced_subtree, perhaps by downloading DNA sequences from the NCBI with rentrez or “hanging” the tree on nodes of known-age using penalized likelihood method in ape::chronos. In this case, we will use only the topology of the tree as input to our comparative analysis, so we can skip these steps.
Second, the tip labels contain OTT IDs, which means they will not perfectly match the species names in our dataset or the taxon map that we created earlier:
Finally, the tree contains node labels for those nodes that match a higher taxonomic group, and empty character vectors ("") for all other nodes. Some comparative methods either do not expect node labels at all, or require all labeled nodes to have a unique name (meaning multiple “empty” labels will cause an error).
We can deal with all these details easily. rotl provides the convenience function strip_ott_ids to remove the extra information from the tip labels. With the IDs removed, we can use our taxon map to replace the tip labels in the tree with the species names from dataset.
Finally, we can remove the node labels by setting the node.label attribute of the tree to NULL.
tr$node.label <-NULL
Perform the meta-analysis
Now we have data and a tree, and we know the names in the tree match the ones in the data. It’s time to do the comparative analysis. Rutkowska et al. used MCMCglmm, a Bayesian MCMC approach to fitting multi-level models,to perform their meta-analysis, and we will do the same. Of course, to properly analyse these data you would take some care in deciding on the appropriate priors to use and inspect the results carefully. In this case, we are really interested in using this as a demonstration, so we will just run a simple model.
Specifically we sill fit a model where the only variable that might explain the values of Zr is the random factor animal, which corresponds to the phylogenetic relationships among species. We also provide Zvr as the measurement error variance, effectively adding extra weight to the results of more powerful studies. Here’s how we specify and fit that model with MCMCglmm:
library(MCMCglmm, quiet=TRUE)
##
## Attaching package: 'ape'
## The following object is masked from 'package:phylobase':
##
## edges
## Warning in inverseA(pedigree = pedigree, scale = scale, nodes = nodes): no
## branch lengths: compute.brlen from ape has been used
Now that we have a result we can find out how much phylogenetic signal exists for sex-biased differences in egg-size. In a multi-level model we can use variance components to look at this, specifically the proportion of the total variance that can be explained by phylogeny is called the phylogenetic reliability, H. Let’s calculate the H for this model:
It appears there is almost no phylogenetic signal to the data. The relationships among species explain much less than one percent of the total variance in the data. If you were wondering, Rutkowska et al. report a similar result, even after adding more predictors to their model most of the variance in Zr was left unexplained.
What other comparative methods can I use in R?
Here we have demonstrated just one comparative analysis that you might do in R. There are an ever-growing number of packages that allow an ever-growing number of analyses to be performed in R. Some “classics” like ancestral state reconstruction, phylogenetic independent contrasts and lineage through time plots are implemented in ape. Packages like phytools, caper and diversitree provide extensions to these methods. The CRAN Phylogenetics Taskview gives a good idea of the diversity of packages and analyses that can be completed in R.
rotl/inst/doc/meta-analysis.Rmd 0000644 0001775 0000144 00000022407 13056407503 016517 0 ustar deepayan users ---
title: "Using the Open Tree synthesis in a comparative analysis"
author: "David Winter"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
## Phylogenetic Comparative Methods
The development of phylogenetic comparative methods has made phylogenies an
important source of data in fields as diverse as ecology, genomics and medicine.
Comparative methods can be used to investigate patterns in the evolution of
traits or the diversification of lineages. In other cases a phylogeny is treated
as a "nuisance parameter", allowing the autocorrelation created by the shared
evolutionary history of the different species included to be controlled for.
In many cases finding a tree that relates the species for which trait data are
available is a rate-limiting step in such comparative analyses. Here we show
how the synthetic tree provided by Open Tree of Life (and made available in R via
`rotl`) can help to fill this gap.
## A phylogenetic meta-analysis
To demonstrate the use of `rotl` in a comparative analysis, we will partially
reproduce the results of [Rutkowska _et al_ 2014](https://doi.org/10.1111/jeb.12282).
Very briefly, this study is a meta-analysis summarising the results of multiple
studies testing for systematic differences in the size of eggs which contain
male and female offspring. Such a difference might mean that birds invest more
heavily in one sex than the other.
Because this study involves data from 51 different species, Rutkowska _et al_
used a phylogenetic comparative approach to account for the shared evolutionary
history among some of the studied-species.
### Gather the data
If we are going to reproduce this analysis, we will first need to gather the
data. Thankfully, the data is available as supplementary material from the
publisher's website. We can collect the data using `fulltext` (with the
paper's DOI as input) and read it into memory with `readxl`:
```{r egg_data, cache=TRUE}
library(rotl)
# readxl and fulltext are optional dependencies. When both are installed,
# download the first supplementary file of the paper and read the spreadsheet;
# otherwise fall back to the CSV copy of the data shipped with rotl.
# requireNamespace() tests availability without attaching the packages, so the
# functions are called with an explicit namespace prefix.
if (requireNamespace("readxl", quietly = TRUE) &&
    requireNamespace("fulltext", quietly = TRUE)) {
    doi <- "10.1111/jeb.12282"
    xl_file <- fulltext::ft_get_si(doi, 1, save.name = "egg.xls")
    egg_data <- readxl::read_excel(xl_file)
} else {
    egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
}
# Preview the first rows of the data
head(egg_data)
```
The most important variable in this dataset is `Zr`, which is a [normalized
effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference
in size between eggs that contain males and females. Values close to zero come
from studies that found the sex of an egg's inhabitant had little effect on its size,
while large positive or negative values correspond to studies with substantial
sex biases (towards males and females respectively). Since this is a
meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot)
with effect size on the y-axis and precision (the inverse of the sample
standard error) on the x-axis. Here we calculate precision from the sample
variance (`VZr`):
```{r eggs_in_a_funnel, fig.width=6, fig.height=3}
# Funnel plot: precision (1 / sqrt of the sampling variance VZr) on the x-axis
# against the effect size (Zr) on the y-axis, drawn with filled points.
plot(1 / sqrt(egg_data$VZr), egg_data$Zr, pch = 16,
     ylab = "Effect size (Zr)",
     xlab = "Precision (1/SE)",
     main = "Effect sizes for sex bias in egg size among 51 bird species")
```
In order to use this data later on we need to first convert it to a standard
`data.frame`. We can also convert the `animal` column (the species names) to
lower case which will make it easier to match names later on:
```{r, clean_eggs}
# Coerce the data to a standard data.frame, then lower-case the species names
# held in the `animal` column so they are easier to match later on.
egg_data <- as.data.frame(egg_data)
egg_data[["animal"]] <- tolower(egg_data[["animal"]])
```
### Find the species in OTT
We can use the OTL synthesis tree to relate these species. To do so we first need to
find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the
Taxonomic Name Resolution Service function `tnrs_match_names`:
```{r, birds, cache=TRUE}
# Resolve each distinct species name against the Open Tree Taxonomy,
# restricting the search to the "Animals" taxonomic context. The argument is
# spelled out in full (context_name) rather than relying on partial matching.
taxa <- tnrs_match_names(unique(egg_data$animal), context_name = "Animals")
# Preview the first matches
head(taxa)
```
All of these species are in OTT, but a few of them go by different names in the
Open Tree than we have in our data set. Because the tree `rotl` fetches
will have Open Tree names, we need to create a named vector that maps the names
we have for each species to the names Open Tree uses for them:
```{r bird_map}
# Named character vector: the values are the names used in our data set (the
# search strings) and the names are the corresponding Open Tree names.
taxon_map <- setNames(taxa$search_string, taxa$unique_name)
```
Now we can use this map to retrieve "data set names" from "OTT names":
```{r odd_duck}
# Index the map by an Open Tree name to recover the name used in our data set
taxon_map["Anser caerulescens"]
```
### Get a tree
Now we can get the tree. There are really too many tips here to show nicely, so
we will leave them out of this plot
```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'}
# Fetch the induced subtree relating the matched OTT ids
tr <- tol_induced_subtree(taxa$ott_id)
# There are too many tips to label legibly, so plot the tree without tip labels
plot(tr, show.tip.label=FALSE)
```
There are a few things to note here. First, the tree has no branch lengths.
At present this is true for the whole of the Open Tree synthetic tree. Some
comparative methods require either branch lengths or an ultrametric tree. Before
you can use one of those methods you will need to get a tree with branch
lengths. You could try looking for published trees made available by the Open
Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths
from the topology of a phylogeny returned by `tol_induced_subtree`, perhaps by
downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on
nodes of known-age using the penalized likelihood method in `ape::chronos`.
In this case, we will use only the topology of the tree as input to our
comparative analysis, so we can skip these steps.
Second, the tip labels contain OTT IDs, which means they will not perfectly
match the species names in our dataset or the taxon map that we created earlier:
```{r tip_lab}
# Show the first four tip labels, which still carry their OTT IDs
tr$tip.label[1:4]
```
Finally, the tree contains node labels for those nodes that match a higher taxonomic
group, and empty character vectors (`""`) for all other nodes. Some
comparative methods either do not expect node labels at all, or require all
labeled nodes to have a unique name (meaning multiple "empty" labels will cause
an error).
We can deal with all these details easily. `rotl` provides the convenience
function `strip_ott_ids` to remove the extra information from the tip labels.
With the IDs removed, we can use our taxon map to replace the tip labels in the tree
with the species names from our dataset.
```{r clean_tips}
# Remove the OTT IDs from the tip labels so they can be used to index taxon_map
otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
# Swap each Open Tree name for the matching species name from our data set
tr$tip.label <- taxon_map[ otl_tips ]
```
Finally, we can remove the node labels by setting the `node.label` attribute of
the tree to `NULL`.
```{r remove_nodes}
# Remove the node labels from the tree
tr$node.label <- NULL
```
### Perform the meta-analysis
Now we have data and a tree, and we know the names in the tree match the ones in
the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a
Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis,
and we will do the same. Of course, to properly analyse these data you would
take some care in deciding on the appropriate priors to use and inspect the
results carefully. In this case, we are really interested in using this as a
demonstration, so we will just run a simple model.
Specifically we will fit a model where the only variable that might explain the
values of `Zr` is the random factor `animal`, which corresponds to the
phylogenetic relationships among species. We also provide `VZr` as the measurement
error variance, effectively adding extra weight to the results of more powerful
studies. Here's how we specify and fit that model with `MCMCglmm`:
```{r model}
# Attach MCMCglmm; the argument is spelled out in full (quietly) rather than
# relying on partial argument matching.
library(MCMCglmm, quietly = TRUE)
# Fix the random seed so repeated runs of the sampler give the same result
set.seed(123)

# Priors for the residual (R) and random-effect (G) variance structures
pr <- list(
    R = list(V = 1, nu = 0.002),
    G = list(G1 = list(V = 1, nu = 0.002))
)

# Intercept-only model for Zr with `animal` as the random factor. The tree
# supplies the relationships among species and VZr is passed as the
# measurement error variance.
model <- MCMCglmm(Zr ~ 1, random = ~animal,
                  pedigree = tr,
                  mev = egg_data$VZr,
                  prior = pr,
                  data = egg_data,
                  verbose = FALSE)
```
Now that we have a result we can find out how much phylogenetic signal exists
for sex-biased differences in egg-size. In a multi-level model we can use variance
components to look at this, specifically the proportion of the total variance
that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's
calculate the _H_ for this model:
```{r PhyH}
# Average each column of the variance components stored in the fitted model
var_comps <- colMeans(model$VCV )
# Share of the summed variance attributed to the `animal` (phylogenetic) term
var_comps["animal"] / sum(var_comps)
```
It appears there is almost no phylogenetic signal to the data.
The relationships among species explain much less than one percent of the total
variance in the data. If you were wondering, Rutkowska _et al_. report a similar result,
even after adding more predictors to their model most of the variance in `Zr`
was left unexplained.
## What other comparative methods can I use in R?
Here we have demonstrated just one comparative analysis that you might do in R.
There are an ever-growing number of packages that allow an ever-growing number
of analyses to be performed in R. Some "classics" like ancestral state
reconstruction, phylogenetic independent contrasts and lineage through time plots
are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree`
provide extensions to these methods. The [CRAN Phylogenetics Taskview](https://CRAN.R-project.org/view=Phylogenetics)
gives a good idea of the diversity of packages and analyses that can be
completed in R.
rotl/inst/doc/how-to-use-rotl.html 0000644 0001775 0000144 00000175430 13056407503 017164 0 ustar deepayan users
How to use rotl?
How to use rotl?
François Michonneau
2017-03-03
rotl provides an interface to the Open Tree of Life (OTL) API and allows users to query the API, retrieve parts of the Tree of Life and integrate these parts with other R packages.
The OTL API provides services to access:
the Tree of Life a.k.a. TOL (the synthetic tree): a single draft tree that is a combination of the OTL taxonomy and the source trees (studies)
the Taxonomic name resolution services a.k.a. TNRS: the methods for resolving taxonomic names to the internal identifiers used by the TOL and the GOL (the ott ids).
the Taxonomy a.k.a. OTT (for Open Tree Taxonomy): which represents the synthesis of the different taxonomies used as a backbone of the TOL when no studies are available.
the Studies containing the source trees used to build the TOL, and extracted from the scientific literature.
In rotl, each of these services correspond to functions with different prefixes:
Service
rotl prefix
Tree of Life
tol_
TNRS
tnrs_
Taxonomy
taxonomy_
Studies
studies_
rotl also provides a few other functions and methods that can be used to extract relevant information from the objects returned by these functions.
Demonstration of a basic workflow
The most common use for rotl is probably to start from a list of species and get the relevant parts of the tree for these species. This is a two step process:
the species names need to be matched to their ott_id (the Open Tree Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
these ott_id will then be used to retrieve the relevant parts of the Tree of Life.
Step 1: Matching taxonomy to the ott_id
Let’s start by doing a search on a diverse group of taxa: a tree frog (genus Hyla), a fish (genus Salmo), a sea urchin (genus Diadema), and a nautilus (genus Nautilus).
It’s always a good idea to check that the resolved names match what you intended:
search_string
unique_name
approximate_match
ott_id
is_synonym
flags
number_matches
hyla
Hyla
FALSE
1062216
FALSE
1
salmo
Salmo
FALSE
982359
FALSE
1
diadema
Diadema (genus in Nucletmycea)
FALSE
4930522
FALSE
3
nautilus
Nautilus
FALSE
616358
FALSE
1
The column unique_name sometimes indicates the higher taxonomic level associated with the name. The column number_matches indicates the number of ott_id that corresponds to a given name. In this example, our search on Diadema returns 2 matches, and the one returned by default is indeed the sea urchin that we want for our query. The argument context_name allows you to limit the taxonomic scope of your search. Diadema is also the genus name of a fungus. To ensure that our search is limited to animal names, we could do:
If you are trying to build a tree with deeply divergent taxa that the argument context_name cannot fix, see “How to change the ott ids assigned to my taxa?” in the FAQ below.
Step 2: Getting the tree corresponding to our taxa
Now that we have the correct ott_id for our taxa, we can ask for the tree using the tol_induced_subtree() function. By default, the object returned by tol_induced_subtree is a phylo object (from the ape package), so we can plot it directly.
If you realize that tnrs_match_names assigns the incorrect taxonomic group to your name (e.g., because of synonymy) and changing the context_name does not help, you can use the function inspect. This function takes the object resulting from tnrs_match_names(), and either the row number, the taxon name (you used in your search in lowercase), or the ott_id returned by the initial query.
To illustrate this, let’s re-use the previous query but this time pretending that we are interested in the fungus Diadema and not the sea urchin:
In our case, we want the second row in this data frame to replace the information that initially matched for Diadema. We can now use the update() function, to change to the correct taxa (the fungus not the sea urchin):
resolved_names <-update(resolved_names, taxon_name ="diadema",
new_row_number =2)
## we could also have used the ott_id to replace this taxon:
## resolved_names <- update(resolved_names, taxon_name = "diadema",
## new_ott_id = 4930522)
And now our resolved_names data frame includes the taxon we want:
search_string
unique_name
approximate_match
ott_id
is_synonym
flags
number_matches
hyla
Hyla
FALSE
1062216
FALSE
1
salmo
Salmo
FALSE
982359
FALSE
1
diadema
Diadema (genus in Holozoa)
FALSE
631176
FALSE
3
nautilus
Nautilus
FALSE
616358
FALSE
1
How do I know that the taxa I’m asking for is the correct one?
The function taxonomy_taxon_info() takes ott_ids as arguments and returns taxonomic information about the taxa. This output can be passed to some helpers functions to extract the relevant information. Let’s illustrate this with our Diadema example
In some cases, it might also be useful to investigate the taxonomic tree descending from an ott_id to check that it’s the correct taxon and to determine the species included in the Open Tree Taxonomy:
By default, this function returns all taxa (including self, and internal) descending from this ott_id but it is also possible to return a phylo object.
How do I get the tree for a particular taxonomic group?
If you are looking to get the tree for a particular taxonomic group, you need to first identify it by its node id or ott id, and then use the tol_subtree() function:
## Warning in collapse_singles(tr): Dropping singleton nodes with labels:
## Ornithorhynchidae ott344066, Ornithorhynchus ott962391, Tachyglossus
## ott16047, Tachyglossus aculeatus ott16038
plot(mono_tree)
How do I find trees from studies focused on my favourite taxa?
The function studies_find_trees() allows the user to search for studies matching a specific criteria. The function studies_properties() returns the list of properties that can be used in the search.
Now that we know the study_id, we can ask for the meta data information associated with this study:
furry_meta <-get_study_meta("pg_2550")
get_publication(furry_meta) ## The citation for the source of the study
## [1] "O'Leary, Maureen A., Marc Allard, Michael J. Novacek, Jin Meng, and John Gatesy. 2004. \"Building the mammalian sector of the tree of life: Combining different data and a discussion of divergence times for placental mammals.\" In: Cracraft J., & Donoghue M., eds. Assembling the Tree of Life. pp. 490-516. Oxford, United Kingdom, Oxford University Press."
## attr(,"DOI")
## [1] ""
get_tree_ids(furry_meta) ## This study has 10 trees associated with it
candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
## NULL
Using get_study("pg_2550") would return a multiPhylo object (default) with all the trees associated with this particular study, while get_study_tree("pg_2550", "tree5513") would return one of these trees.
The tree returned by the API has duplicated tip labels, how can I work around it?
You may encounter the following error message:
Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
angulata) has already been encountered in this tree. Duplication of taxa in a
tree is prohibited.
This message occurs as duplicate labels are not allowed in the NEXUS format and it is strictly enforced by the part of the code used by rotl to import the trees in memory.
If you use a version of rotl more recent than 0.4.1, this should not happen by default for the function get_study_tree. If it happens with another function, please let us know.
The easiest way to work around this is to save the tree in a file, and use APE to read it in memory:
How do I get the higher taxonomy for a given taxa?
If you encounter a taxon name you are not familiar with, it might be useful to obtain its higher taxonomy to see where it fits in the tree of life. We can combine several taxonomy methods to extract this information easily.
## $`5295401`
## rank name unique_name ott_id
## 1 family Architeuthidae Architeuthidae 564393
## 2 suborder Oegopsina Oegopsina 43352
## 3 order Teuthida Teuthida 380472
## 4 superorder Decapodiformes Decapodiformes 854107
## 5 infraclass Neocoleoidea Neocoleoidea 329546
## 6 subclass Coleoidea Coleoidea 7371
## 7 class Cephalopoda Cephalopoda 7368
## 8 phylum Mollusca Mollusca 802117
## 9 no rank Lophotrochozoa Lophotrochozoa 155737
## 10 no rank Protostomia Protostomia 189832
## 11 no rank Bilateria Bilateria 117569
## 12 no rank Eumetazoa Eumetazoa 641038
## 13 kingdom Metazoa Metazoa 691846
## 14 no rank Holozoa Holozoa 5246131
## 15 no rank Opisthokonta Opisthokonta 332573
## 16 domain Eukaryota Eukaryota 304358
## 17 no rank cellular organisms cellular organisms 93302
## 18 no rank life life 805080
Why are OTT IDs discovered with rotl missing from an induced subtree?
Some taxonomic names that can be retrieved through the taxonomic name resolution service are not part of the Open Tree’s synthesis tree. These are usually traditional higher-level taxa that have been found to be paraphyletic.
For instance, if you wanted to fetch a tree relating the three birds that go into a Turkducken as well as the pork used for stuffing, you might search for the turkey, duck, chicken, and pork genera:
We have the OTT ids for each genus, however, if we tried to get the induced subtree from these results, we would get an error:
tr <-tol_induced_subtree(ott_id(taxa))
## Error: HTTP failure: 400
## The following OTT ids were not found: [765185]. BadIdsExceptionopentree.plugins.BadIdsExceptionlist("opentree.plugins.tree_of_life_v3.doInducedSubtree(tree_of_life_v3.java:516)", "opentree.plugins.tree_of_life_v3.induced_subtree(tree_of_life_v3.java:400)", "java.lang.reflect.Method.invoke(Method.java:498)", "org.neo4j.server.plugins.PluginMethod.invoke(PluginMethod.java:57)", "org.neo4j.server.plugins.PluginManager.invoke(PluginManager.java:168)", "org.neo4j.server.rest.web.ExtensionService.invokeGraphDatabaseExtension(ExtensionService.java:300)", "org.neo4j.server.rest.web.ExtensionService.invokeGraphDatabaseExtension(ExtensionService.java:122)",
## "java.lang.reflect.Method.invoke(Method.java:498)", "org.neo4j.server.rest.security.SecurityFilter.doFilter(SecurityFilter.java:112)")
As the error message suggests, some of the taxa are not found in the synthetic tree. This occurs for 2 main reasons: either the taxa is invalid, or it is part of a group that is not monophyletic in the synthetic tree. There are two ways to get around this issue: (1) removing the taxa that are not part of the Open Tree; (2) using the complete species name.
Removing the taxa missing from the synthetic tree
To help with this situation, rotl provides a way to identify the OTT ids that are not part of the synthetic tree. The function is_in_tree() takes the output of the ott_id() function and returns a logical vector indicating whether the taxa are part of the synthetic tree. We can then use it to keep only the taxa that appear in the synthetic tree:
in_tree <-is_in_tree(ott_id(taxa))
in_tree
## Meleagris Anas Gallus Sus
## TRUE FALSE TRUE TRUE
tr <-tol_induced_subtree(ott_id(taxa)[in_tree])
Using the full taxonomic names
The best way to avoid these problems is to specify complete species names (species being the lowest level of classification in the Open Tree taxonomy they are guaranteed to be monophyletic):
rotl/inst/doc/how-to-use-rotl.Rmd 0000644 0001775 0000144 00000025531 13056407503 016736 0 ustar deepayan users ---
title: "How to use rotl?"
author: "François Michonneau"
date: "`r Sys.Date()`"
output:
rmarkdown::html_vignette:
css: vignette.css
vignette: >
%\VignetteIndexEntry{How to use rotl?}
%\VignetteEngine{knitr::rmarkdown}
\usepackage[utf8]{inputenc}
---
`rotl` provides an interface to the Open Tree of Life (OTL) API and allows users
to query the API, retrieve parts of the Tree of Life and integrate these parts
with other R packages.
The OTL API provides services to access:
* the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is
a combination of **the OTL taxonomy** and the **source trees** (studies)
* the **Taxonomic name resolution services** a.k.a. TNRS: the methods for
resolving taxonomic names to the internal identifiers used by the TOL and the
GOL (the `ott ids`).
* the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the
synthesis of the different taxonomies used as a backbone of the TOL when no
studies are available.
* the **Studies** containing the source trees used to build the TOL, and
extracted from the scientific literature.
In `rotl`, each of these services corresponds to functions with different
prefixes:
| Service | `rotl` prefix |
|---------------|---------------|
| Tree of Life | `tol_` |
| TNRS | `tnrs_` |
| Taxonomy | `taxonomy_` |
| Studies | `studies_` |
`rotl` also provides a few other functions and methods that can be used to
extract relevant information from the objects returned by these functions.
## Demonstration of a basic workflow
The most common use for `rotl` is probably to start from a list of species and
get the relevant parts of the tree for these species. This is a two step
process:
1. the species names need to be matched to their `ott_id` (the Open Tree
Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
1. these `ott_id` will then be used to retrieve the relevant parts of the Tree
of Life.
### Step 1: Matching taxonomy to the `ott_id`
Let's start by doing a search on a diverse group of taxa: a tree frog (genus
_Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus
(genus _Nautilus_).
```{r}
library(rotl)
# Genus names to resolve against the Open Tree Taxonomy
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
# Match each name to an ott_id using the taxonomic name resolution service
resolved_names <- tnrs_match_names(taxa)
```
It's always a good idea to check that the resolved names match what you
intended:
`r knitr::kable(resolved_names)`
The column `unique_name` sometimes indicates the higher taxonomic level
associated with the name. The column `number_matches` indicates the number of
`ott_id` that corresponds to a given name. In this example, our search on
_Diadema_ returns 2 matches, and the one returned by default is indeed the sea
urchin that we want for our query. The argument `context_name` allows you to
limit the taxonomic scope of your search. _Diadema_ is also the genus name of a
fungus. To ensure that our search is limited to animal names, we could do:
```{r}
# Repeat the search, limiting its taxonomic scope to animal names
resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
```
If you are trying to build a tree with deeply divergent taxa that the argument
`context_name` cannot fix, see "How to change the ott ids assigned to my taxa?"
in the FAQ below.
### Step 2: Getting the tree corresponding to our taxa
Now that we have the correct `ott_id` for our taxa, we can ask for the tree
using the `tol_induced_subtree()` function. By default, the object returned by
`tol_induced_subtree` is a phylo object (from the
[ape](https://cran.r-project.org/package=ape) package), so we can plot it
directly.
```{r, fig.width=7, fig.height=4}
## Request the induced subtree for the matched ott ids and plot it
my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
plot(my_tree, no.margin=TRUE)
```
## FAQ
### How to change the ott ids assigned to my taxa?
If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to
your name (e.g., because of synonymy) and changing the `context_name` does not
help, you can use the function `inspect`. This function takes the object
resulting from `tnrs_match_names()`, and either the row number, the taxon name
(you used in your search in lowercase), or the `ott_id` returned by the initial
query.
To illustrate this, let's re-use the previous query but this time pretending that
we are interested in the fungus _Diadema_ and not the sea urchin:
```{r}
## Re-run the original query (no taxonomic context)
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
resolved_names
## Show the matches available for the search string "diadema"
inspect(resolved_names, taxon_name = "diadema")
```
In our case, we want the second row in this data frame to replace the
information that initially matched for _Diadema_. We can now use the `update()`
function, to change to the correct taxa (the fungus not the sea urchin):
```{r}
## Replace the match for "diadema" with the second row listed by inspect()
resolved_names <- update(resolved_names, taxon_name = "diadema",
new_row_number = 2)
## we could also have used the ott_id to replace this taxon:
## resolved_names <- update(resolved_names, taxon_name = "diadema",
## new_ott_id = 4930522)
```
And now our `resolved_names` data frame includes the taxon we want:
`r knitr::kable(resolved_names)`
### How do I know that the taxa I'm asking for is the correct one?
The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns
taxonomic information about the taxa. This output can be passed to some helper
functions to extract the relevant information. Let's illustrate this with our
_Diadema_ example:
```{r}
## Look up the taxon by its ott id, then extract individual fields with the
## helper functions
diadema_info <- taxonomy_taxon_info(631176)
tax_rank(diadema_info)
synonyms(diadema_info)
tax_name(diadema_info)
```
In some cases, it might also be useful to investigate the taxonomic tree
descending from an `ott_id` to check that it's the correct taxon and to
determine the species included in the Open Tree Taxonomy:
```{r}
## Taxonomic subtree descending from this ott id (default output format)
diadema_tax_tree <- taxonomy_subtree(631176)
diadema_tax_tree
```
By default, this function returns all taxa (including self, and internal)
descending from this `ott_id`, but it is also possible to return a `phylo` object.
### How do I get the tree for a particular taxonomic group?
If you are looking to get the tree for a particular taxonomic group, you need to
first identify it by its node id or ott id, and then use the `tol_subtree()`
function:
```{r, fig.width=7, fig.height=4}
## Resolve the group name, then request the subtree for its ott id
mono_id <- tnrs_match_names("Monotremata")
mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
plot(mono_tree)
```
### How do I find trees from studies focused on my favourite taxa?
The function `studies_find_studies()` allows the user to search for studies
matching specific criteria (`studies_find_trees()` performs the same kind of
search for trees). The function `studies_properties()` returns the
list of properties that can be used in the search.
```{r}
## Search for studies whose focal clade is Mammalia and keep their identifiers
furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
furry_ids <- furry_studies$study_ids
```
Now that we know the `study_id`, we can ask for the meta data information
associated with this study:
```{r}
## Metadata for a single study, queried by its study id
furry_meta <- get_study_meta("pg_2550")
get_publication(furry_meta) ## The citation for the source of the study
get_tree_ids(furry_meta) ## This study has 10 trees associated with it
candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
```
Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with
all the trees associated with this particular study, while
`get_study_tree("pg_2550", "tree5513")` would return one of these trees.
### The tree returned by the API has duplicated tip labels, how can I work around it?
You may encounter the following error message:
```
Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
angulata) has already been encountered in this tree. Duplication of taxa in a
tree is prohibited.
```
This message occurs because duplicate labels are not allowed in the NEXUS format,
and this is strictly enforced by the part of the code used by `rotl` to import the
trees in memory.
If you use a version of `rotl` more recent than 0.4.1, this should not happen by
default for the function `get_study_tree`. If it happens with another function,
please [let us know](https://github.com/ropensci/rotl/issues).
The easiest way to work around this is to save the tree in a file, and use APE
to read it in memory:
```{r, eval=FALSE}
## Write the tree to a temporary file (portable across platforms, unlike a
## hard-coded "/tmp" path) and read it back into memory with ape
tree_file <- tempfile(fileext = ".tre")
get_study_tree(study_id = "pg_710", tree_id = "tree1277",
               tip_label = "ott_taxon_name", file = tree_file,
               file_format = "newick")
tr <- ape::read.tree(file = tree_file)
```
### How do I get the higher taxonomy for a given taxa?
If you encounter a taxon name you are not familiar with, it might be useful to
obtain its higher taxonomy to see where it fits in the tree of life. We can
combine several taxonomy methods to extract this information easily.
```{r}
## The taxon info is requested with include_lineage = TRUE so that
## tax_lineage() has a lineage to extract
giant_squid <- tnrs_match_names("Architeuthis")
tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
```
### Why are OTT IDs discovered with `rotl` missing from an induced subtree?
Some taxonomic names that can be retrieved through the taxonomic name
resolution service are not part of the Open Tree's synthesis tree. These are
usually traditional higher-level taxa that have been found to be paraphyletic.
For instance, if you wanted to fetch a tree relating the three birds that go
into a [Turducken](https://en.wikipedia.org/wiki/Turducken) as well as the pork
used for stuffing, you might search for the turkey, duck, chicken, and pork
genera:
```{r}
turducken <- c("Meleagris", "Anas", "Gallus", "Sus")
## `context_name` is spelled out rather than relying on partial argument
## matching of `context`
taxa <- tnrs_match_names(turducken, context_name = "Animals")
taxa
```
We have the OTT ids for each genus, however, if we tried to get the induced
subtree from these results, we would get an error:
```{r, error=TRUE}
## This call is expected to fail (the chunk is evaluated with error=TRUE)
tr <- tol_induced_subtree(ott_id(taxa))
```
As the error message suggests, some of the taxa are not found in the synthetic
tree. This occurs for 2 main reasons: either the taxon is invalid, or it is part
of a group that is not monophyletic in the synthetic tree. There are two ways to
get around this issue: (1) removing the taxa that are not part of the Open Tree;
(2) using the complete species name.
#### Removing the taxa missing from the synthetic tree
To help with this situation, `rotl` provides a way to identify the OTT ids that
are not part of the synthetic tree. The function `is_in_tree()` takes the output
of the `ott_id()` function and returns a logical vector indicating whether
the taxa are part of the synthetic tree. We can then use it to keep only the taxa that appear in the synthetic tree:
```{r}
## Flag which ott ids are part of the synthetic tree
in_tree <- is_in_tree(ott_id(taxa))
in_tree
## Keep only the flagged ids when requesting the induced subtree
tr <- tol_induced_subtree(ott_id(taxa)[in_tree])
```
#### Using the full taxonomic names
The best way to avoid these problems is to specify complete species names
(species being the lowest level of classification in the Open Tree taxonomy,
they are guaranteed to be monophyletic):
```{r, fig.width=7, fig.height=4}
## Full species names; `context_name` is spelled out rather than relying on
## partial argument matching of `context`
turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos",
                   "Gallus gallus", "Sus scrofa")
taxa <- tnrs_match_names(turducken_spp, context_name = "Animals")
tr <- tol_induced_subtree(ott_id(taxa))
plot(tr)
```
rotl/inst/doc/data_mashups.html 0000644 0001775 0000144 00000165035 13056407503 016650 0 ustar deepayan users
Connecting data to Open Tree trees
Connecting data to Open Tree trees
David Winter
2017-03-03
Combining data from OToL and other sources.
One of the major goals of rotl is to help users combine data from other sources with the phylogenetic trees in the Open Tree database. This example document describes some of the ways in which a user might connect data to trees from Open Tree.
Get Open Tree IDs to match your data.
Let’s say you have a dataset where each row represents a measurement taken from one species, and your goal is to put these measurements in some phylogenetic context. Here’s a small example: the best estimate of the mutation rate for a set of unicellular Eukaryotes along with some other property of those species which might explain the mutation rate:
csv_path <-system.file("extdata", "protist_mutation_rates.csv", package ="rotl")
mu <-read.csv(csv_path, stringsAsFactors=FALSE)
mu
If we want to get a tree for these species we need to start by finding the unique ID for each of these species in the Open Tree database. We can use the Taxonomic Name Resolution Service (tnrs) functions to do this. Before we do that we should see if any of the taxonomic contexts, which can be used to narrow a search and avoid conflicts between different codes, apply to our group of species:
Good, all of our species are known to Open Tree. Note, though, that one of the names is a synonym. Saccharomyces pombe is an older name for what is now called Schizosaccharomyces pombe. As the name suggests, the Taxonomic Name Resolution Service is designed to deal with these problems (and similar ones like misspellings), but it is always a good idea to check the results of tnrs_match_names closely to ensure the results are what you expect.
In this case we have a good ID for each of our species so we can move on. Before we do that, let’s ensure we can match up our original data to the Open Tree names and IDs by adding them to our data.frame:
Now let’s find a tree. There are two possible options here: we can search for published studies that include our taxa or we can use the ‘synthetic tree’ from Open Tree. We can try both approaches.
Published trees
Before we can search for published studies or trees, we should check out the list of properties we can use to perform such searches:
We have ottIds for our taxa, so let’s use those IDs to search for trees that contain them. Starting with our first species Tetrahymena thermophila we can use studies_find_trees to do this search.
Thankfully, we can still use the complete Tree of Life made from the combined results of all of the published trees and taxonomies that go into Open Tree. The function tol_induced_subtree will fetch a tree relating a set of IDs.
Using the default arguments you can get a tree object into your R session:
Now that we have a tree for our species, how can we use the tree and the data together?
The package phylobase provides an object class called phylo4d, which is designed to represent a phylogeny and data associated with its tips. In order to get our tree and data into one of these objects we have to make sure the labels in the tree and in our data match exactly. That’s not quite the case at the moment (tree labels have underscores and IDs appended):
mu$ott_name[1]
## [1] "Tetrahymena thermophila"
tr$tip.label[4]
## [1] "Dictyostelium_discoideum_ott160850"
rotl provides a convenience function strip_ott_ids to deal with these.
Ok, now the tips are together we can make a new dataset. The phylo4d() functions matches tip labels to the row names of a data.frame, so let’s make a new dataset that contains just the relevant data and has row names to match the tree
And now we can plot the data and the tree together
plot(tree_data)
Find external data associated with studies, trees and taxa from Open Tree
In the above example we looked for a tree that related species in another dataset. Now we will go the other way, and try to find data associated with Open Tree records in other databases.
Get external data from a study
Let’s imagine you were interested in extending or reproducing the results of a published study. If that study is included in Open Tree you can find it via studies_find_studies or studies_find_trees and retrieve the published trees with get_study. rotl will also help you find external data. The function study_external_IDs retrieves the DOI for a given study, and uses that to gather some more data:
## External data identifiers for study
## $doi: 10.1016/j.ympev.2006.04.016
## $pubmed_id: 16762568
## $nucleotide_ids: vector of 58 IDs
## $external_data_url http://purl.org/phylo/treebase/phylows/study/TB2:S1575
Here the returned object contains an external_data_url (in this case a link to the study in Treebase), a pubmed ID for the paper and a vector of IDs for the NCBI’s nucleotide database. The packages treebase and rentrez provide functions to make use of these IDs within R.
As an example, let’s use rentrez to download the first two DNA sequences and print them.
You could further process these sequences in R with the function read.dna from ape or save them to disk by specifying a file name with cat.
Find a OTT taxon in another taxonomic database
It is also possible to map an Open Tree taxon to a record in another taxonomic database. For instance, if we wanted to search for data about one of the tips of the sub-tree we fetched in the example above we could do so using taxon_external_IDs:
A user could then use rgbif to find locality records using the gbif ID or rentrez to get genetic or bibliometric data about it from the NCBI’s databases.
What next
The demonstration gets you to the point of visualizing your data in a phylogenetic context. But there’s a lot more you can do with this sort of data in R. For instance, you could use packages like ape, caper, phytools and mcmcGLMM to perform phylogenetic comparative analyses of your data. You could gather more data on your species using packages that connect to trait databases like rfishbase, AntWeb or rnpn which provides data from the US National Phenology Network. You could also use rentrez to find genetic data for each of your species, and use that data to generate branch lengths for the phylogeny.
rotl/tests/ 0000755 0001775 0000144 00000000000 12705157664 012731 5 ustar deepayan users rotl/tests/testthat/ 0000755 0001775 0000144 00000000000 13056407503 014557 5 ustar deepayan users rotl/tests/testthat/test-api-taxonomy.R 0000644 0001775 0000144 00000005543 12707503302 020307 0 ustar deepayan users context("taxonomy API")
############################################################################
## .taxonomy_taxon_info ##
############################################################################
## A NULL identifier must raise an error.
test_that("ott_id is not null for .taxonomy_taxon_info", {
    skip_on_cran()
    expect_error(
        .taxonomy_taxon_info(NULL),
        "must supply"
    )
})

## More than one identifier must raise an error.
test_that("ott_id is of length 1 for .taxonomy_taxon_info", {
    skip_on_cran()
    several_ids <- c(123, 456, 789)
    expect_error(.taxonomy_taxon_info(several_ids), "Must only supply")
})

## A logical in place of the identifier must raise an error.
test_that("ott_id is a numeric for .taxonomy_taxon_info", {
    skip_on_cran()
    not_a_number <- TRUE
    expect_error(.taxonomy_taxon_info(not_a_number), "look like numbers")
})
## `include_lineage` is checked against three values that are not a single
## TRUE/FALSE: a logical of length 2, a string and a number.
test_that("include_lineage is a flag", {
    skip_on_cran()
    not_flags <- list(c(TRUE, FALSE), "na", 1235)
    for (bad_value in not_flags) {
        expect_error(
            .taxonomy_taxon_info(ott_id = 515698, include_lineage = bad_value),
            "is not a flag"
        )
    }
})
## `include_terminal_descendants` must be a single TRUE/FALSE: a logical of
## length 2, a string and a number are all expected to be rejected.
## (The description previously named `list_terminal_descendants`, which is not
## the argument exercised here.)
test_that("include_terminal_descendants is a flag", {
    skip_on_cran()
    expect_error(.taxonomy_taxon_info(ott_id = 515698,
                                      include_terminal_descendants = c(TRUE, FALSE)),
                 "is not a flag")
    expect_error(.taxonomy_taxon_info(ott_id = 515698,
                                      include_terminal_descendants = c("na")),
                 "is not a flag")
    expect_error(.taxonomy_taxon_info(ott_id = 515698,
                                      include_terminal_descendants = c(1235)),
                 "is not a flag")
})
############################################################################
## .taxonomy_subtree ##
############################################################################
## A NULL identifier must raise an error.
test_that("ott_id is not null for .taxonomy_subtree", {
    skip_on_cran()
    expect_error(
        .taxonomy_subtree(NULL),
        "must supply"
    )
})

## More than one identifier must raise an error.
test_that("ott_id is of length 1 for .taxonomy_subtree", {
    skip_on_cran()
    several_ids <- c(123, 456, 789)
    expect_error(.taxonomy_subtree(several_ids), "Must only supply")
})

## A logical in place of the identifier must raise an error.
test_that("ott_id is a numeric for .taxonomy_subtree", {
    skip_on_cran()
    not_a_number <- TRUE
    expect_error(.taxonomy_subtree(not_a_number), "look like numbers")
})
############################################################################
## .taxonomy_mrca ##
############################################################################
## Argument validation for .taxonomy_mrca(). The descriptions previously named
## `.taxonomy_lica`, which is not the function called by these tests.
test_that("ott_id is not null for .taxonomy_mrca", {
    skip_on_cran()
    expect_error(.taxonomy_mrca(NULL),
                 "must supply")
})

test_that("ott_id is a numeric for .taxonomy_mrca", {
    skip_on_cran()
    expect_error(.taxonomy_mrca(TRUE),
                 "look like numbers")
})
rotl/tests/testthat/test-taxonomy.R 0000644 0001775 0000144 00000030247 13056114755 017547 0 ustar deepayan users context("taxonomy")
############################################################################
## taxonomy about ##
############################################################################
## taxonomy_about() must return a list...
test_that("taxonomy_about is a list", {
    skip_on_cran()
    about <- taxonomy_about()
    expect_true(inherits(about, "list"))
})

## ...whose element names all belong to the documented set.
test_that("taxonomy_about has the names listed in documentation (if it breaks update documentation)", {
    skip_on_cran()
    documented_fields <- c("weburl", "author", "name", "source", "version")
    about <- taxonomy_about()
    expect_true(all(names(about) %in% documented_fields))
})
############################################################################
## taxon Info ##
############################################################################
## The first element of the result carries the requested ott id, and the
## result has class "taxon_info".
test_that("taxonomy taxon info", {
    skip_on_cran()
    query_id <- 515698
    info <- taxonomy_taxon_info(query_id)
    expect_equal(info[[1]][["ott_id"]], query_id)
    expect_true(inherits(info, "taxon_info"))
})

## The "lineage" element is present only when it is requested.
test_that("taxonomy with include_lineage=TRUE", {
    skip_on_cran()
    info <- taxonomy_taxon_info(515698, include_lineage = TRUE)
    first_taxon <- info[[1]]
    expect_true(exists("lineage", first_taxon))
    expect_true(length(first_taxon$lineage) > 1)
})

test_that("taxonomy with include_lineage=FALSE", {
    skip_on_cran()
    info <- taxonomy_taxon_info(515698, include_lineage = FALSE)
    first_taxon <- info[[1]]
    expect_false(exists("lineage", first_taxon))
})

## The "terminal_descendants" element is present only when it is requested.
test_that("taxonomy with include_terminal_descendants=TRUE", {
    skip_on_cran()
    info <- taxonomy_taxon_info(515698, include_terminal_descendants = TRUE)
    first_taxon <- info[[1]]
    expect_true(exists("terminal_descendants", first_taxon))
    expect_true(length(first_taxon[["terminal_descendants"]]) > 1)
})

test_that("taxonomy with include_terminal_descendants=FALSE", {
    skip_on_cran()
    info <- taxonomy_taxon_info(515698, include_terminal_descendants = FALSE)
    first_taxon <- info[[1]]
    expect_false(exists("terminal_descendants", first_taxon))
})
## Shared fixture for the tests below: taxon info for three ott ids.
## Only built when the NOT_CRAN environment variable is "true"
## (presumably to avoid querying the API on CRAN -- the tests that use it
## all call skip_on_cran()).
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
tid <- c(5004030, 337928, 631176)
tax_info <- taxonomy_taxon_info(tid)
}
## Accessor methods applied to the shared `tax_info` fixture. Each result is
## expected to be named after the three taxa.
## NOTE(review): inherits() with a vector of classes succeeds as soon as *any*
## of them matches, so the class checks below pass for any object of class
## "list" -- confirm this is the intended strictness.
test_that("taxonomy_taxon tax_rank method", {
    skip_on_cran()
    ranks <- tax_rank(tax_info)
    expect_true(inherits(ranks, c("otl_tax_rank", "list")))
    expect_equal(
        names(ranks),
        c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")
    )
    expect_equal(unlist(unname(ranks)), rep("genus", 3))
})

test_that("taxonomy_taxon ott_taxon_name method", {
    skip_on_cran()
    taxon_names <- tax_name(tax_info)
    expect_true(inherits(taxon_names, c("otl_tax_info", "list")))
    expect_equal(
        names(taxon_names),
        c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")
    )
    expect_equal(
        unlist(unname(taxon_names)),
        c("Holothuria", "Acanthaster", "Diadema")
    )
})

test_that("taxonomy_taxon synonyms method", {
    skip_on_cran()
    taxon_synonyms <- synonyms(tax_info)
    expect_true(inherits(taxon_synonyms, c("otl_synonyms", "list")))
    expect_equal(
        names(taxon_synonyms),
        c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")
    )
    expect_true(all(c("Diamema", "Centrechinus") %in% taxon_synonyms[[3]]))
})

test_that("taxonomy_taxon is_suppressed method", {
    skip_on_cran()
    suppressed <- is_suppressed(tax_info)
    expect_true(inherits(suppressed, c("otl_is_suppressed", "list")))
    expect_equal(
        names(suppressed),
        c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")
    )
    expect_equal(unlist(unname(suppressed)), c(FALSE, FALSE, FALSE))
})

test_that("taxonomy_taxon flags method", {
    skip_on_cran()
    taxon_flags <- flags(tax_info)
    expect_true(inherits(taxon_flags, c("otl_flags", "list")))
    expect_equal(
        names(taxon_flags),
        c("Holothuria", "Acanthaster", "Diadema (genus in Holozoa)")
    )
    expect_equal(unlist(unname(taxon_flags)), NULL)
})
## tax_lineage() must fail on the shared `tax_info` fixture, which was
## requested without its lineage, and must return a list of data frames when
## the lineage is included.
test_that("higher taxonomy method", {
    skip_on_cran()
    expect_error(tax_lineage(tax_info), "needs to be created")
    lg <- tax_lineage(taxonomy_taxon_info(tid, include_lineage = TRUE))
    expect_true(inherits(lg, "list"))
    expect_true(inherits(lg[[1]], "data.frame"))
    expect_true(all(names(lg[[1]]) %in% c("rank", "name", "unique_name", "ott_id")))
    expect_true(any(grepl("no rank", lg[[1]][["rank"]])))
    ## grepl() rather than grep(): any() should receive logicals, not match
    ## indices, as in the expectation above
    expect_true(any(grepl("life", lg[[1]][["name"]])))
})
### ott_id() --------------------------------------------------------------------
## The ott ids extracted from the shared `tax_info` fixture (three taxa) are
## fed to the other API functions: those that take one id must fail, the
## others must succeed.
test_that("taxonomy_taxon_info with ott_id for tax_info", {
    skip_on_cran()
    ids <- ott_id(tax_info)
    round_trip <- ott_id(taxonomy_taxon_info(ids))
    expect_equivalent(ids, round_trip)
})

test_that("taxonomy_subtree with ott_id for tax_info", {
    skip_on_cran()
    ids <- ott_id(tax_info)
    expect_error(taxonomy_subtree(ott_id = ids), "supply one")
})

test_that("tol_node_info with ott_id for tax_info", {
    skip_on_cran()
    ids <- ott_id(tax_info)
    expect_error(tol_node_info(ids), "provide a single")
})

test_that("tol_subtree with ott_id for tax_info", {
    skip_on_cran()
    ids <- ott_id(tax_info)
    expect_error(tol_subtree(ott_id = ids), "provide a single")
})

test_that("tol_mrca with ott_id for tax_info", {
    skip_on_cran()
    ## expect_equivalent() is used, so only the id value is compared
    expected_mrca <- list("Euleutheroza" = 317277)
    expect_equivalent(expected_mrca, ott_id(tol_mrca(ott_id(tax_info))))
})

test_that("tol_induced_subtree with ott_id for tax_info", {
    skip_on_cran()
    induced <- tol_induced_subtree(ott_id(tax_info))
    expect_true(inherits(induced, "phylo"))
})

test_that("taxonomy_mrca with ott_id for tax_info", {
    skip_on_cran()
    expected_mrca <- list("Euleutheroza" = 317277)
    expect_equivalent(expected_mrca, ott_id(taxonomy_mrca(ott_id(tax_info))))
})
## Subsetting the result of ott_id() must keep the "otl_ott_id" class, and the
## result must be named. The description names the fixture so that this test
## can be told apart from the identically-worded one for the MRCA fixture.
test_that("ott_id subset works for tax_info", {
    skip_on_cran()
    expect_true(inherits(ott_id(tax_info), "otl_ott_id"))
    expect_true(inherits(ott_id(tax_info)[1], "otl_ott_id"))
    expect_true(!is.null(names(ott_id(tax_info))))
})
############################################################################
## taxon subtree ##
############################################################################
## The "raw" output format is a list with a single element named "newick".
test_that("taxonomy subtree raw output", {
    skip_on_cran()
    raw_subtree <- taxonomy_subtree(515698, output_format = "raw")
    expect_true(inherits(raw_subtree, "list"))
    expect_identical(names(raw_subtree), "newick")
})

## Supplying `file` together with the "raw" output format must warn.
test_that("taxonomy subtree returns warning if file is provided with something else than newick output", {
    skip_on_cran()
    expect_warning(
        taxonomy_subtree(515698, output_format = "raw", file = "/foo/bar"),
        "ignored"
    )
})
## With output_format = "newick" and `file`, the call is expected to return
## TRUE and to write a file whose first line starts with "(".
test_that("taxonomy subtree writes a 'valid' newick file", {
    skip_on_cran()
    ff <- tempfile(fileext = ".tre")
    tt <- taxonomy_subtree(515698, output_format = "newick", file = ff)
    expect_true(tt)
    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
    ## remove the temporary file so that test runs do not leave files behind
    unlink(ff)
})
## The "newick" output must be parseable by ape.
test_that("taxonomy subtree returns a valid newick string", {
    skip_on_cran()
    newick_string <- taxonomy_subtree(515698, output_format = "newick")
    parsed <- ape::read.tree(text = newick_string)
    expect_true(inherits(parsed, "phylo"))
})

## The "phylo" output must have class "phylo".
test_that("taxonomy subtree returns a valid phylo object", {
    skip_on_cran()
    subtree <- taxonomy_subtree(515698, output_format = "phylo")
    expect_true(inherits(subtree, "phylo"))
})

## The "taxa" output is a list of two elements, `tip_label` and `edge_label`.
test_that("taxonomy subtree returns valid internal node names", {
    skip_on_cran()
    taxa_labels <- taxonomy_subtree(515698, output_format = "taxa")
    expect_true(inherits(taxa_labels, "list"))
    expect_equal(length(taxa_labels), 2)
    expect_equal(length(taxa_labels$tip_label), 14)
    expect_equal(length(taxa_labels$edge_label), 2)
})

test_that("taxonomy subtree works if taxa has only 1 descendant", {
    skip_on_cran()
    taxa_labels <- taxonomy_subtree(ott_id = 3658331, output_format = "taxa")
    expect_true(inherits(taxa_labels, "list"))
    expect_equal(length(taxa_labels), 2)
    expect_true(inherits(taxa_labels$tip_label, "character"))
})
############################################################################
## taxonomic MRCA ##
############################################################################
## Shared fixtures for the MRCA tests below, only built when the NOT_CRAN
## environment variable is "true": `tax_mrca` is the taxonomic MRCA of three
## ott ids, `tax_mrca_mono` of two.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
tax_mrca <- taxonomy_mrca(ott_id = c(515698, 590452, 643717))
tax_mrca_mono <- taxonomy_mrca(ott_id = c(79623, 962377))
}
## Class of the shared `tax_mrca` fixture.
test_that("taxonomic most recent common ancestor", {
    skip_on_cran()
    for (expected_class in c("taxon_mrca", "list")) {
        expect_true(inherits(tax_mrca, expected_class))
    }
})

## Accessor methods applied to `tax_mrca`; the first element of each result
## is expected to be named "Asterales".
test_that("mrca tax_rank method", {
    skip_on_cran()
    mrca_rank <- tax_rank(tax_mrca)
    expect_equal(mrca_rank[1], list("Asterales" = "order"))
})

test_that("mrca tax_name method", {
    skip_on_cran()
    mrca_name <- tax_name(tax_mrca)
    expect_equal(mrca_name[1], list("Asterales" = "Asterales"))
})

test_that("mrca ott_id method", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca)
    expect_equivalent(mrca_id[1], list("Asterales" = 1042120))
    expect_true(inherits(mrca_id, "otl_ott_id"))
})

test_that("mrca unique_name method", {
    skip_on_cran()
    mrca_unique <- unique_name(tax_mrca)
    expect_equal(mrca_unique[1], list("Asterales" = "Asterales"))
    expect_true(inherits(mrca_unique, "otl_unique_name"))
})

test_that("mrca tax_sources method", {
    skip_on_cran()
    mrca_sources <- tax_sources(tax_mrca)
    expected_sources <- c("ncbi:4209", "worms:234044",
                          "gbif:414", "irmng:10011")
    expect_equal(mrca_sources[1], list("Asterales" = expected_sources))
    expect_true(inherits(mrca_sources, "otl_tax_sources"))
})

test_that("mrca is_suppressed method", {
    skip_on_cran()
    mrca_suppressed <- is_suppressed(tax_mrca)
    expect_true(inherits(mrca_suppressed, c("otl_is_suppressed", "list")))
    expect_equal(mrca_suppressed[1], list("Asterales" = FALSE))
})

test_that("mrca flags method", {
    skip_on_cran()
    mrca_flags <- flags(tax_mrca)
    expect_true(inherits(mrca_flags, c("otl_flags", "list")))
    expect_equal(mrca_flags[1], list("Asterales" = NULL))
})
### ott_id() --------------------------------------------------------------------
## The ott id extracted from the shared `tax_mrca_mono` fixture is fed to the
## other API functions.
test_that("taxonomy_taxon_info with ott_id for tax_mrca", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca_mono)
    expect_equivalent(mrca_id, ott_id(taxonomy_taxon_info(mrca_id)))
})

test_that("taxonomy_subtree with ott_id for tax_mrca", {
    skip_on_cran()
    subtree_taxa <- taxonomy_subtree(ott_id = ott_id(tax_mrca_mono))
    expect_true(length(subtree_taxa[["tip_label"]]) > 10)
    expect_true(length(subtree_taxa[["edge_label"]]) > 1)
})

test_that("tol_node_info with ott_id for tax_mrca", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca_mono)
    expect_equivalent(mrca_id, ott_id(tol_node_info(mrca_id)))
})

test_that("tol_subtree with ott_id for tax_mrca", {
    skip_on_cran()
    subtree <- tol_subtree(ott_id = ott_id(tax_mrca_mono))
    expect_true(inherits(subtree, "phylo"))
    expect_true(length(subtree$tip.label) > 1)
    expect_true(length(subtree$node.label) > 1)
})

test_that("tol_mrca with ott_id for tax_mrca", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca_mono)
    expect_equivalent(mrca_id, ott_id(tol_mrca(mrca_id)))
})

## A single id is not enough for an induced subtree.
test_that("tol_induced_subtree with ott_id for tax_mrca", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca_mono)
    expect_error(tol_induced_subtree(mrca_id), "least two valid")
})

test_that("taxonomy_mrca with ott_id for tax_mrca", {
    skip_on_cran()
    mrca_id <- ott_id(tax_mrca_mono)
    expect_equivalent(mrca_id, ott_id(taxonomy_mrca(mrca_id)))
})
## Subsetting the result of ott_id() must keep the "otl_ott_id" class, and the
## result must be named. The description names the fixture so that this test
## can be told apart from the identically-worded one for the taxon info
## fixture.
test_that("ott_id subset works for tax_mrca", {
    skip_on_cran()
    expect_true(inherits(ott_id(tax_mrca_mono), "otl_ott_id"))
    expect_true(inherits(ott_id(tax_mrca_mono)[1], "otl_ott_id"))
    expect_true(!is.null(names(ott_id(tax_mrca_mono))))
})
### is_in_tree() ---------------------------------------------------------------
## Shared fixture for the is_in_tree() test, only built when the NOT_CRAN
## environment variable is "true": four names are matched through the TNRS
## and their ott ids are kept.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
spp <- c("Tyrannosaurus rex", "Velociraptor", "Fabaceae", "Solanaceae")
ot_names <- tnrs_match_names(spp)
ot_ids <- ott_id(ot_names)
}
## Exactly one of the four matched taxa is expected to be flagged as being in
## the tree, and the result must be named after the queried names.
test_that("test is_in_tree", {
    skip_on_cran()
    in_tree <- is_in_tree(ot_ids)
    n_in_tree <- sum(in_tree)
    expect_equal(n_in_tree, 1)
    result_names <- names(in_tree)
    expect_true(all(result_names %in% spp))
})
rotl/tests/testthat/test-tnrs.R 0000644 0001775 0000144 00000005412 12705157664 016661 0 ustar deepayan users context("tnrs")
############################################################################
## tnrs_match_names ##
############################################################################
## An unknown context name must raise an error.
test_that("tnrs_match_names fails if incorrect context is provided", {
    skip_on_cran()
    expect_error(
        tnrs_match_names("felis", context_name = "Cats"),
        "Check possible values using tnrs_contexts"
    )
})

## With approximate matching off, a name without any match must raise an
## error...
test_that("tnrs_match_names fails if invalid name provided (nothing returned)", {
    skip_on_cran()
    expect_error(
        tnrs_match_names("fluffy", do_approximate_matching = FALSE),
        "No matches for any of the provided taxa"
    )
})

## ...but only a warning when at least one other name is matched.
test_that("tnrs_match_names warns if a name is not matched", {
    skip_on_cran()
    queried <- c("fluffy", "felis")
    expect_warning(
        tnrs_match_names(queried, do_approximate_matching = FALSE),
        "are not matched"
    )
})
## The two flag columns of the result must be logical.
test_that("object returned by tnrs_match_names have the correct data type", {
    skip_on_cran()
    birds <- c("stercorarius parasiticus", "ficedula albicollis", "sterna dougallii")
    matched <- tnrs_match_names(birds, do_approximate_matching = FALSE)
    for (flag_column in c("approximate_match", "is_synonym")) {
        expect_true(is.logical(matched[[flag_column]]))
    }
})

## A name that cannot be matched exactly keeps its row, filled with NAs.
test_that("tnrs_match_names deals correctly with non-exact matches", {
    skip_on_cran()
    birds <- c("stercorarius parasiticus", "ficedula albicollis", "sternadougallii")
    expect_warning(
        taxa <- tnrs_match_names(birds, do_approximate_matching = FALSE),
        "are not matched"
    )
    expect_equal(nrow(taxa), 3L)
    unmatched_row <- taxa[match("sternadougallii", taxa[["search_string"]]), ]
    expect_equivalent(
        unmatched_row,
        list("sternadougallii", NA_character_, NA, NA_character_, NA,
             NA_character_, NA_character_)
    )
})
## everything else is covered by the match_names + the API tests
############################################################################
## tnrs_contexts ##
############################################################################
## tnrs_contexts() returns an object of class "tnrs_contexts" whose names all
## belong to the five expected groups.
test_that("tnrs_contexts", {
    skip_on_cran()
    expected_groups <- c("ANIMALS", "MICROBES", "FUNGI", "PLANTS", "LIFE")
    contexts <- tnrs_contexts()
    expect_true(inherits(contexts, "tnrs_contexts"))
    expect_true(all(names(contexts) %in% expected_groups))
})
############################################################################
## tnrs_infer_context ##
############################################################################
## For the names "Felis" and "Leo", the inferred context is expected to be
## "Mammals", with "leo" reported as ambiguous.
test_that("tnrs_infer_context", {
    skip_on_cran()
    inferred <- tnrs_infer_context(c("Felis", "Leo"))
    expect_equal(inferred[["context_name"]], "Mammals")
    expect_equal(inferred[["context_ott_id"]], 244265)
    first_ambiguous <- inferred[["ambiguous_names"]][[1]]
    expect_equal(first_ambiguous, "leo")
})
rotl/tests/testthat/test-deduplicate_labels.R 0000644 0001775 0000144 00000003103 12567651142 021467 0 ustar deepayan users tr_string <- "
((A,A),A 1); ((B.1,B,C),B);
((D,D_1),D.1);
((('A 1','A 1'),A.1),'A 1');
((('A A A','A A A'),A.1),'A 1');
((((A_1:0.1,B__2:0.1)cats:0.1,(A_1:0.1,A_1:0.1)dogs:0.1)mammals:0.1):0.1)fur:0.1;
"
## Write the newick strings to a temporary file that the tests below read from
file_dup <- tempfile()
cat(tr_string, file = file_dup, sep = "\n")
############################################################################
## parse_newick ##
############################################################################
context("parse_newick")

## parse_newick() must return one character string per tree in the file.
test_that("parse newick works correctly", {
    parsed_trees <- parse_newick(file_dup)
    expect_true(is.character(parsed_trees))
    expect_equal(length(parsed_trees), 6L)
})
############################################################################
## deduplicate_labels ##
############################################################################
context("deduplicate_labels")

## deduplicate_labels() must warn, return the path of an existing file, and
## that file must be readable as a multiPhylo object.
test_that("deduplicate labels works on made up example", {
    expect_warning(
        deduplicated_file <- deduplicate_labels(file_dup),
        "Some tip labels were duplicated"
    )
    expect_true(file.exists(deduplicated_file))
    trees <- rncl::read_newick_phylo(file = deduplicated_file)
    expect_true(inherits(trees, "multiPhylo"))
    expect_equal(trees[[6]]$tip.label, c("A_1_1", "B__2", "A_1_2", "A_1"))
})

## The same warning is expected when fetching this tree from a study.
test_that("deduplicate labels works on a OTL study", {
    skip_on_cran()
    expect_warning(
        get_study_tree(study_id = "pg_710", tree_id = "tree1277",
                       tip_label = "ott_taxon_name"),
        "Some tip labels were duplicated"
    )
})

## remove the temporary newick file used by the tests in this file
unlink(file_dup)
rotl/tests/testthat/test-tol.R 0000644 0001775 0000144 00000037775 12770013470 016476 0 ustar deepayan users ############################################################################
## tol_about ##
############################################################################
context("test tol_about (and in turn print.tol_summary)")
## Shared fixture for the tests below: the result of tol_about() with the
## source list included. Only built when the NOT_CRAN environment variable
## is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
req <- tol_about(include_source_list = TRUE)
}
## The element names of the shared `req` fixture, of its root and of the
## root's taxon must all belong to the expected sets; the source list must be
## a table of study/tree identifiers.
test_that("Names in object returned are correct/match the docs", {
    skip_on_cran()
    top_level_fields <- c("source_list", "date_created", "root",
                          "num_source_trees", "taxonomy_version",
                          "num_source_studies", "filtered_flags",
                          "synth_id", "source_id_map")
    root_fields <- c("taxon", "num_tips", "node_id")
    taxon_fields <- c("tax_sources", "name", "unique_name", "rank", "ott_id")
    expect_true(all(names(req) %in% top_level_fields))
    expect_true(all(names(req$root) %in% root_fields))
    expect_true(all(names(req$root$taxon) %in% taxon_fields))

    sources <- source_list(req)
    expect_true(all(names(sources) %in% c("study_id", "tree_id", "git_sha")))
    ## source_list() must fail on an object requested without the source list
    expect_error(
        source_list(tol_about(include_source_list = FALSE)),
        "has been created using"
    )
    expect_true(nrow(sources) > 1)
    expect_true(all(grepl("^(ot|pg)", sources[["study_id"]])))
    expect_true(all(grepl("^tr", sources[["tree_id"]], ignore.case = TRUE)))
})
## Accessor methods applied to `req`, the object returned by tol_about().
## The descriptions name tol_about (not tol_node) so that failures can be
## told apart from the tol_node_info() tests that use the same accessors.

test_that("tol_about tax_rank method", {
  skip_on_cran()
  expect_true(inherits(tax_rank(req), c("otl_rank", "list")))
  expect_equal(tax_rank(req)[[1]], "no rank")
})

test_that("tol_about ott_id method", {
  skip_on_cran()
  expect_true(inherits(ott_id(req), c("otl_ott_id", "list")))
  expect_equal(ott_id(req)[[1]], 93302)
  expect_equal(names(ott_id(req)), "cellular organisms")
})

test_that("tol_about tax_sources", {
  skip_on_cran()
  expect_true(inherits(tax_sources(req), c("otl_tax_sources", "list")))
  expect_true(any(grepl("ncbi", tax_sources(req)[[1]])))
  expect_equal(names(tax_sources(req)), "cellular organisms")
})

test_that("tol_about unique_name", {
  skip_on_cran()
  expect_true(inherits(unique_name(req), c("otl_unique_name", "list")))
  expect_equal(unique_name(req)[[1]], "cellular organisms")
  expect_equal(names(unique_name(req)), "cellular organisms")
})

test_that("tol_about tax_name", {
  skip_on_cran()
  expect_true(inherits(tax_name(req), c("otl_name", "list")))
  expect_equal(tax_name(req)[[1]], "cellular organisms")
  expect_equal(names(tax_name(req)), "cellular organisms")
})
### ott_id() --------------------------------------------------------------------
## Feed the ott id extracted from `req` to the other API wrappers.

test_that("taxonomy_taxon_info with ott_id for tol_about", {
  skip_on_cran()
  root_id <- ott_id(req)
  expect_equal(root_id, ott_id(taxonomy_taxon_info(root_id)))
})

## can't do that, it's pulling the whole tree
## test_that("taxonomy_subtree with ott_id for tol_about", {
## taxonomy_subtree(ott_id = ott_id(req))
## })

test_that("tol_node_info with ott_id for tol_about", {
  skip_on_cran()
  root_id <- ott_id(req)
  expect_equal(root_id, ott_id(tol_node_info(root_id)))
})

## can't do that, it's pulling the whole tree
## test_that("tol_subtree with ott_id for tol_about", {
## tol_subtree(ott_id = ott_id(req))
## })

test_that("tol_mrca with ott_id for tol_about", {
  skip_on_cran()
  root_id <- ott_id(req)
  expect_equal(root_id[1], ott_id(tol_mrca(root_id))[1])
})

test_that("tol_induced_subtree with ott_id for tol_about", {
  skip_on_cran()
  ## a single id is not enough to induce a subtree
  expect_error(tol_induced_subtree(ott_id(req)),
               "least two valid")
})

test_that("taxonomy_mrca with ott_id for tol_about", {
  skip_on_cran()
  root_id <- ott_id(req)
  expect_equal(root_id, ott_id(taxonomy_mrca(root_id)))
})
############################################################################
## tol_subtree ##
############################################################################
context("test tol_subtree")

test_that("tol_subtree fails if ott_id is invalid", {
  skip_on_cran()
  expect_error(tol_subtree(ott_id = 6666666))
})

test_that("tol_subtree fails if more than one ott_id is provided", {
  skip_on_cran()
  two_ids <- c(666666, 6666667)
  expect_error(tol_subtree(ott_id = two_ids),
               "Please provide a single")
})

test_that("tol_subtree fails if ott_id doesn't look like a number", {
  skip_on_cran()
  expect_error(tol_subtree(ott_id = "111A1111"),
               "must look like numbers")
})

test_that("tol_subtree returns a phylo object by default", {
  skip_on_cran()
  subtree <- tol_subtree(ott_id = 81461)
  expect_true(inherits(subtree, "phylo"))
})
test_that("tol_subtree returns a newick file when providing a file argument", {
  skip_on_cran()
  ff <- tempfile(fileext = ".tre")
  tr <- tol_subtree(ott_id = 81461, file = ff)
  expect_true(tr)
  ## a newick string starts with an opening parenthesis
  expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
  ## do not leave the temporary tree file behind
  unlink(ff)
})
############################################################################
## tol_induced_subtree ##
############################################################################
context("test tol_induced_subtree")

## The expectation below is an error (not a warning), and the values passed
## are ott ids: the description says so.
test_that("error for ott ids that are not in TOL graph", {
  skip_on_cran()
  expect_error(tol_induced_subtree(ott_ids = c(357968, 867416, 939325, 9999999)),
               "not found")
})

test_that("error if ott_ids provided don't look like numbers", {
  skip_on_cran()
  expect_error(tol_induced_subtree(ott_ids = c("13242", "kitten")),
               "must look like numbers")
})

## test_that("warning for ott ids not in tree",
## ???)

test_that("tol_induced_subtree generates a newick file when providing a file argument", {
  skip_on_cran()
  ff <- tempfile(fileext = ".tre")
  tr <- tol_induced_subtree(ott_ids = c(292466, 267845, 666104), file = ff)
  expect_true(tr)
  ## a newick string starts with an opening parenthesis
  expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
  ## do not leave the temporary tree file behind
  unlink(ff)
})
############################################################################
## tol_mrca ##
############################################################################
## Give this section its own context so its results are not reported under
## the context of the preceding section.
context("test tol_mrca")

## Shared fixtures, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  birds <- tol_mrca(ott_ids = c(412129, 536234))
  hol <- tol_mrca(c(431586, 957434))
  mono <- tol_mrca(ott_ids = c(962377, 79623))
}

test_that("tol_mrca fails if ott_ids are not numbers", {
  skip_on_cran()
  expect_error(tol_mrca(ott_ids = c(13243, "a13415")),
               "must look like numbers")
})

test_that("tol_mrca returns a list", {
  skip_on_cran()
  expect_true(inherits(birds, "list"))
  expect_true(inherits(birds, "tol_mrca"))
  expect_true(all(names(birds) %in%
                    c("mrca", "source_id_map", "nearest_taxon")))
})
test_that("methods for tol_mrca where the node is a taxon", {
  skip_on_cran()

  ## classes of the values returned by the accessors
  expect_true(inherits(tax_sources(hol), c("otl_tax_sources", "list")))
  expect_true(inherits(unique_name(hol), c("otl_unique_name", "list")))
  expect_true(inherits(tax_name(hol), c("otl_name", "list")))
  expect_true(inherits(tax_rank(hol), c("otl_rank", "list")))
  expect_true(inherits(ott_id(hol), c("otl_ott_id", "list")))

  ## content of the values returned by the accessors
  expect_true(length(tax_sources(hol)[[1]]) > 1)
  expect_true(any(grepl("worms", tax_sources(hol)[[1]])))
  expect_equal(unique_name(hol)[[1]], "Holothuria")
  expect_equal(tax_name(hol)[[1]], "Holothuria")
  expect_equal(tax_rank(hol)[[1]], "genus")
  expect_equal(ott_id(hol)[[1]], 5004030)
  expect_equal(names(tax_sources(hol)), "Holothuria")
  expect_true(all(names(source_list(hol)) %in%
                    c("tree_id", "study_id", "git_sha")))
  expect_equal(attr(tax_sources(hol), "taxon_type"), "mrca")
})

test_that("methods for tol_mrca where the node is not a taxon", {
  skip_on_cran()
  expect_true(inherits(birds, "list"))

  ## classes of the values returned by the accessors
  expect_true(inherits(tax_sources(birds), c("otl_tax_sources", "list")))
  expect_true(inherits(unique_name(birds), c("otl_unique_name", "list")))
  expect_true(inherits(tax_name(birds), c("otl_name", "list")))
  expect_true(inherits(tax_rank(birds), c("otl_rank", "list")))
  expect_true(inherits(ott_id(birds), c("otl_ott_id", "list")))

  ## content of the values returned by the accessors
  expect_true(length(tax_sources(birds)[[1]]) >= 1)
  expect_true(any(grepl("ncbi", tax_sources(birds)[[1]])))
  expect_equal(unique_name(birds)[[1]], "Neognathae")
  expect_equal(tax_name(birds)[[1]], "Neognathae")
  expect_equal(tax_rank(birds)[[1]], "superorder")
  expect_equal(ott_id(birds)[[1]], 241846)
  expect_equal(names(ott_id(birds)), "Neognathae")
  expect_true(all(names(source_list(birds)) %in%
                    c("tree_id", "study_id", "git_sha")))
  expect_equal(attr(tax_sources(birds), "taxon_type"), "nearest_taxon")
})
### ott_id() --------------------------------------------------------------------
## Feed the ott id extracted from `mono` to the other API wrappers.

test_that("taxonomy_taxon_info with ott_id for tol_mrca", {
  skip_on_cran()
  mono_id <- ott_id(mono)
  expect_equal(mono_id[1], ott_id(taxonomy_taxon_info(mono_id))[1])
})

test_that("taxonomy_subtree with ott_id for tol_mrca", {
  skip_on_cran()
  subtree <- taxonomy_subtree(ott_id = ott_id(mono))
  expect_true(length(subtree[["tip_label"]]) > 10)
  expect_true(length(subtree[["edge_label"]]) > 7)
})

test_that("tol_node_info with ott_id for tol_mrca", {
  skip_on_cran()
  mono_id <- ott_id(mono)
  expect_equal(mono_id[1], ott_id(tol_node_info(mono_id))[1])
})

test_that("tol_subtree with ott_id for tol_mrca", {
  skip_on_cran()
  subtree <- tol_subtree(ott_id = ott_id(mono))
  expect_true(inherits(subtree, "phylo"))
  expect_true(length(subtree$tip.label) > 1)
  expect_true(length(subtree$node.label) > 1)
})

test_that("tol_mrca with ott_id for tol_mrca", {
  skip_on_cran()
  mono_id <- ott_id(mono)
  expect_equal(mono_id[1], ott_id(tol_mrca(mono_id))[1])
})

test_that("tol_induced_subtree with ott_id for tol_mrca", {
  skip_on_cran()
  ## a single id is not enough to induce a subtree
  expect_error(tol_induced_subtree(ott_id(mono)),
               "least two valid")
})

test_that("taxonomy_mrca with ott_id for tol_mrca", {
  skip_on_cran()
  mono_id <- ott_id(mono)
  expect_equivalent(mono_id, ott_id(taxonomy_mrca(mono_id)))
})
############################################################################
## strip_ott_ids ##
############################################################################
## Give this section its own context so its results are not reported under
## the context of the preceding section.
context("test strip_ott_ids")

test_that("OTT ids can be stripped from tip labels to allow taxon-matching", {
  skip_on_cran()
  genera <- c("Setophaga", "Cinclus", "Struthio")
  tr <- tol_induced_subtree(ott_ids = c(666104, 267845, 292466))
  ## once the ott ids are removed, only the genus names should remain
  expect_true(all(strip_ott_ids(tr$tip.label) %in% genera))
})
############################################################################
## tol_node_info ##
############################################################################
## Give this section its own context so its results are not reported under
## the context of the preceding section.
context("test tol_node_info")

## Shared fixtures, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  tol_info <- tol_node_info(ott_id = 81461)
  tol_lin <- tol_node_info(ott_id = 81461, include_lineage = TRUE)
  tol_mono <- tol_node_info(ott_id = 962396)
}

test_that("tol node info.", {
  skip_on_cran()
  expect_true(all(names(tol_info) %in%
                    c("partial_path_of", "supported_by", "source_id_map",
                      "taxon", "num_tips", "terminal", "node_id")))
  expect_true(inherits(tol_info, "tol_node"))
})
### methods ---------------------------------------------------------------------

test_that("tol_node tax_rank method", {
  skip_on_cran()
  ## "otl_rank" is the class name checked by the other tax_rank() tests in
  ## this file; inherits() is TRUE as soon as one of the classes matches.
  expect_true(inherits(tax_rank(tol_info),
                       c("otl_rank", "list")))
  expect_equal(tax_rank(tol_info)[[1]], "class")
})
## Accessor methods applied to `tol_info` (tol_node_info() for ott id 81461).

test_that("tol_node ott_id method", {
  skip_on_cran()
  expect_true(inherits(ott_id(tol_info), c("otl_ott_id", "list")))
  expect_equal(ott_id(tol_info)[[1]], 81461)
  expect_equal(names(ott_id(tol_info)), "Aves")
})

test_that("tol_node tax_sources", {
  skip_on_cran()
  expect_true(inherits(tax_sources(tol_info), c("otl_tax_sources", "list")))
  expect_true(any(grepl("worms", tax_sources(tol_info)[[1]])))
  expect_equal(names(tax_sources(tol_info)), "Aves")
})

test_that("tol_node unique_name", {
  skip_on_cran()
  expect_true(inherits(unique_name(tol_info), c("otl_unique_name", "list")))
  expect_equal(unique_name(tol_info)[[1]], "Aves")
  expect_equal(names(unique_name(tol_info)), "Aves")
})

test_that("tol_node tax_name", {
  skip_on_cran()
  expect_true(inherits(tax_name(tol_info), c("otl_name", "list")))
  expect_equal(tax_name(tol_info)[[1]], "Aves")
  expect_equal(names(tax_name(tol_info)), "Aves")
})

test_that("tol_node source_list method", {
  skip_on_cran()
  source_fields <- c("study_id", "tree_id", "git_sha")
  expect_true(inherits(source_list(tol_info), "data.frame"))
  expect_true(all(names(source_list(tol_info)) %in% source_fields))
})
## `tol_info` was requested without the lineage, `tol_lin` with it.

test_that("tol_node tol_lineage", {
  skip_on_cran()
  lineage_fields <- c("node_id", "num_tips", "is_taxon")

  expect_error(tol_lineage(tol_info), "needs to be created")
  expect_true(inherits(tol_lineage(tol_lin), "data.frame"))
  expect_true(nrow(tol_lineage(tol_lin)) > 1)
  expect_true(all(names(tol_lineage(tol_lin)) %in% lineage_fields))
  expect_true(all(grepl("^(ott|mrca)", tol_lineage(tol_lin)[["node_id"]])))
})

test_that("tol_node tax_lineage", {
  skip_on_cran()
  lineage_fields <- c("rank", "name", "unique_name", "ott_id")

  expect_error(tax_lineage(tol_info), "needs to be created")
  expect_true(inherits(tax_lineage(tol_lin), "data.frame"))
  expect_true(nrow(tax_lineage(tol_lin)) > 1)
  expect_true(all(names(tax_lineage(tol_lin)) %in% lineage_fields))
  expect_true(any(grepl("no rank", tax_lineage(tol_lin)[["rank"]])))
  expect_true(any(grepl("cellular organisms",
                        tax_lineage(tol_lin)[["name"]])))
})
### ott_id() --------------------------------------------------------------------
## Feed the ott id extracted from `tol_mono` to the other API wrappers.

test_that("taxonomy_taxon_info with ott_id for tol_info", {
  skip_on_cran()
  node_id <- ott_id(tol_mono)
  expect_equivalent(node_id, ott_id(taxonomy_taxon_info(node_id)))
})

test_that("taxonomy_subtree with ott_id for tol_info", {
  skip_on_cran()
  subtree <- taxonomy_subtree(ott_id = ott_id(tol_mono))
  expect_true(length(subtree[["tip_label"]]) > 10)
  expect_true(length(subtree[["edge_label"]]) > 7)
})

test_that("tol_node_info with ott_id for tol_info", {
  skip_on_cran()
  node_id <- ott_id(tol_mono)
  expect_equivalent(node_id, ott_id(tol_node_info(node_id)))
})

test_that("tol_subtree with ott_id for tol_info", {
  skip_on_cran()
  subtree <- tol_subtree(ott_id = ott_id(tol_mono))
  expect_true(inherits(subtree, "phylo"))
  expect_true(length(subtree$tip.label) > 1)
  expect_true(length(subtree$node.label) > 1)
})

test_that("tol_mrca with ott_id for tol_info", {
  skip_on_cran()
  node_id <- ott_id(tol_mono)
  expect_equivalent(node_id, ott_id(tol_mrca(node_id)))
})

test_that("tol_induced_subtree with ott_id for tol_info", {
  skip_on_cran()
  ## a single id is not enough to induce a subtree
  expect_error(tol_induced_subtree(ott_id(tol_mono)),
               "least two valid")
})

test_that("taxonomy_mrca with ott_id for tol_info", {
  skip_on_cran()
  node_id <- ott_id(tol_mono)
  expect_equivalent(node_id, ott_id(taxonomy_mrca(node_id)))
})
rotl/tests/testthat/test-api-studies.R 0000644 0001775 0000144 00000013504 12674100604 020106 0 ustar deepayan users context("studies API tests")
###########################
## .studies_find_studies ##
###########################
## Argument validation of .studies_find_studies(); each call below is
## expected to fail with the given message.

test_that("argument verbose needs to be logical for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, NULL, "123", FALSE),
               "logical")
})

test_that("argument exact needs to be logical for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, NULL, TRUE, "123"),
               "logical")
})

test_that("argument property needs to be character for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(123, NULL, TRUE, TRUE),
               "character")
})

test_that("argument value needs to be character for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies("test", 123, TRUE, TRUE),
               "character")
})

test_that("both property & value need to be provided for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies("test", NULL, TRUE, TRUE),
               "Must supply")
})

test_that("both property & value need to be provided for .studies_find_studies", {
  skip_on_cran()
  expect_error(.studies_find_studies(NULL, "test", TRUE, TRUE),
               "Must supply")
})
###########################
## .studies_find_trees ##
###########################
## Argument validation of .studies_find_trees(); each call below is expected
## to fail with the given message.

test_that("argument verbose needs to be logical for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, NULL, "123", FALSE),
               "logical")
})

test_that("argument exact needs to be logical for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, NULL, TRUE, "123"),
               "logical")
})

test_that("argument property needs to be character for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(123, NULL, TRUE, TRUE),
               "character")
})

test_that("argument value needs to be character for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees("test", 123, TRUE, TRUE),
               "character")
})

test_that("both property & value need to be provided for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees("test", NULL, TRUE, TRUE),
               "Must supply")
})

test_that("both property & value need to be provided for .studies_find_trees", {
  skip_on_cran()
  expect_error(.studies_find_trees(NULL, "test", TRUE, TRUE),
               "Must supply")
})

## With exact = TRUE the lower-case value is expected to match nothing,
## while the capitalised value matches at least one study.
test_that("exact works as intended", {
  skip_on_cran()
  found <- .studies_find_studies("ot:focalCladeOTTTaxonName",
                                 "felidae", exact = TRUE)
  expect_equal(length(found$matched_studies), 0)
})

test_that("exact works as intended", {
  skip_on_cran()
  found <- .studies_find_studies("ot:focalCladeOTTTaxonName",
                                 "Felidae", exact = TRUE)
  expect_true(length(found$matched_studies) >= 1)
})
############################################################################
## .get_study ##
############################################################################
## Argument validation of .get_study().

test_that("study_id isn't NULL for .get_study", {
  skip_on_cran()
  expect_error(.get_study(NULL, "test"),
               "Must supply")
})

test_that("study_id is character for .get_study", {
  skip_on_cran()
  expect_error(.get_study(TRUE, "test"),
               "character")
})
############################################################################
## .get_study_tree ##
############################################################################
## Argument validation of .get_study_tree(). The first positional argument
## is the study id and the second the tree id; each description names the
## argument (and the function) the call actually exercises.

test_that("study_id and tree_id aren't both NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(NULL, NULL),
               "Must supply")
})

test_that("tree_id isn't NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree("123", NULL),
               "Must supply")
})

test_that("study_id isn't NULL for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(NULL, "123"),
               "Must supply")
})

test_that("study_id is character for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree(TRUE, "test"),
               "character")
})

test_that("tree_id is character for .get_study_tree", {
  skip_on_cran()
  expect_error(.get_study_tree("test", TRUE),
               "character")
})
############################################################################
## .get_study_subtree ##
############################################################################
## Argument validation of .get_study_subtree(). Positional arguments are, in
## order, the study id, the tree id and the subtree id; each description
## names the argument (and the function) the call actually exercises.

test_that("ids aren't all NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(NULL, NULL, NULL),
               "Must supply")
})

test_that("tree_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("123", NULL, "123"),
               "Must supply")
})

test_that("study_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(NULL, "123", "123"),
               "Must supply")
})

test_that("subtree_id isn't NULL for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("123", "123", NULL),
               "Must supply")
})

test_that("study_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree(TRUE, "test", "test"),
               "character")
})

test_that("tree_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("test", TRUE, "test"),
               "character")
})

test_that("subtree_id is character for .get_study_subtree", {
  skip_on_cran()
  expect_error(.get_study_subtree("test", "test", TRUE),
               "character")
})
rotl/tests/testthat/test-external.R 0000644 0001775 0000144 00000002434 12707722370 017510 0 ustar deepayan users context("Study external data")
## Shared fixtures, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  all_sources <- c("doi", "pubmed_id", "external_data_url", "popset_ids",
                   "nucleotide_ids")
  all_data <- study_external_IDs("pg_1940")
}

## Class checks use expect_true(inherits()) like the rest of the test suite,
## rather than the old-style expect_that(x, is_a()) form.

test_that("We can recover dois, pmids, NCBI IDs", {
  skip_on_cran()
  expect_true(inherits(all_data, "study_external_data"))
  expect_named(all_data)
})

test_that("We can handle studies with missing external IDs", {
  skip_on_cran()
  expect_warning(
    missing_data <- study_external_IDs("ot_97"), "skipping NCBI"
  )
  expect_named(missing_data)
  expect_true(inherits(missing_data, "study_external_data"))
  ## we really skipped the NCBI: two of the expected sources are absent
  expect_equal(sum(is.na(match(all_sources, names(missing_data)))), 2)
})

test_that("The print functions for external data objects work", {
  skip_on_cran()
  missing_data <- study_external_IDs("ot_91")
  expect_output(print(all_data), "External data identifiers for study")
  expect_output(print(missing_data), "External data identifiers for study")
})

context("Taxon external data")

test_that("We can recover external IDs for Open Tree taxa", {
  skip_on_cran()
  gibbon_IDs <- taxon_external_IDs(712902)
  expect_true(inherits(gibbon_IDs, "data.frame"))
  expect_equal(names(gibbon_IDs), c("source", "id"))
})
rotl/tests/testthat/test-api-tnrs.R 0000644 0001775 0000144 00000004130 12705157664 017424 0 ustar deepayan users context("tnrs API")
############################################################################
## .tnrs_match_names ##
############################################################################
## Argument validation of .tnrs_match_names(); each call below is expected
## to fail with the given message.

test_that("names argument is provided for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names(NULL, NULL, TRUE, NULL, FALSE),
               "must supply")
})

test_that("names argument is character for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names(TRUE, NULL, TRUE, NULL, FALSE),
               "character")
})

test_that("names and ids have the same lengths for .tnrs_match_names", {
  skip_on_cran()
  two_ids <- c("abc", "def")
  expect_error(.tnrs_match_names("Felis", NULL, TRUE, two_ids, FALSE),
               "same length")
})

test_that("ids must be character for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names("Felis", NULL, TRUE, TRUE, FALSE),
               "character")
})

test_that("do_approximate_matching is logical for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names("Felis", NULL, "true", NULL, FALSE),
               "logical")
})

test_that("include_suppressed is logical for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names("Felis", NULL, TRUE, NULL, "true"),
               "logical")
})

test_that("context_name is character for .tnrs_match_names", {
  skip_on_cran()
  expect_error(.tnrs_match_names("Felis", TRUE, TRUE, NULL, FALSE, TRUE),
               "character")
})
############################################################################
## .tnrs_infer_context ##
############################################################################
## Argument validation of .tnrs_infer_context().

test_that("names is not NULL for .tnrs_infer_context", {
  skip_on_cran()
  expect_error(.tnrs_infer_context(NULL),
               "Must supply")
})

test_that("names is character for .tnrs_infer_context", {
  skip_on_cran()
  expect_error(.tnrs_infer_context(TRUE),
               "character")
})
rotl/tests/testthat/test-match_names.R 0000644 0001775 0000144 00000033476 12774536352 020166 0 ustar deepayan users context("match names")
############################################################################
## check_args_match_names ##
############################################################################
context("check_args_match_names")

## Shared fixture, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
}

## This test does not use `rsp`, so it is not skipped on CRAN.
test_that("error generated if object provided isn't created by tnrs_match_names", {
  expect_error(rotl:::check_args_match_names(letters),
               "was not created using")
})

test_that("error generated if no argument is provided", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp),
               "You must specify")
})

test_that("error generated if row_number and taxon_name are provided", {
  skip_on_cran()
  expect_error(
    rotl:::check_args_match_names(rsp, row_number = 1,
                                  taxon_name = "holothuria"),
    "must use only one of "
  )
})

test_that("error generated if row_number and ott_id are provided", {
  skip_on_cran()
  expect_error(
    rotl:::check_args_match_names(rsp, row_number = 1,
                                  ott_id = 5004030),
    "must use only one of"
  )
})

test_that("error generated if ott_id and taxon_name are provided", {
  skip_on_cran()
  expect_error(
    rotl:::check_args_match_names(rsp, taxon_name = "holothuria",
                                  ott_id = 5004030),
    "must use only one of"
  )
})

test_that("error generated if row_number is not numeric", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, row_number = TRUE),
               "must be a numeric")
})

test_that("error generated if ott_id is not numeric", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, ott_id = TRUE),
               "must look like a number")
})

test_that("error generated if taxon_name is not character", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, taxon_name = TRUE),
               "must be a character")
})
## Row numbers that are too large, fractional, or zero must all be rejected.
test_that("error generated if row_number is not one of the rows", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, row_number = 10),
               "is not a valid row number")
  expect_error(rotl:::check_args_match_names(rsp, row_number = 1.5),
               "is not a valid row number")
  expect_error(rotl:::check_args_match_names(rsp, row_number = 0),
               "is not a valid row number")
})
## Values that are absent from `rsp`, or that have more than one element,
## must be rejected.

test_that("error generated if invalid taxon_name", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, taxon_name = "echinodermata"),
               "Can't find")
  expect_error(rotl:::check_args_match_names(rsp, taxon_name = NA_character_),
               "Can't find")
})

test_that("error generated if invalid ott id", {
  skip_on_cran()
  expect_error(rotl:::check_args_match_names(rsp, ott_id = 66666),
               "Can't find")
})

test_that("error generated if more than 1 value for row_number is provided", {
  skip_on_cran()
  several_rows <- c(1, 2, 3)
  expect_error(rotl:::check_args_match_names(rsp, row_number = several_rows),
               "You must supply a single element")
})

test_that("error generated if more than 1 value for taxon_name is provided", {
  skip_on_cran()
  several_names <- c("holothuria", "diadema")
  expect_error(rotl:::check_args_match_names(rsp, taxon_name = several_names),
               "You must supply a single element")
})

test_that("error generated if more than 1 value for ott_id is provided", {
  skip_on_cran()
  several_ids <- c(5004030, 4930522, 240396)
  expect_error(rotl:::check_args_match_names(rsp, ott_id = several_ids),
               "only 1 element should be provided")
})
############################################################################
## inspect.match_names ##
############################################################################
context("inspect.match_names")

## Shared fixtures, only built when NOT_CRAN is "true". `rsp_na` contains a
## name ("fluffy") whose lookup is expected to raise a warning.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
  expect_warning(
    rsp_na <- tnrs_match_names(names = c("diadema", "fluffy",
                                         "hemichordata", "escherichia"))
  )
  diadema_ids <- c(4930522, 631176, 643831)
}

test_that("correct data is being returned when asked to lookup by taxon name", {
  skip_on_cran()
  found_ids <- inspect(rsp, taxon_name = "diadema")[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
})

test_that("correct data is being returned when asked to lookup by ott_id", {
  skip_on_cran()
  found_ids <- inspect(rsp, ott_id = ott_id(rsp)[2])[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
})

test_that("correct data is being returned when asked to lookup by row number", {
  skip_on_cran()
  found_ids <- inspect(rsp, row_number = 2)[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
})

## with missing data

test_that("correct data is being returned when asked to lookup by taxon name (with missing data)", {
  skip_on_cran()
  found_ids <- inspect(rsp_na, taxon_name = "diadema")[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
  expect_true(is.na(inspect(rsp_na, taxon_name = "fluffy")[["ott_id"]]))
})

test_that("correct data is being returned when asked to lookup by ott_id (with missing data)", {
  skip_on_cran()
  found_ids <- inspect(rsp_na, ott_id = ott_id(rsp)[2])[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
})

test_that("correct data is being returned when asked to lookup by row number (with missing data)", {
  skip_on_cran()
  found_ids <- inspect(rsp_na, row_number = 1)[["ott_id"]]
  expect_true(all(found_ids %in% diadema_ids))
  expect_true(is.na(inspect(rsp_na, row_number = 2)[["ott_id"]]))
})
############################################################################
## synonyms.match_names ##
############################################################################
context("list_synonym_match_names")

## Shared fixtures, only built when NOT_CRAN is "true". `rsp_na` contains a
## name ("fluffy") whose lookup is expected to raise a warning.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  tax_rsp <- c("Holothuria", "Diadema", "Fromia")
  rsp <- tnrs_match_names(names = tax_rsp)
  tax_rsp_na <- c("Holothuria", "Diadema", "fluffy", "Fromia")
  expect_warning(rsp_na <- tnrs_match_names(names = tax_rsp_na))
}

test_that("synonyms", {
  skip_on_cran()
  tt <- synonyms(rsp)
  expect_true(inherits(tt, "list"))
  expect_equal(names(tt),
               c("Holothuria", "Diadema (genus in Nucletmycea)", "Fromia"))
})

test_that("correct synonyms are being returned when asked to look up by taxon name", {
  skip_on_cran()
  tt <- synonyms(rsp, taxon_name = "holothuria")
  expect_true(any(grepl("^Holothuria", names(tt))))
})

test_that("holothuria is present in each element of the list", {
  skip_on_cran()
  tt <- synonyms(rsp, taxon_name = "holothuria")
  ## vapply() always yields a logical vector (sapply() returns a list when
  ## `tt` is empty), so all() receives a well-defined input.
  has_holothuria <- vapply(
    tt,
    function(x) any(grepl("holothuria", x, ignore.case = TRUE)),
    logical(1)
  )
  expect_true(all(has_holothuria))
  expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
})

test_that("correct synonyms are being returned when asked to look up by row number", {
  skip_on_cran()
  tt <- synonyms(rsp, row_number = 1)
  expect_true(any(grepl("^Holothuria", names(tt))))
  expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
})

test_that("correct synonyms are being returned when asked to look up by ott id", {
  skip_on_cran()
  tt <- synonyms(rsp, ott_id = 5004030)
  expect_true(any(grepl("^Holothuria", names(tt))))
  expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
})
## with missing data
## The descriptions carry a "(with missing data)" suffix so that failures
## can be told apart from the same checks run on the complete response.

test_that("synonyms (with missing data)", {
  skip_on_cran()
  tt <- synonyms(rsp_na)
  expect_true(inherits(tt, "list"))
  expect_equal(names(tt),
               c("Holothuria", "Diadema (genus in Nucletmycea)", "Fromia"))
})

test_that("correct synonyms are being returned when asked to look up by taxon name (with missing data)", {
  skip_on_cran()
  tt <- synonyms(rsp_na, taxon_name = "holothuria")
  expect_true(any(grepl("^Holothuria", names(tt))))
  expect_true(is.na(synonyms(rsp_na, taxon_name = "fluffy")[[1]]))
})

test_that("correct synonyms are being returned when asked to look up by row number (with missing data)", {
  skip_on_cran()
  tt <- synonyms(rsp_na, row_number = 1)
  expect_true(any(grepl("^Holothuria", names(tt))))
  expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
  expect_true(is.na(synonyms(rsp_na, row_number = 3)[[1]]))
})

test_that("correct synonyms are being returned when asked to look up by ott id (with missing data)", {
  skip_on_cran()
  tt <- synonyms(rsp_na, ott_id = 5004030)
  expect_true(any(grepl("^Holothuria", names(tt))))
  expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
})
############################################################################
## update.match_names ##
############################################################################
context("update.match_names")

## Shared fixture, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
}

test_that("error message if missing both new arguments", {
  skip_on_cran()
  expect_error(update(rsp, row_number = 1),
               "You must specify either")
})

test_that("error message if both new arguments are provided", {
  skip_on_cran()
  expect_error(
    update(rsp, row_number = 1, new_row_number = 1, new_ott_id = 6666),
    "You must use only"
  )
})

test_that("error message if wrong new row number provided", {
  skip_on_cran()
  expect_error(update(rsp, row_number = 1, new_row_number = 10),
               "is not a valid row number")
  expect_error(update(rsp, row_number = 1, new_row_number = 1.5),
               "is not a valid row number")
})

test_that("error message if wrong new ott id provided", {
  skip_on_cran()
  expect_error(update(rsp, row_number = 1, new_ott_id = 66666),
               "Can't find")
})

test_that("it works correctly when providing a new row number", {
  skip_on_cran()
  updated <- update(rsp, row_number = 2, new_row_number = 2)
  is_diadema <- updated$search_string == "diadema"
  expect_equal(updated[is_diadema, "ott_id"],
               "631176")
})

test_that("it works correctly when providing a new ott id", {
  skip_on_cran()
  updated <- update(rsp, row_number = 2, new_ott_id = 631176)
  is_diadema <- updated$search_string == "diadema"
  expect_equal(updated[is_diadema, "ott_id"],
               "631176")
})

## NOTE(review): `rsp_na` is not defined in this section; it is presumably
## the object left over from an earlier section of this file -- confirm.
test_that("it produces warning when trying to update with unmatched name", {
  skip_on_cran()
  expect_warning(updated <- update(rsp_na, row_number = 3, new_row_number = 1))
  expect_identical(updated, rsp_na)
})
############################################################################
## flags method ##
############################################################################
context("flags method for class match_names")

## Shared fixtures, only built when NOT_CRAN is "true".
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  tax_rsp <- c("Tyrannosaurus", "Helicoplacus", "Ctenocystis",
               "Holothuria", "Echinoidea")
  rsp <- tnrs_match_names(tax_rsp)
}

test_that("flags with no arguments", {
  skip_on_cran()
  flags_rsp <- flags(rsp)
  expect_equal(length(flags_rsp), 5)
  ## vapply() guarantees an integer vector whatever the shape of
  ## `flags_rsp`, unlike sapply()
  expect_equivalent(vapply(flags_rsp, length, integer(1)),
                    c(1, 3, 2, 0, 0))
})

test_that("flags with row number", {
  skip_on_cran()
  flags_rsp <- flags(rsp, 1)
  expect_true(inherits(flags_rsp, "list"))
  expect_equal(length(flags_rsp), 1)
  expect_equal(length(flags_rsp[[1]]), 1)
  expect_true(inherits(flags_rsp[[1]], "character"))
  expect_equal(names(flags_rsp), tax_rsp[1])
})

test_that("flags with taxon name", {
  skip_on_cran()
  flags_rsp <- flags(rsp, taxon_name = "Tyrannosaurus")
  expect_true(inherits(flags_rsp, "list"))
  expect_equal(length(flags_rsp), 1)
  expect_equal(length(flags_rsp[[1]]), 1)
  expect_true(inherits(flags_rsp[[1]], "character"))
  expect_equal(names(flags_rsp), tax_rsp[1])
})

test_that("flags with ott id", {
  skip_on_cran()
  flags_rsp <- flags(rsp, ott_id = 664348)
  expect_true(inherits(flags_rsp, "list"))
  expect_equal(length(flags_rsp), 1)
  expect_equal(length(flags_rsp[[1]]), 1)
  expect_true(inherits(flags_rsp[[1]], "character"))
  expect_equal(names(flags_rsp), tax_rsp[1])
})
############################################################################
## ott_id method ##
############################################################################
context("ott_id method for class match_names")

## Shared fixture: only built when NOT_CRAN is "true"; the tests below skip
## themselves on CRAN.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  tax_rsp <- c(
    "Tyrannosaurus", "Helicoplacus", "Ctenocystis",
    "Holothuria", "Echinoidea"
  )
  rsp <- tnrs_match_names(tax_rsp)
}

test_that("ott_id with no arguments", {
  skip_on_cran()
  expect_true(inherits(ott_id(rsp), "list"))
  expect_true(inherits(ott_id(rsp), "otl_ott_id"))
  expect_equal(names(ott_id(rsp)), tax_rsp)
  expect_equal(ott_id(rsp)[["Holothuria"]][[1]], 5004030)
})

test_that("ott_id with row number", {
  skip_on_cran()
  expect_equal(length(ott_id(rsp, 4)), 1)
  expect_true(inherits(ott_id(rsp, 4), "list"))
  expect_equivalent(ott_id(rsp, 4)[[1]], 5004030)
})

test_that("ott_id with taxon name", {
  skip_on_cran()
  expect_equal(length(ott_id(rsp, taxon_name = "Holothuria")), 1)
  expect_true(inherits(ott_id(rsp, taxon_name = "Holothuria"), "list"))
  expect_equivalent(
    ott_id(rsp, taxon_name = "Holothuria")[[1]],
    5004030
  )
})

test_that("ott_id with ott id", {
  skip_on_cran()
  expect_equal(length(ott_id(rsp, ott_id = 5004030)), 1)
  expect_true(inherits(ott_id(rsp, ott_id = 5004030), "list"))
  expect_equivalent(ott_id(rsp, ott_id = 5004030)[[1]], 5004030)
})
rotl/tests/testthat/test-api-tol.R 0000644 0001775 0000144 00000004441 12707503326 017231 0 ustar deepayan users context("Tree of Life API")
############################################################################
## .tol_about ##
############################################################################
test_that("include_source_list is logical for .tol_about", {
  skip_on_cran()
  expect_error(.tol_about("true"), "logical")
})

## .tol_mrca: argument validation
test_that("neither ott_ids nor node_ids are NULL for .tol_mrca", {
  skip_on_cran()
  expect_error(.tol_mrca(NULL), "Must provide")
})

## .tol_subtree: argument validation
test_that("ott_id is not NULL", {
  skip_on_cran()
  expect_error(
    .tol_subtree(ott_id = NULL, node_id = NULL),
    "Must provide"
  )
})

## .tol_induced_subtree: argument validation
test_that("ott_ids is not NULL", {
  skip_on_cran()
  expect_error(
    .tol_induced_subtree(ott_ids = NULL),
    "Must provide"
  )
})

test_that("NAs are not accepted for ott_ids", {
  skip_on_cran()
  expect_error(
    .tol_induced_subtree(ott_ids = c(123, NA, 456)),
    "NAs are not allowed"
  )
})

## .tol_node_info: argument validation
test_that("include_lineage must be logical with .tol_node_info", {
  skip_on_cran()
  expect_error(
    .tol_node_info(ott_id = "ott_123", include_lineage = "123"),
    "logical"
  )
})

test_that("ott_id must be a numeric with .tol_node_info", {
  skip_on_cran()
  expect_error(
    .tol_node_info(ott_id = "test"),
    "look like numbers"
  )
})

test_that("node_id must be a character with .tol_node_info", {
  skip_on_cran()
  expect_error(
    .tol_node_info(node_id = 123),
    "must look like"
  )
})
rotl/tests/testthat/test-API.R 0000644 0001775 0000144 00000014103 12705157664 016301 0 ustar deepayan users ####
## Making use of the shared OpenTree testing architecture
####
## The R, Python and Ruby wrappers for the Open Tree share a very similar design,
## allowing them to make use of a single test suite for the low-level functions
## (thus, the tests both check an individual library works as expected, and that
## the different libraries stay in line with each other).
##
## This file pulls the current version of the test from a github repo
## (https://github.com/OpenTreeOfLife/shared-api-tests) and translates the json
## files into tests that run in testthat. This takes a considerable amount of
## infrastructure so I'll briefly describe the rationale here.
##
## The JSON test-specification is defined at the github repo linked above, to
## translate these tests I have created custom testthat expectation-functionals
## (contains(), key_has_value()... ). Because many of the test blocks in the
## JSON files have multiple expectations (i.e. many key-value pairs for
## test_equals) there are functions starting with `test_` that run an entire
## test block for a given expectation. Since many of these tests require
## translation between R-objects and JSON encoded strings there is a set of
## convenience functions to automate that step and a function "test_map" that
## returns the appropriate test_* function for a given JSON test block.
##
## Finally, testthat_json_test uses the above functions to run an entire test
## from a JSON object, and run_shared_test() runs every test in a JSON file.
#functionals that start with a response
## Expectation functional: builds a checker that passes when `key_name` is one
## of the names of the object it is later applied to.
##
## key_name: name to look for.
## Returns a function of `x` that calls expectation() with the outcome of the
## membership check and a "Missing key name" message.
contains <- function(key_name) {
  function(x) {
    has_key <- key_name %in% names(x)
    expectation(has_key, sprintf("Missing key name: %s", key_name))
  }
}
## Expectation functional: builds a checker comparing `x[[key]]` with `value`.
##
## The comparison depends on the length of `value`:
##   * length 0: `x[[key]]` must be empty;
##   * length 1: `x[[key]]` is compared to `value` with `==`;
##   * otherwise: every element of `x[[key]]` must be found in `value`.
##
## Returns a function of `x` that calls expectation() with the comparison
## outcome and a message naming the key.
key_has_value <- function(key, value) {
  function(x) {
    n_expected <- length(value)
    if (n_expected == 0) {
      return(expectation(length(x[[key]]) == 0,
                         paste("Key", key, "is not empty")))
    }
    if (n_expected == 1) {
      return(expectation(x[[key]] == value,
                         paste("Key", key, "doesn't have value", value)))
    }
    expectation(all(x[[key]] %in% value),
                paste("Key", key, "doesn't contain all of", value))
  }
}
## Expectation functional: builds a checker that passes when `x[[key]]` has
## strictly more than `len` elements.
##
## Returns a function of `x` that calls expectation() with the length
## comparison and a message naming the key and the threshold.
value_is_longer_than <- function(key, len) {
  function(x) {
    is_longer <- length(x[[key]]) > len
    expectation(is_longer,
                paste("Value for key", key, "is shorter than", len))
  }
}
## Expectation functional: builds a checker that passes when `x[[key_name]]`
## equals the string 'error'.
##
## Returns a function of `x` that calls expectation() with that comparison and
## a message naming the key.
value_is_error <- function(key_name) {
  function(x) {
    is_error <- x[[key_name]] == 'error'
    expectation(is_error, sprintf("Key %s is not 'error'", key_name))
  }
}
## Functions to test entire test blocks with the above expectations
## Runs a whole "contains" test block: every entry in the first column of
## `test_block` is treated as a key name that `response` must contain.
##
## response:   object returned by the API call under test.
## test_block: two-dimensional object whose first column holds the key names.
## Returns the sapply() result of the individual expect_that() calls.
test_contains <- function(response, test_block) {
  check_key <- function(k) expect_that(response, contains(k))
  sapply(test_block[, 1], check_key)
}
## Runs a whole "equals" test block: each entry of `test_block` starts with a
## (key, value) pair, and `response[[key]]` is checked against `value` with
## key_has_value().
##
## response:   object returned by the API call under test.
## test_block: list whose elements each hold a (key, value) pair in first
##             position (the remaining content is ignored here).
##
## Every pair is checked in turn. The previous loop never used its index and
## therefore re-tested the first pair on each iteration.
test_equals <- function(response, test_block) {
  kv_pairs <- lapply(test_block, "[[", 1)
  for (kv in kv_pairs) {
    expect_that(response, key_has_value(kv[[1]], kv[[2]]))
  }
}
## Runs an "of_type" test block: the first element of `test_block` names a JSON
## type, which type_map() translates to an R class that `response` must have.
##
## response:   object returned by the API call under test.
## test_block: object whose first element is the JSON type name.
test_of_type <- function(response, test_block) {
  json_type <- test_block[[1]]
  expect_that(response, is_a(type_map(json_type)))
}
## Placeholder for "deep_equals" test blocks: nothing is compared. It prints a
## "*" marker and records a trivially true expectation; both arguments are
## ignored.
test_deep_equals <- function(response, test_block) {
  cat("*")
  expect_true(TRUE)
}
## Runs a whole "length_greater_than" test block: each entry of `test_block`
## starts with a (key, length) pair, and `response[[key]]` must be longer than
## that length.
##
## response:   object returned by the API call under test.
## test_block: list whose elements each hold a (key, length) pair in first
##             position; the pairs are gathered column-wise with sapply().
## Returns the apply() result of the individual expect_that() calls.
test_length_greater_than <- function(response, test_block) {
  check_pair <- function(v) {
    expect_that(response, value_is_longer_than(v[[1]], v[[2]]))
  }
  vl_pairs <- sapply(test_block, "[[", 1)
  apply(vl_pairs, 2, check_pair)
}
## Runs a whole "contains_error" test block: every entry in the first column
## of `test_block` is passed to contains_error() and checked against
## `response`.
##
## response:   object returned by the API call under test.
## test_block: two-dimensional object whose first column holds the errors.
##
## The expectation previously referenced the misspelled name `reponse`, so any
## call failed with "object not found".
## NOTE(review): contains_error() is not defined in this file and this
## function is not registered in test_map() -- confirm where it is meant to
## come from before relying on it.
test_contains_error <- function(response, test_block) {
  errs <- test_block[, 1]
  sapply(errs, function(e) expect_that(response, contains_error(e)))
}
## convenience functions
## Translates a single JSON-encoded scalar into its R counterpart.
##
## input: any R object.
## Returns TRUE, FALSE or NULL when `input` is a length-one character string
## spelling "true", "false" or "null" (case-insensitive); any other input is
## returned unchanged.
obj_map <- function(input) {
  # `&&` rather than `&`: this is a scalar condition for `if`.
  if (is.character(input) && length(input) == 1) {
    switch(tolower(input),
           "true" = TRUE,
           "false" = FALSE,
           "null" = NULL,
           input)
  } else {
    input
  }
}
## Converts the inputs of a JSON test into R values by applying obj_map() to
## each element.
##
## test_input: the "test_input" entry of a JSON test.
## Returns `test_input` untouched when it is empty, otherwise a list with
## obj_map() applied to every element.
json_to_r <- function(test_input) {
  if (length(test_input) > 0) {
    lapply(test_input, obj_map)
  } else {
    test_input
  }
}
## Maps a JSON type name used in the shared tests to the matching R class.
##
## json_type: JSON type name; only "dict" is known (mapped to "list").
## Stops with an error for any other type name.
type_map <- function(json_type) {
  switch(
    json_type,
    "dict" = "list",
    stop(sprintf("unknown json type in testing file: %s", json_type))
  )
}
## Returns the test_* function in charge of a given JSON test type.
##
## test_type: name of the test block ("contains", "equals", "deep_equals",
##            "length_greater_than" or "of_type").
## Stops for "error" (those blocks must be dealt with before dispatching) and
## for any test type that is not listed here.
test_map <- function(test_type) {
  switch(test_type,
         "contains" = test_contains,
         "equals" = test_equals,
         "deep_equals" = test_deep_equals,
         "error" = stop("Error tests should be handled first"),
         "length_greater_than" = test_length_greater_than,
         "of_type" = test_of_type,
         # What is unknown here is the test type, not an error type.
         stop(sprintf("Unknown test type in JSON test: %s", test_type))
  )
}
## Performs the API call described by a JSON test.
##
## json_test: list with a `test_function` entry (function name without its
##            leading dot) and a `test_input` entry (arguments for the call).
## Returns whatever the dot-prefixed function returns when called with the
## translated inputs.
make_request <- function(json_test) {
  fxn_name <- paste0(".", json_test$test_function)
  fxn_args <- json_to_r(json_test$test_input)
  do.call(what = fxn_name, args = fxn_args)
}
## Runs every test block attached to one named test of a JSON specification.
##
## test_obj:  parsed JSON specification (named list of tests).
## test_name: name of the test to run.
##
## When any block name contains "error", the request itself is expected to
## fail and the remaining blocks are ignored (there is no response to inspect).
## Otherwise the request is made once and each block is dispatched through
## test_map().
##
## Previously a test with a single non-error block ran nothing at all, and a
## test mixing "error" with other blocks made the request outside
## expect_error().
testthat_json_test <- function(test_obj, test_name) {
  json_test <- test_obj[[test_name]]
  tests_to_run <- names(json_test$tests)
  if (any(grepl("error", tests_to_run, fixed = TRUE))) {
    expect_error(make_request(json_test))
  } else {
    response <- make_request(json_test)
    for (test_type in tests_to_run) {
      test_fxn <- test_map(test_type)
      test_fxn(response, json_test$tests[[test_type]])
    }
  }
}
## Runs every test of a parsed JSON specification, each one inside its own
## test_that() block named after the test.
##
## json_obj: parsed JSON specification (named list of tests).
##
## Iterating over the names directly (rather than over 1:length()) means an
## empty specification runs no test at all.
run_shared_test <- function(json_obj) {
  all_tests <- names(json_obj)
  for (test_name in all_tests) {
    test_that(test_name, {
      skip_on_cran()
      testthat_json_test(json_obj, test_name)
    })
  }
}
## if (identical(Sys.getenv("NOT_CRAN"), "true")) {
## base_url <- "https://raw.githubusercontent.com/OpenTreeOfLife/shared-api-tests/master/"
## apis <- c("graph_of_life",
## "studies",
## "taxonomy",
## "tree_of_life",
## "tnrs"
## )
## for(i in 1:length(apis)){
## context( paste(apis[i], "API") )
## test_text <- httr::GET(paste0(base_url, apis[i], ".json"))
## test_description <- jsonlite::fromJSON(httr::content(test_text))
## run_shared_test(test_description)
## }
## }
rotl/tests/testthat/test-base.R 0000644 0001775 0000144 00000003406 12705157664 016606 0 ustar deepayan users context("base functions")
## URL and version helpers
test_that("otl_url returns the correct strings", {
  skip_on_cran()
  expect_match(
    otl_url(dev = TRUE),
    "^https://devapi.opentreeoflife.org$"
  )
  expect_match(
    otl_url(dev = FALSE),
    "^https://api.opentreeoflife.org$"
  )
})

test_that("otl_version", {
  skip_on_cran()
  expect_equal(otl_version(), "v3")
  expect_equal(otl_version("foobar"), "foobar")
})

## Label parsing
test_that("otl_ottid_from_label", {
  skip_on_cran()
  expect_equal(otl_ottid_from_label("flkdjfs_ott314343"), 314343)
})

## Input formats phylo_from_otl() must reject
test_that("errors that would otherwise not get caught in phylo_from_otl", {
  expect_error(
    phylo_from_otl(list(something = "((A, B), C);")),
    "Cannot find tree"
  )
  expect_error(
    phylo_from_otl(999),
    "I don't know how to deal with this format"
  )
})
############################################################################
## check_numeric ##
############################################################################
## Accepted: whole numbers given as string, double, integer or in a list.
test_that("check_numeric works on integer", {
  expect_true(check_numeric("123"))
  expect_true(check_numeric(123))
  expect_true(check_numeric(123L))
  expect_true(check_numeric(list(123)))
})

## Rejected: strings holding anything other than digits.
test_that("check_numeric fails if there are characters", {
  expect_false(check_numeric("A123"))
  expect_false(check_numeric("1A23"))
  expect_false(check_numeric("123A"))
  expect_false(check_numeric("12-3"))
})

## Rejected: missing values, logicals and non-integer numbers.
test_that("check_numeric fails with more exotic types", {
  expect_false(check_numeric(NA))
  expect_false(check_numeric(TRUE))
  expect_false(check_numeric(1.23))
  expect_false(check_numeric(0.9999999999999))
})
## Inputs with more than one element are an error rather than a FALSE result.
## The code is wrapped in braces like every other test in this file, which
## current testthat expects in order to report failure locations accurately.
test_that("check_numeric fails if more than 1 element provided", {
  expect_error(check_numeric(c(1, 2)))
})
rotl/tests/testthat/test-tree_to_labels.R 0000644 0001775 0000144 00000004324 12541354226 020646 0 ustar deepayan users context("test tree_to_labels")
## Newick strings with branch lengths and a mix of labelled/unlabelled nodes
test_that("basic tree 1", {
  newick <- "((raccon:19.19959,bear:6.80041)InnerNode1:0.84600,((sea_lion:11.99700,seal:12.00300)InnerNode2:7.52973,((monkey:100.85930,cat:47.14069):20.59201,weasel:18.87953):2.09460):3.87382,dog:25.46154);"
  labels <- tree_to_labels(newick)
  expected_tips <- c("raccon", "bear", "sea_lion", "seal",
                     "monkey", "cat", "weasel", "dog")
  expect_equal(labels$tip_label, expected_tips)
  expect_equal(labels$edge_label, c("InnerNode1", "InnerNode2"))
})

test_that("basic tree 2", {
  newick <- "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460):0.10;"
  labels <- tree_to_labels(newick)
  expected_tips <- c("Bovine", "Gibbon", "Orang", "Gorilla",
                     "Chimp", "Human", "Mouse")
  expect_equal(labels$tip_label, expected_tips)
  expect_equal(labels$edge_label, character(0))
})

test_that("basic tree 3", {
  newick <- "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
  labels <- tree_to_labels(newick)
  expected_tips <- c("Bovine", "Hylobates", "Pongo", "G._Gorilla",
                     "P._paniscus", "H._sapiens", "Rodent")
  expect_equal(labels$tip_label, expected_tips)
  expect_equal(labels$edge_label, character(0))
})

## Degenerate trees
test_that("only 1 tip", {
  labels <- tree_to_labels("A;")
  expect_equal(labels$tip_label, "A")
  expect_equal(labels$edge_label, character(0))
})

test_that("only 1 tip with parentheses", {
  labels <- tree_to_labels("(A);")
  expect_equal(labels$tip_label, "A")
  expect_equal(labels$edge_label, character(0))
})

test_that("only 1 tip and 1 internal", {
  labels <- tree_to_labels("(A)B;")
  expect_equal(labels$tip_label, "A")
  expect_equal(labels$edge_label, "B")
})

test_that("tree with singletons", {
  newick <- "(((((A)cats,B)dogs,(C,D)ducks)frogs)animals,E)fungi;"
  labels <- tree_to_labels(newick)
  expect_equal(labels$tip_label, LETTERS[1:5])
  expected_edges <- c("cats", "dogs", "ducks", "frogs", "animals", "fungi")
  expect_equal(labels$edge_label, expected_edges)
})
rotl/tests/testthat/test-studies.R 0000644 0001775 0000144 00000051362 13017051564 017345 0 ustar deepayan users context("test of studies")
############################################################################
## studies_properties ##
############################################################################
## The names of the returned list must all be among the two documented
## elements. The closing parenthesis of names() used to sit after the `%in%`
## expression; `names()` of that unnamed logical vector is NULL and
## `all(NULL)` is TRUE, so the expectation could never fail.
test_that("studies_properties is a list with 2 elements (if breaks, need to update documentation)", {
  skip_on_cran()
  property_names <- names(studies_properties())
  expect_true(all(property_names %in% c("tree_properties", "study_properties")))
})
############################################################################
## get_study ##
############################################################################
## Error handling and in-memory objects
test_that("get_study returns an error when asking for a study that doesn't exist", {
  skip_on_cran()
  expect_error(get_study("tt_666666"))
})

test_that("get_study generates a phylo object", {
  skip_on_cran()
  trees <- get_study("pg_719", object_format = "phylo")
  expect_true(inherits(trees, "multiPhylo"))
  expect_equal(length(trees), 3)
  n_tips_first <- length(trees[[1]]$tip.label)
  expect_true(n_tips_first > 1)
})

test_that("get_study returns an error if file is specied but file_format is not", {
  skip_on_cran()
  expect_error(
    get_study("pg_719", file = "test"),
    "must be specified"
  )
})

test_that("get_study generates a nexml object", {
  skip_on_cran()
  study <- get_study("pg_719", object_format = "nexml")
  expect_true(inherits(study, "nexml"))
})

## File output: the call must return TRUE and the first line of the written
## file must look like the requested format.
test_that("get_study generates a newick file", {
  skip_on_cran()
  out_file <- tempfile()
  written <- get_study("pg_719", file_format = "newick", file = out_file)
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^\\(", first_line))
})

test_that("get_study generates a nexus file", {
  skip_on_cran()
  out_file <- tempfile()
  written <- get_study("pg_719", file_format = "nexus", file = out_file)
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^#NEXUS", first_line))
})

test_that("get_study generates a nexml file", {
  skip_on_cran()
  out_file <- tempfile()
  written <- get_study("pg_719", file_format = "nexml", file = out_file)
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^<\\?xml", first_line))
})

test_that("get_study generates a json file", {
  skip_on_cran()
  out_file <- tempfile()
  written <- get_study("pg_719", file_format = "json", file = out_file)
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^\\{", first_line))
})
############################################################################
## get_study_tree ##
############################################################################
## Unknown tree / study identifiers
test_that("get_study_tree returns error when tree doesn't exist", {
  skip_on_cran()
  expect_error(get_study_tree("2655", "tree5555"))
})

test_that("get_study_tree returns error when study doesn't exist", {
  skip_on_cran()
  expect_error(get_study_tree("5555555", "tree555555"))
})

## File output: the call must return TRUE and the first line of the written
## file must look like the requested format.
test_that("get_study_tree generates nexus file", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".nex")
  written <- get_study_tree(
    "pg_1144", "tree2324",
    file_format = "nexus", file = out_file
  )
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^#NEXUS", first_line))
})

test_that("get_study_tree generates newick file", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".tre")
  written <- get_study_tree(
    "pg_1144", "tree2324",
    file_format = "newick", file = out_file
  )
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^\\(", first_line))
})

test_that("get_study_tree generates json file", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".json")
  written <- get_study_tree(
    "pg_1144", "tree2324",
    file_format = "json", file = out_file
  )
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^\\{", first_line))
})

## In-memory object
test_that("get_study_tree returns a phylo object", {
  skip_on_cran()
  tree <- get_study_tree("pg_1144", "tree2324", object_format = "phylo")
  expect_true(inherits(tree, "phylo"))
  expect_true(length(tree$tip.label) > 1)
})
### Test types of labels with phylo objects
test_that("get_study_tree returns a phylo object and ott_id for tip labels", {
  skip_on_cran()
  tree <- get_study_tree(
    "pg_1144", "tree2324",
    object_format = "phylo", tip_label = "ott_id"
  )
  expect_true(inherits(tree, "phylo"))
  expect_true(length(tree$tip.label) > 1)
  # OTT ids are made of digits only.
  expect_true(grepl("^[0-9]+$", tree$tip.label[1]))
})

test_that("get_study_tree returns a phylo object and ott_taxon_names for tip labels", {
  skip_on_cran()
  tree <- get_study_tree(
    "pg_1144", "tree2324",
    object_format = "phylo", tip_label = "ott_taxon_name"
  )
  expect_true(inherits(tree, "phylo"))
  expect_true(length(tree$tip.label) > 1)
  # Fewer than 3 labels may depart from the "Genus" / "Genus_species" shape.
  n_unexpected <- sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tree$tip.label))
  expect_true(n_unexpected < 3)
})

test_that("get_study_tree returns a phylo object and original labels for tip labels", {
  skip_on_cran()
  tree <- get_study_tree(
    "pg_1144", "tree2324",
    object_format = "phylo", tip_label = "original_label"
  )
  expect_true(inherits(tree, "phylo"))
  expect_true(length(tree$tip.label) > 1)
  # Number of labels that are not of the plain "Genus_species" shape.
  n_unexpected <- sum(!grepl("^[A-Za-z]+_[a-z]+$", tree$tip.label))
  expect_equal(n_unexpected, 45)
})
### Test types of labels with files (skipping json for now because there is no good way of doing it)
test_that("get_study_tree returns an error if file is given but file format is not", {
  skip_on_cran()
  expect_error(
    get_study_tree(study_id = "pg_1144", tree_id = "tree2324", file = "test"),
    "must be specified"
  )
})

test_that("get_study_tree returns nexus file and ott_id for tip labels", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".nex")
  written <- get_study_tree(
    "pg_1144", "tree2324",
    file_format = "nexus", tip_label = "ott_id", file = out_file
  )
  expect_true(written)
  # Read the file back to inspect the tip labels that were written.
  tree <- rncl::read_nexus_phylo(out_file)
  expect_true(length(tree$tip.label) > 1)
  expect_true(grepl("^[0-9]+$", tree$tip.label[1]))
})
## File-based counterpart of the phylo-object test on taxon-name tip labels:
## the tree is written as newick, read back with rncl, and its labels checked.
## The description used to repeat the phylo-object test's name verbatim, which
## made the two indistinguishable in test reports.
test_that("get_study_tree returns newick file and ott_taxon_names for tip labels", {
  skip_on_cran()
  ff <- tempfile(fileext = ".tre")
  tt <- get_study_tree("pg_1144", "tree2324", file_format = "newick",
                       tip_label = "ott_taxon_name", file = ff)
  expect_true(tt)
  tr <- rncl::read_newick_phylo(ff)
  expect_true(length(tr$tip.label) > 1)
  # Fewer than 3 labels may depart from the "Genus" / "Genus_species" shape.
  expect_true(sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tr$tip.label)) < 3)
})
############################################################################
## get_study_subtree ##
############################################################################
## Invalid identifiers
test_that("get_study_subtree returns an error when study_id doesn't exist", {
  skip_on_cran()
  expect_error(
    get_study_subtree("pg_55555", "tree55555", subtree_id = "node555555")
  )
})

test_that("get_study_subtree returns an error when tree_id doesn't exist", {
  skip_on_cran()
  expect_error(
    get_study_subtree("pg_1144", "tree55555", subtree_id = "node555555")
  )
})

test_that("get_study_subtree returns an error when the subtree_id is invalid", {
  skip_on_cran()
  expect_error(get_study_subtree("pg_1144", "tree2324", "foobar"))
})

## In-memory object: the subtree rooted at a node must have fewer tips than
## the "ingroup" subtree of the same tree.
test_that("get_study_subtree returns a phylo object", {
  skip_on_cran()
  ingroup <- get_study_subtree(
    "pg_420", "tree522",
    subtree_id = "ingroup", object_format = "phylo"
  )
  node_subtree <- get_study_subtree(
    "pg_420", "tree522",
    subtree_id = "node208580", object_format = "phylo"
  )
  expect_true(inherits(ingroup, "phylo"))
  expect_true(length(ingroup$tip.label) > 1)
  expect_true(inherits(node_subtree, "phylo"))
  expect_true(length(node_subtree$tip.label) > 1)
  expect_true(length(ingroup$tip.label) > length(node_subtree$tip.label))
})

## File output
test_that("get_study_subtree fails if file name is given but no file format", {
  skip_on_cran()
  expect_error(
    get_study_subtree("pg_420", "tree522", subtree_id = "ingroup",
                      file = "test"),
    "must be specified"
  )
})

test_that("get_study_subtree returns a nexus file", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".nex")
  written <- get_study_subtree(
    "pg_420", "tree522",
    subtree_id = "ingroup", file_format = "nexus", file = out_file
  )
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^#NEXUS", first_line))
})

test_that("get_study_subtree returns a newick file", {
  skip_on_cran()
  out_file <- tempfile(fileext = ".tre")
  written <- get_study_subtree(
    "pg_420", "tree522",
    subtree_id = "ingroup", file_format = "newick", file = out_file
  )
  expect_true(written)
  first_line <- readLines(out_file, n = 1, warn = FALSE)
  expect_true(grepl("^\\(", first_line))
})

## Duplicated tip labels
test_that("get_study_subtree can deduplicate labels", {
  skip_on_cran()
  expect_warning(
    get_study_subtree(
      study_id = "pg_710", tree_id = "tree1277",
      tip_label = 'ott_taxon_name',
      subtree_id = "ingroup", deduplicate = TRUE
    ),
    "and have been modified"
  )
})

test_that("get_study_subtree fails with duplicate labels", {
  skip_on_cran()
  expect_error(
    get_study_subtree(
      study_id = "pg_710", tree_id = "tree1277",
      tip_label = 'ott_taxon_name',
      subtree_id = "ingroup", deduplicate = FALSE
    ),
    "has already been encountered"
  )
})
############################################################################
## get_study_meta ##
############################################################################
## Shared fixture: only fetched when NOT_CRAN is "true"; the tests below skip
## themselves on CRAN.
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
  sm <- get_study_meta("pg_719")
}

test_that("get_study meta returns a study_meta object", {
  skip_on_cran()
  expect_true(inherits(sm, "study_meta"))
})

test_that("get_tree_ids method for study_meta", {
  skip_on_cran()
  expected_ids <- c("tree1294", "tree1295", "tree1296")
  expect_equal(get_tree_ids(sm), expected_ids)
})

test_that("get_publication method for study_meta", {
  skip_on_cran()
  publication_doi <- attr(get_publication(sm), "DOI")
  expect_equal(publication_doi, "http://dx.doi.org/10.1600/036364411X605092")
})

test_that("candidate_for_synth method for study_meta", {
  skip_on_cran()
  expect_true(candidate_for_synth(sm) %in% get_tree_ids(sm))
})

test_that("get_study_year method for study_meta", {
  skip_on_cran()
  expect_equal(get_study_year(sm), 2011)
})
############################################################################
## tol_about ##
############################################################################
test_that("tol_about returns class tol_summary", {
  skip_on_cran()
  expect_true(inherits(tol_about(), "tol_summary"))
})

## The source list extracted from tol_about(TRUE) is a data frame with more
## than 100 rows and three fixed columns.
test_that("study_about", {
  skip_on_cran()
  sources <- source_list(tol_about(TRUE))
  expect_true(inherits(sources, "data.frame"))
  expect_true(nrow(sources) > 100)
  expect_equal(names(sources), c("study_id", "tree_id", "git_sha"))
})
############################################################################
## studies_find_studies ##
############################################################################
## Search by study id: exactly one known study is expected.
test_that("single study detailed=TRUE", {
  skip_on_cran()
  found <- studies_find_studies(
    property = "ot:studyId", value = "ot_248", detailed = TRUE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  allowed_cols <- c("study_ids", "n_trees", "tree_ids",
                    "candidate", "study_year", "title",
                    "study_doi")
  expect_true(all(names(found) %in% allowed_cols))
  expect_true(nrow(found) >= 1L)
  expect_equal(found[["study_ids"]], "ot_248")
  expect_equal(found[["n_trees"]], "1")
  expect_equal(found[["candidate"]], "Tr76302")
  expect_equal(found[["study_year"]], "2014")
  expect_equal(found[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060")
  expect_equal(found[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'")
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

test_that("single study detailed=FALSE", {
  skip_on_cran()
  found <- studies_find_studies(
    property = "ot:studyId", value = "ot_248", detailed = FALSE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "study_ids"))
  expect_true(inherits(found, "matched_studies"))
  expect_match(attr(found, "found_trees"), "list of the trees associated")
  expect_equal(names(found), "study_ids")
  expect_equal(found[1, 1], "ot_248")
  expect_equal(nrow(found), 1L)
  expect_equal(ncol(found), 1L)
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

## Search by focal clade: at least 8 studies are expected.
test_that("multiple studies detailed=TRUE", {
  skip_on_cran()
  found <- studies_find_studies(
    property = "ot:focalCladeOTTTaxonName", value = "mammalia",
    detailed = TRUE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  allowed_cols <- c("study_ids", "n_trees", "tree_ids",
                    "candidate", "study_year",
                    "title", "study_doi")
  expect_true(all(names(found) %in% allowed_cols))
  expect_true(nrow(found) >= 8L)
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

test_that("multiple studies detailed=FALSE", {
  skip_on_cran()
  found <- studies_find_studies(
    property = "ot:focalCladeOTTTaxonName", value = "mammalia",
    detailed = FALSE
  )
  expect_true(inherits(found, "study_ids"))
  expect_true(inherits(found, "matched_studies"))
  expect_true(inherits(found, "data.frame"))
  expect_equal(ncol(found), 1L)
  expect_true(nrow(found) >= 8)
  expect_equal(names(found), "study_ids")
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})
############################################################################
## studies_find_trees ##
############################################################################
## Search by study id: exactly one known study is expected.
test_that("studies_find_trees single study detailed=FALSE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:studyId", value = "ot_248", detailed = FALSE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  expect_match(attr(found, "found_trees")[[1]], "Tr76302")
  expected_cols <- c("study_ids",
                     "n_matched_trees",
                     "match_tree_ids")
  expect_equal(names(found), expected_cols)
  expect_equal(found[1, 1], "ot_248")
  expect_equal(nrow(found), 1L)
  expect_equal(ncol(found), 3L)
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

test_that("studies_find_trees single study detailed=TRUE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:studyId", value = "ot_248", detailed = TRUE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  expected_cols <- c("study_ids", "n_trees",
                     "tree_ids", "candidate",
                     "study_year", "title",
                     "study_doi",
                     "n_matched_trees",
                     "match_tree_ids")
  expect_equal(names(found), expected_cols)
  expect_equal(nrow(found), 1L)
  expect_equal(found[["study_ids"]], "ot_248")
  expect_equal(found[["n_trees"]], "1")
  expect_equal(found[["candidate"]], "Tr76302")
  expect_equal(found[["study_year"]], "2014")
  expect_equal(found[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060")
  expect_equal(found[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'")
  expect_equal(found[["tree_ids"]], "Tr76302")
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

## Search by taxon name: at least 5 studies are expected.
test_that("studies_find_trees multiple studies detailed=TRUE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:ottTaxonName", value = "Echinodermata",
    detailed = TRUE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  expected_cols <- c("study_ids", "n_trees",
                     "tree_ids", "candidate",
                     "study_year", "title",
                     "study_doi",
                     "n_matched_trees",
                     "match_tree_ids")
  expect_equal(names(found), expected_cols)
  expect_true(nrow(found) >= 5L)
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})

test_that("studies_find_trees multiple studies detailed=FALSE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:ottTaxonName", value = "Echinodermata",
    detailed = FALSE
  )
  expect_true(inherits(found, "data.frame"))
  expect_true(inherits(found, "matched_studies"))
  expected_cols <- c("study_ids",
                     "n_matched_trees",
                     "match_tree_ids")
  expect_equal(names(found), expected_cols)
  expect_true(nrow(found) >= 5L)
  expect_true(length(attr(found, "metadata")) > 0)
  expect_true(length(attr(found, "found_trees")) > 0)
})
############################################################################
## list_trees ##
############################################################################
## list_trees() on the result of studies_find_studies()
test_that("list_trees with studies_find_studies and detailed = FALSE", {
  skip_on_cran()
  # Without details, a hint message is expected instead of tree ids.
  expect_match(
    list_trees(studies_find_studies(
      property = "ot:focalCladeOTTTaxonName",
      value = "Aves", detailed = FALSE
    )),
    "If you want to get a list of the trees associated with the studies"
  )
})

test_that("list_trees with studies_find_studies and detailed = TRUE", {
  skip_on_cran()
  found <- studies_find_studies(
    property = "ot:focalCladeOTTTaxonName", value = "mammalia",
    detailed = TRUE
  )
  expect_true(inherits(list_trees(found), "list"))
  expect_true(length(list_trees(found)) >= 8)
  known_studies <- c("pg_2647", "ot_308",
                     "pg_2812", "ot_109",
                     "pg_2582", "pg_1428",
                     "ot_755", "pg_2550")
  expect_true(sum(names(list_trees(found)) %in% known_studies) >= 8)
})

## list_trees() on the result of studies_find_trees()
test_that("list_trees with studies_find_trees and detailed=FALSE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:ottTaxonName", value = "Echinodermata",
    detailed = FALSE
  )
  trees_by_study <- list_trees(found)
  expect_true(inherits(trees_by_study, "list"))
  expect_true(length(names(trees_by_study)) >= 5L)
  expect_true(all(sapply(trees_by_study, length) >= 1L))
})

test_that("list_trees with studies_find_trees and detailed=TRUE", {
  skip_on_cran()
  found <- studies_find_trees(
    property = "ot:ottTaxonName", value = "Echinodermata",
    detailed = TRUE
  )
  trees_by_study <- list_trees(found)
  expect_true(inherits(trees_by_study, "list"))
  expect_true(length(names(trees_by_study)) >= 5L)
  expect_true(all(sapply(trees_by_study, length) >= 1L))
})
rotl/tests/tree_of_life.json 0000644 0001775 0000144 00000007056 12705157664 016256 0 ustar deepayan users {
"test_mrca_normal_input": {
"test_function": "tol_mrca",
"test_input": {"ott_ids":[412129, 536234]},
"tests": {
"of_type":
["dict","Response is of wrong type"]
,
"equals": [
[["nearest_taxon_mrca_rank","'superorder'"],"Fails that nearest_taxon_mrca_rank contains superorder"]
],
"contains": [
["nearest_taxon_mrca_ott_id","Doesn't contain nearest_taxon_mrca_ott_id"]
]
}
},
"test_mrca_empty_list_input": {
"test_function": "tol_mrca",
"test_input": {"ott_ids":[]},
"tests": {
"error": [
["ValueError","Return wrong kind of error, or did return error"]
]
}
},
"test_mrca_empty_list_input_two": {
"test_function": "tol_mrca",
"test_input": {"ott_ids":[],
"node_ids":[]},
"tests": {
"error": [
["ValueError","Return wrong kind of error, or did return error"]
]
}
},
"test_mrca_non_existing_node": {
"test_function": "tol_mrca",
"test_input": {"ott_ids":[4259824365942365972436598732]},
"tests": {
"error": [
["OpenTreeService.OpenTreeError","Return wrong kind of error, or did return error"]
]
}
},
"test_mrca_non_existing_empty": {
"test_function": "tol_mrca",
"test_input": "null",
"tests": {
"error": [
["ValueError","Return wrong kind of error, or did return error"]
]
}
},
"test_subtree_demo": {
"test_function": "tol_subtree",
"test_input": {"ott_id":3599390},
"tests": {
"of_type":
["dict","Response is of wrong type"]
,
"contains": [
["newick","Doesn't contain a newick string"]
]
}
},
"test_subtree_null": {
"test_function": "tol_subtree",
"test_input": {},
"tests": {
"error": [
["ValueError","Return wrong kind of error, or did return error"]
],
"of_type": ["jfdsm"]
}
},
"test_induced_tree_good": {
"test_function": "tol_induced_tree",
"test_input": {"ott_ids":[292466, 501678, 267845, 666104, 316878, 102710, 176458]},
"tests": {
"of_type":
["dict","Response is of wrong type"]
,
"contains": [
["subtree","Doesn't contain a subtree string"]
]
}
},
"test_induced_tree_null": {
"test_function": "tol_induced_tree",
"test_input": {},
"tests": {
"error": [
["ValueError","Return wrong kind of error, or did return error"]
],
"of_type": ["something"]
}
},
"test_about": {
"test_function": "tol_about",
"test_input": {},
"tests": {
"contains": [
["root_taxon_name","Output doesn't contain root_taxon_name"],
["num_source_studies","Output doesn't contain num_source_studies"],
["taxonomy_version","Output doesn't contain taxonomy_version"],
["root_ott_id","Output doesn't contain root_ott_id"],
["num_tips","Output doesn't contain num_tips"]
]
}
}
}
rotl/tests/test-all.R 0000644 0001775 0000144 00000000051 12650461700 014561 0 ustar deepayan users ###
## Entry point for the package test suite: load testthat and run the
## tests registered for the 'rotl' package.
library(testthat)
test_check('rotl')
rotl/NAMESPACE 0000644 0001775 0000144 00000006256 13056072255 013007 0 ustar deepayan users # Generated by roxygen2: do not edit by hand
S3method("[",otl_ott_id)
S3method(candidate_for_synth,study_meta)
S3method(flags,match_names)
S3method(flags,taxon_info)
S3method(flags,taxon_mrca)
S3method(get_publication,study_meta)
S3method(get_study_year,study_meta)
S3method(get_tree_ids,study_meta)
S3method(inspect,match_names)
S3method(is_in_tree,otl_ott_id)
S3method(is_suppressed,match_names)
S3method(is_suppressed,taxon_info)
S3method(is_suppressed,taxon_mrca)
S3method(list_trees,matched_studies)
S3method(ott_id,match_names)
S3method(ott_id,taxon_info)
S3method(ott_id,taxon_mrca)
S3method(ott_id,tol_mrca)
S3method(ott_id,tol_node)
S3method(ott_id,tol_summary)
S3method(print,study_external_data)
S3method(print,study_ids)
S3method(print,study_meta)
S3method(print,tnrs_contexts)
S3method(print,tol_mrca)
S3method(print,tol_node)
S3method(print,tol_summary)
S3method(source_list,tol_mrca)
S3method(source_list,tol_node)
S3method(source_list,tol_summary)
S3method(synonyms,match_names)
S3method(synonyms,taxon_info)
S3method(tax_lineage,taxon_info)
S3method(tax_lineage,tol_node)
S3method(tax_name,match_names)
S3method(tax_name,taxon_info)
S3method(tax_name,taxon_mrca)
S3method(tax_name,tol_mrca)
S3method(tax_name,tol_node)
S3method(tax_name,tol_summary)
S3method(tax_rank,match_names)
S3method(tax_rank,taxon_info)
S3method(tax_rank,taxon_mrca)
S3method(tax_rank,tol_mrca)
S3method(tax_rank,tol_node)
S3method(tax_rank,tol_summary)
S3method(tax_sources,match_names)
S3method(tax_sources,taxon_info)
S3method(tax_sources,taxon_mrca)
S3method(tax_sources,tol_mrca)
S3method(tax_sources,tol_node)
S3method(tax_sources,tol_summary)
S3method(tol_lineage,tol_node)
S3method(unique_name,match_names)
S3method(unique_name,taxon_info)
S3method(unique_name,taxon_mrca)
S3method(unique_name,tol_mrca)
S3method(unique_name,tol_node)
S3method(unique_name,tol_summary)
S3method(update,match_names)
export(candidate_for_synth)
export(flags)
export(get_publication)
export(get_study)
export(get_study_meta)
export(get_study_subtree)
export(get_study_tree)
export(get_study_year)
export(get_tree_ids)
export(inspect)
export(is_in_tree)
export(is_suppressed)
export(list_trees)
export(ott_id)
export(source_list)
export(strip_ott_ids)
export(studies_find_studies)
export(studies_find_trees)
export(studies_properties)
export(study_external_IDs)
export(synonyms)
export(tax_lineage)
export(tax_name)
export(tax_rank)
export(tax_sources)
export(taxon_external_IDs)
export(taxonomy_about)
export(taxonomy_mrca)
export(taxonomy_subtree)
export(taxonomy_taxon_info)
export(tnrs_contexts)
export(tnrs_infer_context)
export(tnrs_match_names)
export(tol_about)
export(tol_induced_subtree)
export(tol_lineage)
export(tol_mrca)
export(tol_node_info)
export(tol_subtree)
export(unique_name)
import(ape)
importFrom(assertthat,assert_that)
importFrom(assertthat,is.flag)
importFrom(assertthat,is.string)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(httr,parse_url)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(jsonlite,unbox)
importFrom(rentrez,entrez_link)
importFrom(rentrez,entrez_search)
importFrom(rncl,read_newick_phylo)
importFrom(stats,na.omit)
importFrom(stats,setNames)
importFrom(stats,update)
rotl/NEWS.md 0000644 0001775 0000144 00000011362 13056075023 012654 0 ustar deepayan users ## rotl 3.0.2
### New features
* The function `get_study_subtree` gains the argument `tip_label` to control the
formatting of the tip labels, #90, reported by @bomeara
* The new function `is_in_tree` takes a list of OTT ids (i.e., the output of
`ott_id()`), and returns a logical vector indicating whether they are
included in the synthetic tree (workaround #31).
### Bug fixes
* The function `get_study_subtree` ignored the argument `subtree_id`, #89
reported by @bomeara
### Other changes
* `citation("rotl")` now includes the reference to the Open Tree of Life
publication.
* The "How to use rotl?" vignette was updated to document the behavior of v3 of
the OTL API which returns an HTTP error code 400 when the request for induced
subtree includes taxa that are not in the synthetic tree (fix #84)
## rotl 3.0.1
* Fix tests and vignette to reflect changes accompanying release 6.1 of the
synthetic tree
* Add section in vignette "How to use rotl?" about how to get the higher
taxonomy from a given taxon.
* Add `CITATION` file with MEE manuscript information (#82)
## rotl 3.0.0
* `rotl` now interacts with v3.0 of the Open Tree of Life APIs. The
documentation has been updated to reflect the associated changes. More
information about the v3.0 of the Open Tree of Life APIs can be found
[on their wiki](https://github.com/OpenTreeOfLife/germinator/wiki/Open-Tree-of-Life-Web-APIs).
### New features
* New methods: `tax_sources`, `is_suppressed`, `tax_rank`, `unique_name`,
`tax_name`, `ott_id`, for objects returned by `tnrs_match_names()`,
`taxonomy_taxon_info()`, `taxonomy_mrca()`, `tol_node_info()`,
`tol_about()`, and `tol_mrca()`. Each of these methods have their own class.
* New method `tax_lineage()` to extract the higher taxonomy from an object
returned by `taxonomy_taxon_info()` (initially suggested by Matt Pennell, #57).
* New method `tol_lineage()` to extract the nodes towards the root of the tree.
* New print methods for `tol_node_info()` and `tol_mrca()`.
* New functions `study_external_IDs()` and `taxon_external_IDs()` that return
the external identifiers for a study and associated trees (e.g., DOI, TreeBase
ID); and the identifiers of taxon names in taxonomic databases. The vignette
"Data mashup" includes an example on how to use it.
* The function `strip_ott_ids()` gains the argument `remove_underscores` to remove
underscores from tips in trees returned by OTL.
### Changes
* Rename method `ott_taxon_name()` to `tax_name()` for consistency.
* Rename method `synth_sources()` and `study_list()` to `source_list()`.
* Refactor how result of query is checked and parsed (invisible to the user).
### Bug fixes
* Fix bug in `studies_find_studies()`, the arguments `verbose` and `exact` were
ignored.
* The argument `only_current` has been dropped for the methods associated with
objects returned by `tnrs_match_names()`
* The print method for `tnrs_contexts()` duplicated some names.
* `inspect()`, `update()` and `synonyms()` methods for `tnrs_match_names()` did
not work if the query included unmatched taxa.
## rotl 0.5.0
* New vignette: `meta-analysis`
* Added arguments `include_lineage` and `list_terminal_descendants` to
`taxonomy_taxon()`
* Improve warning and format of the result if one of the taxa requested doesn't
match anything `tnrs_match_names`.
* In the data frame returned by `tnrs_match_names`, the columns
`approximate_match`, `is_synonym` and `is_deprecated` are now `logical`
(instead of `character`) [issue #54]
* New utility function `strip_ott_ids` removes OTT id information from
a character vector, making it easier to match tip labels in trees returned by
`tol_induced_subtree` to taxonomic names in other data sources. This function
can also remove underscores from the taxon names.
* New method `list_trees` returns a list of tree ids associated with
studies. The function takes the output of `studies_find_studies` or
`studies_find_trees`.
* `studies_find_studies` and `studies_find_trees` gain argument `detailed`
(default set to `TRUE`), that produces a data frame summarizing information
(title of the study, year of publication, DOI, ids of associated trees, ...)
about the studies matching the search criteria.
* `get_study_tree` gains argument `deduplicate`. When `TRUE`, if the tree
returned for a given study contains duplicated tip labels, they will be made
unique before being parsed by NCL by appending a suffix (`_1`, `_2`, `_3`,
etc.). (#46, reported by @bomeara)
* New method `get_study_year` for objects of class `study_meta` that returns the
year of publication of the study.
* A more robust approach is used by `get_tree_ids` to identify the tree ids in
the metadata returned by the API
## rotl 0.4.1
* Initial CRAN release on July 24th, 2015
rotl/R/ 0000755 0001775 0000144 00000000000 13056407503 011756 5 ustar deepayan users rotl/R/studies-utils.R 0000644 0001775 0000144 00000005102 12770010460 014707 0 ustar deepayan users ## Unexported function that generates a data frame summarizing the metadata.
## This function is used by both studies_find_studies and studies_find_trees,
## to generate the output when using the argument detailed=TRUE
##' @importFrom stats setNames
summarize_meta <- function(study_ids) {
    ## Substitute "" for anything that flattens to zero elements, so that
    ## every field contributes a value to the summary.
    blank_if_empty <- function(value) {
        if (length(unlist(value))) value else ""
    }

    ## One metadata object per requested study
    raw_meta <- lapply(study_ids, function(id) get_study_meta(id))

    ## Pull the fields of interest out of each metadata object
    fields <- lapply(raw_meta, function(study) {
        c(tree_ids = blank_if_empty(list(get_tree_ids(study))),
          study_year = blank_if_empty(get_study_year(study)),
          publication = blank_if_empty(get_publication(study)),
          doi = blank_if_empty(attr(get_publication(study), "DOI")),
          candidate = blank_if_empty(list(candidate_for_synth(study))))
    })

    ## Build one row per study, then bind the rows into a data frame
    rows <- lapply(fields, function(f) {
        c(n_trees = length(f[["tree_ids"]]),
          tree_ids = limit_trees(f[["tree_ids"]]),
          candidate = paste(f[["candidate"]], collapse = ", "),
          study_year = f[["study_year"]],
          title = blank_if_empty(extract_title(f[["publication"]])),
          study_doi = f[["doi"]])
    })
    dat <- cbind(study_ids = study_ids, do.call("rbind", rows))
    rownames(dat) <- NULL
    dat <- data.frame(dat, stringsAsFactors = FALSE)

    ## Keep the complete list of tree ids (named by study id) and the raw
    ## metadata as attributes of the result
    tree_ids_by_study <- lapply(fields, function(f) f[["tree_ids"]])
    attr(dat, "found_trees") <- stats::setNames(tree_ids_by_study, study_ids)
    attr(dat, "metadata") <- raw_meta
    dat
}
## Unexported function that attempts to extract title from the
## citation information associated with the study information. The
## function gets the element that follows what looks like a year in
## the string.
## pub_orig: the publication string extracted from the study metadata
## split_char: the character on which the bibliographic elements are
## separated with. (currently only deals with . and ,)
extract_title <- function(pub_orig, split_char = "\\.") {
    ## Returns the element(s) of the citation that directly follow
    ## something that looks like a year (4 digits, optionally followed by
    ## a lowercase letter), or character(0) if no year is found with
    ## either separator.
    pub <- unlist(strsplit(pub_orig, split = split_char))
    ## Strip *all* leading/trailing whitespace: citations often have more
    ## than one space after a separator, and a single leftover space
    ## would prevent the year from being recognized below.
    pub <- gsub("^\\s+|\\s+$", "", pub)
    which_year <- grep("^\\d{4}[a-z]?$", pub)
    ## NOTE: if the year is the last element, this yields NA
    res <- pub[which_year + 1]
    if (length(res) > 0) {
        return(res)
    }
    if (split_char == ",") {
        ## both separators have been tried
        return(character(0))
    }
    ## no year found when splitting on ".", try again with ","
    extract_title(pub_orig, ",")
}
## Unexported function that limits the display of tree_ids to the first
## 5 values.
limit_trees <- function(x) {
    ## Keep at most the first 5 ids; an ellipsis signals that some were
    ## left out. The result is a single comma-separated string.
    shown <- if (length(x) > 5) c(x[seq_len(5)], "...") else x
    paste(shown, collapse = ", ")
}
rotl/R/tnrs.R 0000644 0001775 0000144 00000023554 13056070101 013066 0 ustar deepayan users
##' Match taxonomic names to the Open Tree Taxonomy.
##'
##' Accepts one or more taxonomic names and returns information about
##' potential matches for these names to known taxa in the Open Tree
##' Taxonomy.
##'
##' This service uses taxonomic contexts to disambiguate homonyms and
##' misspelled names; a context may be specified using the
##' \code{context_name} argument. If no context is specified, then the
##' context will be inferred (i.e., the shallowest taxonomic context
##' that contains all unambiguous names in the input). Taxonomic
##' contexts are uncontested higher taxa that have been selected to
##' allow limits to be applied to the scope of TNRS searches
##' (e.g. 'match names only within flowering plants'). Once a context
##' has been identified (either user-specified or inferred), all taxon
##' name matches will be performed only against taxa within that
##' context. For a list of available taxonomic contexts, see
##' \code{\link{tnrs_contexts}}.
##'
##' A name is considered unambiguous if it is not a synonym and has
##' only one exact match to any taxon name in the entire taxonomy.
##'
##' Several functions listed in the \sQuote{See also} section can be
##' used to inspect and manipulate the object generated by this
##' function.
##'
##'
##' @title Match names to the Open Tree Taxonomy
##' @param names taxon names to be queried. Currently limited to
##' 10,000 names for exact matches and 2,500 names for approximate
##' matches (character vector)
##' @param context_name name of the taxonomic context to be searched
##' (length-one character vector). Must match (case sensitive) one
##' of the values returned by \code{\link{tnrs_contexts}}.
##' @param do_approximate_matching A logical indicating whether or not
##' to perform approximate string (a.k.a. \dQuote{fuzzy})
##' matching. Using \code{FALSE} will greatly improve
##' speed. Default, however, is \code{TRUE}.
##' @param ids A vector of ids to use for identifying names. These
##' will be assigned to each name in the names array. If ids is
##' provided, then ids and names must be identical in length.
##' @param include_suppressed Ordinarily, some quasi-taxa, such as
##' incertae sedis buckets and other non-OTUs, are suppressed from
##' TNRS results. If this parameter is true, these quasi-taxa are
##' allowed as possible TNRS results.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A data frame summarizing the results of the query. The
##' original query output is appended as an attribute to the
##' returned object (and can be obtained using \code{attr(object,
##' "original_response")}).
##' @seealso \code{\link{inspect.match_names}},
##' \code{\link{update.match_names}},
##' \code{\link{synonyms.match_names}}.
##' @examples \dontrun{
##' deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
##' "chordata", "hemichordata"))
##' }
##' @importFrom stats setNames
##' @export
tnrs_match_names <- function(names = NULL, context_name = NULL,
                             do_approximate_matching = TRUE, ids = NULL,
                             include_suppressed = FALSE, ...) {

    ## Reject an unknown context before sending the matching request
    if (!is.null(context_name) &&
        !context_name %in% unlist(tnrs_contexts(...))) {
        stop("The ", sQuote("context_name"),
             " is not valid. Check possible values using tnrs_contexts()")
    }

    res <- .tnrs_match_names(names = names, context_name = context_name,
                             do_approximate_matching = do_approximate_matching,
                             ids = ids, include_suppressed = include_suppressed,
                             ...)
    check_tnrs(res)

    ## Summarize the first match of every result
    matches <- build_summary_match(res,
                                   res_id = seq_along(res[["results"]]),
                                   match_id = 1, initial_creation = TRUE)
    matches$search_string <- gsub("\\\\", "", matches$search_string)

    ## Put the rows in the order in which the names were supplied
    row_order <- match(tolower(names), matches$search_string)
    matches <- matches[row_order, ]

    ## Columns holding only "TRUE"/"FALSE"/NA become logical
    for (column in c("approximate_match", "is_synonym", "flags")) {
        matches[[column]] <- convert_to_logical(matches[[column]])
    }

    ## The row names still hold the positions from before the reordering:
    ## record them before they are discarded
    attr(matches, "original_order") <- as.numeric(rownames(matches))
    rownames(matches) <- NULL
    attr(matches, "original_response") <- res
    attr(matches, "match_id") <- rep(1, nrow(matches))
    attr(matches, "has_original_match") <- !is.na(matches[["number_matches"]])
    class(matches) <- c("match_names", "data.frame")
    matches
}
##' @importFrom stats na.omit
convert_to_logical <- function(x) {
    ## `x` is converted only when all its non-missing values are "TRUE"
    ## or "FALSE"; otherwise it is handed back untouched.
    non_missing <- stats::na.omit(x)
    if (!all(non_missing %in% c("TRUE", "FALSE"))) {
        return(x)
    }
    ## the converted vector is returned invisibly
    invisible(as.logical(x))
}
check_tnrs <- function(req) {
    ## Error if the response holds no result at all; warn (listing the
    ## names) if some of the names could not be matched.
    n_results <- length(req$results)
    if (n_results < 1) {
        stop("No matches for any of the provided taxa")
    }
    has_unmatched <- length(req[["unmatched_names"]]) > 0
    if (has_unmatched) {
        warning(paste(req$unmatched_names, collapse=", "), " are not matched")
    }
}
## Maps each column of the match summary to the function that extracts
## its value from a single match `x`.
tnrs_columns <- list(
    search_string = function(x) {
        x[["search_string"]]
    },
    unique_name = function(x) {
        .tax_unique_name(x[["taxon"]])
    },
    approximate_match = function(x) {
        x[["is_approximate_match"]]
    },
    ott_id = function(x) {
        .tax_ott_id(x[["taxon"]])
    },
    is_synonym = function(x) {
        x[["is_synonym"]]
    },
    ## all the flags of the taxon end up in a single string
    flags = function(x) {
        paste(.tax_flags(x[["taxon"]]), collapse = ", ")
    }
)
summary_row_factory <- function(res, res_id, match_id, columns = tnrs_columns) {
    ## Builds the summary row for match number `match_id` of result number
    ## `res_id`: one value per extractor in `columns`, followed by the
    ## number of matches available for that result.
    all_matches <- res[["results"]][[res_id]][["matches"]]
    this_match <- all_matches[[match_id]]
    fields <- sapply(columns, function(extract) extract(this_match))
    c(fields, number_matches = length(all_matches))
}
## Builds the data frame summarizing matches found in `res`.
## res: the list holding the "results" (and possibly "unmatched_names")
## elements
## res_id: index(es) of the element(s) of res[["results"]] to summarize
## match_id: index(es) of the match(es) to use within each result; if
## NULL, all the matches of the result are used
## initial_creation: if TRUE, one row filled with NA (except for the
## first column) is added for each element of res[["unmatched_names"]]
## Returns a data frame with one column per element of `tnrs_columns`
## plus "number_matches".
build_summary_match <- function(res, res_id, match_id = NULL, initial_creation) {
## Row for a name without match: NA everywhere, except for the first
## column that receives `x`
build_empty_row <- function(x) {
no_match_row <- stats::setNames(
rep(NA, length(tnrs_columns) + 1),
c(names(tnrs_columns), "number_matches"))
no_match_row[1] <- x
no_match_row
}
## Several results combined with several matches is not supported
if (length(res_id) > 1 &&
(!is.null(match_id) && length(match_id) > 1)) {
stop("Something is wrong. Please contact us.")
}
## Row(s) for the result `rid`: a single vector if there is only one
## match to report, a list of vectors otherwise
build_summary_row <- function(rid) {
if (is.null(match_id)) {
match_id <- seq_len(length(res[["results"]][[rid]][["matches"]]))
}
## The `res` given to summary_row_factory() is the argument of the
## enclosing function: the local `res` only gets assigned after
## lapply() has returned.
res <- lapply(match_id, function(mid) {
summary_row_factory(res, rid, mid)
})
if (identical(length(match_id), 1L)) {
unlist(res)
} else res
}
summary_row <- lapply(res_id, build_summary_row)
## With a single result, remove the level of nesting added by lapply()
if (identical(length(res_id), 1L)) {
summary_row <- unlist(summary_row, recursive = FALSE)
}
## Needed if only 1 row returned
if (!inherits(summary_row, "list")) {
summary_row <- list(summary_row)
}
## Add potential unmatched names
if (initial_creation && length(res[["unmatched_names"]])) {
no_match <- lapply(res[["unmatched_names"]], build_empty_row)
summary_row <- c(summary_row, no_match)
}
## Stack the rows and label the columns
summary_match <- do.call("rbind", summary_row)
summary_match <- data.frame(summary_match, stringsAsFactors=FALSE)
names(summary_match) <- c(names(tnrs_columns), "number_matches")
summary_match
}
##' This function returns a list of pre-defined taxonomic contexts
##' (i.e. clades) which can be used to limit the scope of tnrs
##' queries.
##'
##' Taxonomic contexts are available to limit the scope of TNRS
##' searches. These contexts correspond to uncontested higher taxa
##' such as 'Animals' or 'Land plants'. This service returns a list
##' containing all available taxonomic context names, which may be
##' used as input (via the \code{context_name} argument in other
##' functions) to limit the search scope of other services including
##' \code{\link{tnrs_match_names}}.
##' @title TNRS contexts
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A list with one element per major clade (e.g.,
##' animals, microbes, plants, fungi, life), each containing the
##' possible contexts.
##' @export
tnrs_contexts <- function(...) {
    ## Tag the response with the "tnrs_contexts" class (for which a print
    ## method is defined)
    structure(.tnrs_contexts(...), class = "tnrs_contexts")
}
##' @export
print.tnrs_contexts <- function(x, ...) {
    ## Prints, for each group of contexts, its first element on a line of
    ## its own, followed by the remaining elements, 5 per line.
    ## x: an object of class "tnrs_contexts"
    ## ...: currently unused
    ## Returns `x` invisibly, as print methods are expected to do (the
    ## nested list of NULLs built by lapply() used to be returned, and
    ## got displayed after an explicit call to print()).
    cat("Possible contexts:\n")
    for (group in unclass(x)) {
        res <- unlist(group)
        cat(" ", res[1], "\n")
        if (length(res) > 1) {
            for (first in seq(2, length(res), by = 5)) {
                ## the last line may hold fewer than 5 elements
                last <- min(first + 4, length(res))
                cat(" ", paste(res[first:last], collapse = ", "), "\n")
            }
        }
    }
    invisible(x)
}
##' Return a taxonomic context given a list of taxonomic names
##'
##' Find the least inclusive taxonomic context that includes all the
##' unambiguous names in the input set. Unambiguous names are names
##' with exact matches to non-homonym taxa. Ambiguous names (those
##' without exact matches to non-homonym taxa) are indicated in
##' results.
##'
##' @title Infer the taxonomic context from a list of names
##' @param names Vector of taxon names.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A list including the context name, the context ott id and
##' possibly the names in the query that have an ambiguous
##' taxonomic meaning in the query.
##' @examples
##' \dontrun{
##' res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
##' }
##' @export
tnrs_infer_context <- function(names=NULL, ...) {
    ## Thin wrapper: the response of the internal function is returned
    ## as is
    inferred <- .tnrs_infer_context(names = names, ...)
    inferred
}
rotl/R/api-taxonomy.R 0000644 0001775 0000144 00000004564 12707502314 014535 0 ustar deepayan users ##' @importFrom httr content
## Summary information about the OpenTree Taxonomy (OTT)
.taxonomy_about <- function(...) {
    ## POST with an empty body; `...` is passed on to otl_POST()
    about <- otl_POST(path="/taxonomy/about", body=list(), ...)
    about
}
##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.flag
##' @importFrom assertthat assert_that
## Information about an OpenTree Taxonomy (OTT) taxon
.taxonomy_taxon_info <- function(ott_id=NULL,
                                 include_children = FALSE,
                                 include_lineage = FALSE,
                                 include_terminal_descendants = FALSE,
                                 ...) {
    ## Only one (validated) OTT id can be requested at a time
    ott_id <- check_ott_ids(ott_id)
    if (length(ott_id) > 1) {
        stop("Must only supply one ", sQuote("ott_id"), " argument")
    }

    ## Each option must be a single TRUE/FALSE
    assertthat::assert_that(assertthat::is.flag(include_children))
    assertthat::assert_that(assertthat::is.flag(include_lineage))
    assertthat::assert_that(assertthat::is.flag(include_terminal_descendants))

    ## Every element of the request body is unboxed
    body <- lapply(
        list(ott_id = ott_id,
             include_children = include_children,
             include_lineage = include_lineage,
             include_terminal_descendants = include_terminal_descendants),
        jsonlite::unbox)
    taxon_info <- otl_POST(path="/taxonomy/taxon_info", body=body, ...)
    taxon_info
}
##' @importFrom jsonlite unbox
##' @importFrom httr content
## Get a subtree from the OpenTree Taxonomy (OTT) taxonomic tree
.taxonomy_subtree <- function(ott_id=NULL, label_format=NULL, ...) {
    ## Only one (validated) OTT id can be requested at a time
    ott_id <- check_ott_ids(ott_id)
    if (length(ott_id) > 1) {
        stop("Must only supply one ", sQuote("ott_id"), " argument")
    }
    body <- list(ott_id=jsonlite::unbox(ott_id))

    ## `label_format` is optional: validated and sent only when supplied
    has_format <- !is.null(label_format)
    if (has_format && !check_label_format(label_format)) {
        stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
             sQuote("id"), ", or ", sQuote("name_and_id"))
    }
    if (has_format) {
        body$label_format <- jsonlite::unbox(label_format)
    }

    subtree <- otl_POST(path="/taxonomy/subtree", body=body, ...)
    subtree
}
##' @importFrom httr content
## Get the most recent common ancestor (MRCA) from nodes in the OpenTree Taxonomy (OTT)
.taxonomy_mrca <- function (ott_ids = NULL, ...) {
    ## The ids are validated, then sent as the `ott_ids` element of the
    ## request body
    body <- list(ott_ids=check_ott_ids(ott_ids))
    mrca <- otl_POST(path="/taxonomy/mrca", body=body, ...)
    mrca
}
rotl/R/api-collections.R 0000644 0001775 0000144 00000001771 12674100604 015171 0 ustar deepayan users ##' @importFrom assertthat assert_that is.flag
## This endpoint currently returns JSON in XML with mime type as text/html
.collection_find_collections <- function(property = NULL, value = NULL,
                                         verbose = FALSE, ...) {
    assertthat::assert_that(assertthat::is.flag(verbose))
    ## Only `verbose` is included in the request body; `property` and
    ## `value` are accepted but not used here.
    collections <- otl_POST(path = "collections/find_collections",
                            body = list(verbose = verbose), ...)
    collections
}
.collection_properties <- function(...) {
    ## POST with an empty body; `...` is passed on to otl_POST()
    properties <- otl_POST(path = "collections/properties",
                           body = list(), ...)
    properties
}
.get_collection <- function(owner_id = NULL, collection_name = NULL, ...) {
    ## Both parts of the path must be single strings
    assertthat::assert_that(assertthat::is.string(owner_id))
    assertthat::assert_that(assertthat::is.string(collection_name))
    ## GET collections/<owner_id>/<collection_name>
    collection_path <- paste("collections", owner_id, collection_name,
                             sep = "/")
    collection <- otl_GET(path = collection_path, ...)
    collection
}
rotl/R/is_in_tree.R 0000644 0001775 0000144 00000004004 13056073352 014220 0 ustar deepayan users ##' Some valid taxonomic names do not occur in the Synthetic
##' Tree. This convenience function allows you to check whether a
##' given Open Tree Taxonomy identifier (OTT id) is in the tree. A taxonomic
##' name may not occur in the synthetic tree because (1) it is an
##' extinct or invalid taxon, or (2) it is part of a group that is not
##' monophyletic in the tree.
##'
##' @title Check that OTT ids occur in the Synthetic Tree
##' @param ott_ids a vector of Open Tree Taxonomy identifiers
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A named logical vector. \code{TRUE} indicates that the OTT
##' id is in the synthetic tree, and \code{FALSE} that it is not.
##' @examples
##' \dontrun{
##' plant_families <- c("Asteraceae", "Solanaceae", "Poaceae", "Amaranthaceae",
##' "Zamiaceae", "Araceae", "Juncaceae")
##' matched_names <- tnrs_match_names(plant_families)
##' ## This fails because some ott ids are not in the tree
##' ## plant_tree <- tol_induced_subtree(ott_id(matched_names))
##' ## So let's check which ones are actually in the tree first:
##' in_tree <- is_in_tree(ott_id(matched_names))
##' ## This now works:
##' plant_tree <- tol_induced_subtree(ott_id(matched_names)[in_tree])
##' }
##'
##' @export
is_in_tree <- function(ott_ids, ...) {
    UseMethod("is_in_tree")
}

##' @export
is_in_tree.otl_ott_id <- function(ott_ids, ...) {
    not_found_msg <- "not find any synthetic tree nodes corresponding to the OTT id provided"

    ## TRUE if the node information can be retrieved for `ottid` and is
    ## about this very id, FALSE otherwise.
    check_one <- function(ottid) {
        node <- try(tol_node_info(ottid, ...), silent = TRUE)
        if (!inherits(node, "try-error")) {
            return(ott_id(node)[[1]] == ottid)
        }
        ## The request failed. Stay quiet if the error message says that
        ## this id has no node in the synthetic tree, otherwise warn that
        ## something else may be going on.
        expected_failure <- grepl(not_found_msg, node) &&
            grepl(paste0("(", ottid, ")"), node)
        if (!expected_failure) {
            warning("something seems off, check your internet connection?")
        }
        FALSE
    }

    vapply(ott_ids, check_one, logical(1), USE.NAMES = TRUE)
}
rotl/R/tax_utils.R 0000644 0001775 0000144 00000004502 13056112635 014115 0 ustar deepayan users ## all extended-taxon-descriptors have:
## - ott_id
## - name
## - rank
## - unique_name
## - tax_sources
## and they may have
## - flags
## - synonyms
## - is_suppressed
## builds the functions to access the content of the taxon descriptors.
## slot: the name of the list element we need to access
## flatten: if the list element is a list, make it a vector
## optional: is the slot found in all taxon descriptors or only in some
tax_access_factory <- function(slot, flatten, optional) {
    ## The returned accessor takes a taxon descriptor (a list), and
    ## returns the content of `slot`, unlisted if `flatten` is TRUE.
    ## When the slot is missing: an error if the slot is not optional, a
    ## warning and NULL if it is.
    function(tax) {
        if (exists(slot, tax)) {
            content <- tax[[slot]]
            if (flatten) {
                return(unlist(content))
            }
            return(content)
        }
        if (!optional) {
            stop("Invalid taxon object", call. = FALSE)
        }
        warning("This object doesn't have ", sQuote(slot), call. = FALSE)
        NULL
    }
}
## Accessors for the slots declared as non-optional: the accessor stops
## with an error if the slot is missing from the object it receives.
.tax_ott_id <- tax_access_factory("ott_id", flatten = FALSE, optional = FALSE)
.tax_name <- tax_access_factory("name", flatten = FALSE, optional = FALSE)
.tax_rank <- tax_access_factory("rank", flatten = FALSE, optional = FALSE)
## flatten = TRUE: the content of the slot is unlisted
.tax_sources <- tax_access_factory("tax_sources", flatten = TRUE,
optional = FALSE)
.tax_unique_name <- tax_access_factory("unique_name", flatten = FALSE,
optional = FALSE)
## optional
## (a missing slot only triggers a warning, and the accessor returns NULL)
.tax_flags <- tax_access_factory("flags", flatten = TRUE, optional = TRUE)
.tax_is_suppressed <- tax_access_factory("is_suppressed", flatten = FALSE,
optional = TRUE)
.tax_synonyms <- tax_access_factory("synonyms", flatten = TRUE, optional = TRUE)
## Does the slot element represent a taxon?
is_taxon <- function(slot) {
    ## TRUE only if all the following names are found in `slot`
    required <- c("ott_id", "name", "rank", "tax_sources", "unique_name")
    all(required %in% names(slot))
}
### adds a class to the objects returned by the methods
add_otl_class <- function(res, .f) {
    ## The name of the class is derived from the `slot` variable found in
    ## the environment of `.f`. The "otl_" prefix avoids conflicts with
    ## existing class names (apparently the class "name" already exists).
    slot_name <- as.list(environment(.f))[["slot"]]
    class(res) <- c(paste0("otl_", slot_name), class(res))
    res
}
##' @export
`[.otl_ott_id` <- function(x, i, ...) {
    ## Subset with the next method, then put the class of `x` back on the
    ## result
    subset <- NextMethod()
    class(subset) <- class(x)
    subset
}
rotl/R/methods.R 0000644 0001775 0000144 00000006251 12770014122 013541 0 ustar deepayan users ############################################################################
## methods ##
############################################################################
##' Methods for dealing with objects containing taxonomic information
##' (Taxonomy, TNRS endpoints)
##'
##' This is the page for the generic methods. See the help pages for
##' \code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and
##' \code{\link{tnrs_match_names}} for more information.
##'
##' @title Methods for Taxonomy
##' @param tax an object returned by \code{\link{taxonomy_taxon_info}},
##' \code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}
##' @param ... additional arguments (see
##' \code{\link{tnrs_match_names}})
##' @rdname taxonomy-methods
##' @export
tax_rank <- function(tax, ...) {
    UseMethod("tax_rank")
}

##' @export
##' @rdname taxonomy-methods
ott_id <- function(tax, ...) {
    UseMethod("ott_id")
}

##' @export
##' @rdname taxonomy-methods
synonyms <- function(tax, ...) {
    UseMethod("synonyms")
}

##' @export
##' @rdname taxonomy-methods
tax_sources <- function(tax, ...) {
    UseMethod("tax_sources")
}

##' @export
##' @rdname taxonomy-methods
is_suppressed <- function(tax, ...) {
    UseMethod("is_suppressed")
}

##' @export
##' @rdname taxonomy-methods
unique_name <- function(tax, ...) {
    UseMethod("unique_name")
}

##' @export
##' @rdname taxonomy-methods
tax_name <- function(tax, ...) {
    UseMethod("tax_name")
}

### flags -----------------------------------------------------------------------
##' @export
##' @rdname match_names-methods
flags <- function(tax, ...) {
    UseMethod("flags")
}
###
##' Retrieve the detailed information for the list of studies used in
##' the Tree of Life.
##'
##' @title List of studies used in the Tree of Life
##'
##' @details This function takes the object resulting from
##' \code{tol_about(study_list = TRUE)}, \code{tol_mrca()},
##' \code{tol_node_info()}, and returns a data frame listing the
##' \code{tree_id}, \code{study_id} and \code{git_sha} for the
##' studies currently included in the Tree of Life.
##'
##' @param tax a list containing a \code{source_id_map} slot.
##' @param ... additional arguments (currently unused)
##'
##' @return a data frame
##' @export
source_list <- function(tax, ...) {
    UseMethod("source_list")
}
##' Extract the lineage information (higher taxonomy) from an object
##' returned by \code{\link{taxonomy_taxon_info}}.
##'
##' The object passed to this function must have been created using
##' the argument \code{include_lineage=TRUE}.
##'
##' @title Lineage of a taxon
##' @param tax an object created by \code{\link{taxonomy_taxon_info}}
##' using the argument \code{include_lineage=TRUE}.
##' @param ... additional arguments (currently unused).
##' @return A list with one slot per taxon that contains a data frame
##' with 3 columns: the taxonomy rank, the name, and unique name
##' for all taxa included in the lineage of the taxon up to the
##' root of the tree.
##' @rdname tax_lineage
##' @export
tax_lineage <- function(tax, ...) {
    UseMethod("tax_lineage")
}
##' @export
##' @rdname tol_node_info
tol_lineage <- function(tax, ...) {
    UseMethod("tol_lineage")
}
rotl/R/api-studies.R 0000644 0001775 0000144 00000013557 13003747366 014351 0 ustar deepayan users ##' @importFrom jsonlite unbox
##' @importFrom httr content
## Return a list of studies from the OpenTree docstore that match a given property
.studies_find_studies <- function(property = NULL, value = NULL, verbose = FALSE,
                                  exact = FALSE, ...) {
    if (!is.logical(verbose)) {
        stop("Argument \'verbose\' should be logical")
    }
    if (!is.logical(exact)) {
        stop("Argument \'exact\' should be logical")
    }

    ## `property` is required, and must be a character
    if (is.null(property)) {
        stop("Must supply a \'property\' argument")
    }
    if (!is.character(property)) {
        stop("Argument \'property\' must be of class \"character\"")
    }
    body <- list(property = jsonlite::unbox(property))

    ## same requirements for `value`
    if (is.null(value)) {
        stop("Must supply a \'value\' argument")
    }
    if (!is.character(value)) {
        stop("Argument \'value\' must be of class \"character\"")
    }
    body$value <- jsonlite::unbox(value)

    body$verbose <- jsonlite::unbox(verbose)
    body$exact <- jsonlite::unbox(exact)

    studies <- otl_POST(path="studies/find_studies/", body=body, ...)
    studies
}
##' @importFrom jsonlite unbox
##' @importFrom httr content
## Return a list of trees from the OpenTree docstore that match a given property
.studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
                                exact=FALSE, ...) {
    ## property: name of the property to search on (required, character)
    ## value: value to look for (required, character)
    ## verbose, exact: logicals included in the request body
    ## ...: passed on to otl_POST()
    ## Returns the result of otl_POST() on "studies/find_trees/".
    if (!is.logical(verbose)) {
        stop("Argument \'verbose\' must be of class \"logical\"")
    }
    if (!is.logical(exact)) {
        stop("Argument \'exact\' must be of class \"logical\"")
    }
    req_body <- list()
    if (!is.null(property)) {
        if (!is.character(property)) {
            stop("Argument \'property\' must be of class \"character\"")
        }
        req_body$property <- jsonlite::unbox(property)
    } else {
        stop("Must supply a \'property\' argument")
    }
    if (!is.null(value)) {
        if (!is.character(value)) {
            stop("Argument \'value\' must be of class \"character\"")
        }
        req_body$value <- jsonlite::unbox(value)
    } else {
        stop("Must supply a \'value\' argument")
    }
    ## `verbose` and `exact` must be *named* elements of the body (as in
    ## .studies_find_studies): they used to be appended without names, so
    ## the request did not carry them under the keys "verbose" and "exact".
    req_body$verbose <- jsonlite::unbox(verbose)
    req_body$exact <- jsonlite::unbox(exact)
    res <- otl_POST(path="studies/find_trees/",
                    body=req_body, ...)
    res
}
##' @importFrom httr content
## POST an empty body to "studies/properties/" and hand back whatever
## otl_POST() returns (the properties usable to search studies and trees).
.studies_properties <- function() {
    props <- otl_POST(path = "studies/properties/", body = list())
    props
}
##' @importFrom httr content
## Fetch a study through otl_GET(). The request path is
## "study/<study_id><suffix>", where the suffix comes from
## otl_formats(format). `study_id` is a required character argument.
.get_study <- function(study_id = NULL,
                       format = c("", "nexus", "newick", "nexml", "json"),
                       ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    }
    if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    format <- match.arg(format)
    study_file <- paste0(study_id, otl_formats(format))
    study <- otl_GET(path = paste("study", study_file, sep = "/"), ...)
    study
}
##' @importFrom httr content
## Fetch one tree of a study through otl_GET(). The request path is
## "study/<study_id>/tree/<tree_id><suffix>/?tip_label=<tip_label>",
## where the suffix comes from otl_formats(format). `study_id` and
## `tree_id` are required character arguments.
.get_study_tree <- function(study_id = NULL, tree_id = NULL,
                            format = c("json", "newick", "nexus"),
                            tip_label = c("ot:originallabel", "ot:ottid",
                                          "ot:otttaxonname"),
                            ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    }
    if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    if (is.null(tree_id)) {
        stop("Must supply a \'tree\' argument")
    }
    if (!is.character(tree_id)) {
        stop("Argument \'tree\' must be of class \"character\"")
    }
    format <- match.arg(format)
    tip_label <- match.arg(tip_label)
    tree_file <- paste0(tree_id, otl_formats(format),
                        "/?tip_label=", tip_label)
    tree_path <- paste("study", study_id, "tree", tree_file, sep = "/")
    tree <- otl_GET(path = tree_path, ...)
    tree
}
##' @importFrom httr content
## GET "study/<study_id>/meta"; extra arguments go to otl_GET().
.get_study_meta <- function(study_id, ...) {
    meta_path <- paste("study", study_id, "meta", sep = "/")
    otl_GET(path = meta_path, ...)
}
##' @importFrom httr content
## Fetch a subtree of a study tree through otl_GET(). The request path is
## "study/<study_id>/subtree/<tree_id><suffix>?subtree_id=<subtree_id>&tip_label=<tip_label>",
## where the suffix comes from otl_formats(format). `study_id`, `tree_id`
## and `subtree_id` must be non-NULL character values.
.get_study_subtree <- function(study_id, tree_id, subtree_id,
                               format = c("newick", "nexus", "nexml"),
                               tip_label = c("ot:originallabel", "ot:ottid",
                                             "ot:otttaxonname"),
                               ...) {
    if (is.null(study_id)) {
        stop("Must supply a \'study_id\' argument")
    }
    if (!is.character(study_id)) {
        stop("Argument \'study_id\' must be of class \"character\"")
    }
    if (is.null(tree_id)) {
        stop("Must supply a \'tree\' argument")
    }
    if (!is.character(tree_id)) {
        stop("Argument \'tree\' must be of class \"character\"")
    }
    if (is.null(subtree_id)) {
        stop("Must supply a \'subtree\' argument")
    }
    if (!is.character(subtree_id)) {
        stop("Argument \'subtree\' must be of class \"character\"")
    }
    format <- match.arg(format)
    format <- otl_formats(format)
    tip_label <- match.arg(tip_label)
    tree_file <- paste0(tree_id, format)
    url_stem <- paste("study", study_id, "subtree", tree_file, sep = "/")
    query <- paste0("?subtree_id=", subtree_id, "&tip_label=", tip_label)
    subtree <- otl_GET(path = paste0(url_stem, query), ...)
    subtree
}
### Let's not worry about those for now, as their results could be
### obtained using get_study_tree
## GET "study/<study_id>/otu/<otu>"; extra arguments go to otl_GET().
get_study_otu <- function(study_id, otu=NULL, ...) {
    otu_path <- paste("study", study_id, "otu", otu, sep = "/")
    otl_GET(path = otu_path, ...)
}
## GET "study/<study_id>/otu/<otus>"; extra arguments go to otl_GET().
get_study_otus <- function(study_id, otus, ...) {
    otus_path <- paste("study", study_id, "otu", otus, sep = "/")
    otl_GET(path = otus_path, ...)
}
## GET "study/<study_id>/otumap".
##
## study_id: identifier pasted into the request path.
## ...:      forwarded to otl_GET(), as the sibling get_study_* helpers
##           do (previously accepted here but silently dropped).
get_study_otumap <- function(study_id, ...) {
    otl_GET(path=paste("study", study_id, "otumap", sep="/"), ...)
}
rotl/R/taxonomy.R 0000644 0001775 0000144 00000025062 12707504372 013770 0 ustar deepayan users ##' Summary information about the Open Tree Taxaonomy (OTT)
##'
##' Return metadata and information about the taxonomy
##' itself. Currently, the available metadata is fairly sparse, but
##' includes (at least) the version, and the location from which the
##' complete taxonomy source files can be downloaded.
##'
##' @title Information about the Open Tree Taxonomy
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A list with the following properties:
##' \itemize{
##'
##' \item {weburl} {String. The release page for this version
##' of the taxonomy.}
##'
##' \item {author} {String. The author string.}
##'
##' \item {name} {String. The name of the taxonomy.}
##'
##' \item {source} {String. The full identifying information for
##' this version of the taxonomy.}
##'
##' \item {version} {String. The version number of the taxonomy.}
##' }
##' @examples
##' \dontrun{
##' taxonomy_about()
##' }
##' @export
taxonomy_about <- function(...) {
    ## thin wrapper: all arguments go straight to the API helper
    about <- .taxonomy_about(...)
    about
}
##' Information about taxa.
##'
##' Given a vector of ott ids, \code{taxonomy_taxon_info} returns
##' information about the specified taxa.
##'
##' The functions \code{tax_rank}, \code{tax_name}, and
##' \code{synonyms} can extract this information from an object
##' created by the \code{taxonomy_taxon_info()}.
##'
##' @title Taxon information
##' @param ott_ids the ott ids of the taxon of interest (numeric or
##' character containing only numbers)
##' @param include_children whether to include information about all
##' the children of this taxon. Default \code{FALSE}.
##' @param include_lineage whether to include information about all
##' the higher level taxa that include the \code{ott_ids}.
##' Default \code{FALSE}.
##' @param include_terminal_descendants whether to include the list of
##' terminal \code{ott_ids} contained in the \code{ott_ids}
##' provided.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @param tax an object generated by the \code{taxonomy_taxon_info}
##' function
##' @return \code{taxonomy_taxon_info} returns a list detailing
##' information about the taxa. \code{tax_rank} and
##' \code{tax_name} return a vector. \code{synonyms} returns a
##' list whose elements are the synonyms for each of the
##' \code{ott_id} requested.
##'
##' @seealso \code{\link{tnrs_match_names}} to obtain \code{ott_id}
##' from a taxonomic name.
##' @examples
##' \dontrun{
##' req <- taxonomy_taxon_info(ott_ids=515698)
##' tax_rank(req)
##' tax_name(req)
##' synonyms(req)
##' }
##' @export
taxonomy_taxon_info <- function(ott_ids, include_children = FALSE,
                                include_lineage = FALSE,
                                include_terminal_descendants = FALSE, ...) {
    ## one API call per ott id; the flags are the same for every call
    fetch_one <- function(id) {
        .taxonomy_taxon_info(
            ott_id = id,
            include_children = include_children,
            include_lineage = include_lineage,
            include_terminal_descendants = include_terminal_descendants,
            ...
        )
    }
    info <- lapply(ott_ids, fetch_one)
    names(info) <- ott_ids
    class(info) <- "taxon_info"
    info
}
##' Given an ott id, return the inclusive taxonomic subtree descended
##' from the specified taxon.
##'
##' If the output of this function is exported to a file, the only
##' possible value for the \code{output_format} argument is
##' \dQuote{\code{newick}}. If the file provided already exists, it
##' will be silently overwritten.
##'
##' @title Taxonomy subtree
##' @param ott_id The ott id of the taxon of interest.
##' @param output_format the format of the object to be returned. See
##' the \sQuote{Return} section.
##' @param label_format Character. Defines the label type; one of
##' \dQuote{\code{name}}, \dQuote{\code{id}}, or
##' \dQuote{\code{name_and_id}} (the default).
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @param file the file name where to save the output of the
##' function. Ignored unless \code{output_format} is set to
##' \dQuote{\code{newick}}.
##' @return If the \code{file} argument is missing: \itemize{
##'
##' \item{\dQuote{\code{taxa}}} { a list of the taxa names
##' (species) in slot \code{tip_label}, and higher-level taxonomy
##' (e.g., families, genera) in slot \code{edge_label}, descending
##' from the taxa corresponding to the \code{ott_id} provided. }
##'
##' \item{\dQuote{\code{newick}}} { a character vector containing
##' the newick formatted string corresponding to the taxonomic
##' subtree for the \code{ott_id} provided. }
##'
##' \item{\dQuote{\code{phylo}}} { an object of the class
##' \code{phylo} from the \code{\link[ape]{ape}} package. }
##'
##' \item{\dQuote{\code{raw}}} { the direct output from the API,
##' i.e., a list with an element named \sQuote{newick} that
##' contains the subtree as a newick formatted string. }
##'
##' }
##'
##' If a \code{file} argument is provided (and
##' \code{output_format} is set to \dQuote{\code{newick}}), a
##' logical indicating whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' req <- taxonomy_subtree(ott_id=515698)
##' plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
##' }
##' @export
taxonomy_subtree <- function (ott_id=NULL,
                              output_format = c("taxa", "newick", "phylo", "raw"),
                              label_format=NULL, file, ...) {
    output_format <- match.arg(output_format)
    res <- .taxonomy_subtree(ott_id = ott_id, label_format = label_format, ...)

    ## A file can only be written for the newick output; warn otherwise.
    if (!missing(file) && !identical(output_format, "newick")) {
        warning(sQuote("file"),
                " argument is ignored, you can only write newick tree strings to a file.")
    }

    if (identical(output_format, "raw")) {
        return(res)
    } else if (identical(output_format, "newick")) {
        res <- res$newick
        if (!missing(file)) {
            ## remove any existing file first, then write the newick string
            unlink(file)
            cat(res, file = file)
            ## `invisible()` must wrap the value, not the `return()` call,
            ## otherwise the function returns before invisibility is applied
            return(invisible(file.exists(file)))
        }
    } else if (identical(output_format, "phylo")) {
        res <- phylo_from_otl(res)
    } else { ## in all other cases use tree_to_labels
        res <- tree_to_labels(res)
    }
    res
}
##' Taxonomic Least Inclusive Common Ancestor (MRCA)
##'
##' Given a set of OTT ids, get the taxon that is the most recent common
##' ancestor (the MRCA) of all the identified taxa.
##'
##' @title Taxonomic MRCA
##' @param ott_ids a vector of ott ids for the taxa whose MRCA is to
##' be found (numeric).
##' @param tax an object generated by the \code{taxonomy_mrca}
##' function
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return \itemize{
##'
##' \item{\code{taxonomy_mrca}} { returns a list about the
##' taxonomic information relating to the MRCA for the ott_ids
##' provided. }
##'
##' \item{\code{tax_rank}} { returns a character vector of the
##' taxonomic rank for the MRCA. }
##'
##' \item{\code{tax_name}} { returns a character vector of the
##' Open Tree Taxonomy name for the MRCA. }
##'
##' \item{\code{ott_id}} { returns a numeric vector of the ott id
##' for the MRCA. }
##'
##' }
##' @examples
##' \dontrun{
##' req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
##' tax_rank(req)
##' tax_name(req)
##' ott_id(req)
##' }
##' @export
taxonomy_mrca <- function(ott_ids = NULL, ...) {
    mrca <- .taxonomy_mrca(ott_ids = ott_ids, ...)
    ## prepend the S3 class used by the tax_*() accessors
    class(mrca) <- c("taxon_mrca", class(mrca))
    mrca
}
### methods for taxonomy_taxon_info ---------------------------------------------
## Build a method for 'taxon_info' objects: the returned function applies
## the accessor `.f` to every element of `tax`, names the results with
## each element's unique name, and tags the result via add_otl_class().
taxon_info_method_factory <- function(.f) {
    function(tax, ...) {
        extracted <- lapply(tax, .f)
        names(extracted) <- vapply(tax, .tax_unique_name, character(1))
        extracted <- add_otl_class(extracted, .f)
        extracted
    }
}
## S3 methods for objects of class 'taxon_info'. Each one is produced by
## taxon_info_method_factory() from the corresponding .tax_* accessor.
##' @export
##' @rdname taxonomy_taxon_info
tax_rank.taxon_info <- taxon_info_method_factory(.tax_rank)
##' @export
##' @rdname taxonomy_taxon_info
tax_name.taxon_info <- taxon_info_method_factory(.tax_name)
##' @export
##' @rdname taxonomy_taxon_info
unique_name.taxon_info <- taxon_info_method_factory(.tax_unique_name)
##' @export
##' @rdname taxonomy_taxon_info
synonyms.taxon_info <- taxon_info_method_factory(.tax_synonyms)
##' @export
##' @rdname taxonomy_taxon_info
ott_id.taxon_info <- taxon_info_method_factory(.tax_ott_id)
##' @export
##' @rdname taxonomy_taxon_info
tax_sources.taxon_info <- taxon_info_method_factory(.tax_sources)
##' @export
##' @rdname taxonomy_taxon_info
is_suppressed.taxon_info <- taxon_info_method_factory(.tax_is_suppressed)
##' @export
##' @rdname taxonomy_taxon_info
flags.taxon_info <- taxon_info_method_factory(.tax_flags)
### methods for taxonomy_mrca ---------------------------------------------------
## Build a method for 'taxon_mrca' objects: the returned function applies
## the accessor `.f` to the "mrca" element of `tax`, wraps the value in a
## one-element list named with the MRCA's unique name, and tags the
## result via add_otl_class().
taxon_mrca_method_factory <- function(.f) {
    function(tax, ...) {
        mrca <- tax[["mrca"]]
        extracted <- list(.f(mrca))
        names(extracted) <- .tax_unique_name(mrca)
        extracted <- add_otl_class(extracted, .f)
        extracted
    }
}
## S3 methods for objects of class 'taxon_mrca'. Each one is produced by
## taxon_mrca_method_factory() from the corresponding .tax_* accessor.
##' @export
##' @rdname taxonomy_mrca
tax_rank.taxon_mrca <- taxon_mrca_method_factory(.tax_rank)
##' @export
##' @rdname taxonomy_mrca
tax_name.taxon_mrca <- taxon_mrca_method_factory(.tax_name)
##' @export
##' @rdname taxonomy_mrca
ott_id.taxon_mrca <- taxon_mrca_method_factory(.tax_ott_id)
##' @export
##' @rdname taxonomy_mrca
unique_name.taxon_mrca <- taxon_mrca_method_factory(.tax_unique_name)
##' @export
##' @rdname taxonomy_mrca
tax_sources.taxon_mrca <- taxon_mrca_method_factory(.tax_sources)
##' @export
##' @rdname taxonomy_mrca
flags.taxon_mrca <- taxon_mrca_method_factory(.tax_flags)
##' @export
##' @rdname taxonomy_mrca
is_suppressed.taxon_mrca <- taxon_mrca_method_factory(.tax_is_suppressed)
### method for extracting higher taxonomy from taxonomy_taxon_info calls -------
## Turn the "lineage" element of a single taxon into a data frame with
## one row per entry (columns as produced by build_lineage()). Errors via
## check_lineage() when the element is absent.
get_lineage <- function(tax) {
    check_lineage(tax)
    rows <- lapply(tax[["lineage"]], build_lineage)
    lineage_matrix <- do.call("rbind", rows)
    as.data.frame(lineage_matrix, stringsAsFactors = FALSE)
}
## Collect rank, name, unique name and ott id of one lineage entry into
## a single named vector (combined with c()).
build_lineage <- function(x) {
    entry_rank <- .tax_rank(x)
    entry_name <- .tax_name(x)
    entry_unique_name <- .tax_unique_name(x)
    entry_ott_id <- .tax_ott_id(x)
    c(rank = entry_rank,
      name = entry_name,
      unique_name = entry_unique_name,
      ott_id = entry_ott_id)
}
## Stop with an informative error unless `tax` has an element named
## "lineage".
##
## The test looks at names(tax) directly. Using exists() on a list goes
## through as.environment(), which fails for lists that contain unnamed
## elements.
check_lineage <- function(tax) {
    if (!("lineage" %in% names(tax))) {
        stop("The object needs to be created using ",
             sQuote("include_lineage=TRUE"))
    }
}
##' @export
##' @rdname tax_lineage
tax_lineage.taxon_info <- function(tax, ...) {
    ## one lineage data frame per taxon in the object
    lineages <- lapply(tax, get_lineage)
    lineages
}
rotl/R/api-tnrs.R 0000644 0001775 0000144 00000004555 12705157664 013661 0 ustar deepayan users ##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.string is.flag
## Match taxon names: validate the arguments and POST them to the
## "tnrs/match_names" endpoint.
##
## names:                   required character vector of names to match.
## context_name:            optional single string.
## do_approximate_matching: single logical flag.
## ids:                     optional character vector, same length as
##                          `names`.
## include_suppressed:      single logical flag.
## ...:                     forwarded to otl_POST().
##
## Arguments left as NULL are dropped from the request body.
.tnrs_match_names <- function(names=NULL, context_name=NULL,
                              do_approximate_matching=TRUE,
                              ids=NULL, include_suppressed=FALSE, ...) {
    if (is.null(names)) {
        stop("You must supply a ", sQuote("names"), " argument")
    } else if (!is.character(names)) {
        stop("Argument ", sQuote("names"), " must be of class ",
             sQuote("character"))
    }
    if (!is.null(ids)) {
        if (length(ids) != length(names)) {
            stop("Arguments ", sQuote("ids"), " and ",
                 sQuote("names"), " must be of the same length")
        } else if (!is.character(ids)) {
            stop("Argument ", sQuote("ids"), " must be of class ",
                 sQuote("character"))
        }
    }
    if (!assertthat::is.flag(do_approximate_matching)) {
        stop("Argument ", sQuote("do_approximate_matching"),
             " must be of class ",
             sQuote("logical"))
    }
    if (!assertthat::is.flag(include_suppressed)) {
        ## report the argument under its actual name
        stop("Argument ", sQuote("include_suppressed"), " must be of class ",
             sQuote("logical"))
    }
    if (!is.null(context_name)) {
        if (!assertthat::is.string(context_name)) {
            stop("Argument ", sQuote("context_name"), " must be of class ",
                 sQuote("character"))
        }
        context_name <- jsonlite::unbox(context_name)
    }

    q <- list(names = names, context_name = context_name,
              do_approximate_matching = jsonlite::unbox(do_approximate_matching),
              ids = ids, include_suppressed = jsonlite::unbox(include_suppressed))
    ## drop the elements that were not supplied
    is_unset <- vapply(q, is.null, logical(1))
    q <- q[!is_unset]

    res <- otl_POST("tnrs/match_names", body=q, ...)
    res
}
##' @importFrom httr content
## POST an empty body to "tnrs/contexts"; extra arguments go to otl_POST().
.tnrs_contexts <- function(...) {
    contexts <- otl_POST("tnrs/contexts", body = list(), ...)
    contexts
}
## POST `names` to "tnrs/infer_context". `names` is a required character
## vector; extra arguments go to otl_POST().
.tnrs_infer_context <- function(names = NULL, ...) {
    if (is.null(names)) {
        stop("Must supply a \'names\' argument")
    }
    if (!is.character(names)) {
        stop("Argument \'names\' must be of class \"character\"")
    }
    context <- otl_POST("tnrs/infer_context", body = list(names = names), ...)
    context
}
rotl/R/api-tol.R 0000644 0001775 0000144 00000011256 12707503354 013456 0 ustar deepayan users ##' @importFrom jsonlite unbox
##' @importFrom httr content
##' @importFrom assertthat is.flag
## Summary information about the OpenTree Tree of Life: POST to
## "tree_of_life/about".
##
## include_source_list: single logical flag sent with the request.
## ...:                 forwarded to otl_POST().
.tol_about <- function(include_source_list=FALSE, ...) {
    if (!assertthat::is.flag(include_source_list)) {
        ## argument name spelled correctly in the message
        stop("Argument ", sQuote("include_source_list"), " must be of class ", sQuote("logical"))
    }
    q <- list(include_source_list=jsonlite::unbox(include_source_list))
    res <- otl_POST(path="tree_of_life/about", body=q, ...)
    res
}
##' @importFrom jsonlite unbox
##' @importFrom httr content
## POST to "tree_of_life/node_info" for a single node, identified by
## exactly one of `ott_id` or `node_id`. `include_lineage` must be
## logical. Extra arguments go to otl_POST().
.tol_node_info <- function(ott_id = NULL, node_id = NULL,
                           include_lineage = FALSE, ...) {
    if (!is.logical(include_lineage)) {
        stop("Argument \'include_lineage\' must be of class \"logical\"")
    }

    has_ott <- !is.null(ott_id)
    has_node <- !is.null(node_id)
    if (!has_ott && !has_node) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
    }
    if (has_ott && has_node) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"), ", not both.")
    }

    if (has_ott) {
        ott_id <- check_ott_ids(ott_id)
        if (length(ott_id) != 1) {
            stop("Please provide a single ", sQuote("ott_id"), call. = FALSE)
        }
        q <- list(ott_id = jsonlite::unbox(ott_id))
    } else {
        if (!check_valid_node_id(node_id)) {
            stop("Argument ", sQuote("node_id"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
        }
        q <- list(node_id = jsonlite::unbox(node_id))
    }
    q$include_lineage <- jsonlite::unbox(include_lineage)

    node_info <- otl_POST(path = "tree_of_life/node_info", body = q, ...)
    node_info
}
##' @importFrom httr content
## POST to "tree_of_life/mrca" with the supplied `ott_ids` and/or
## `node_ids`; at least one of the two is required. Extra arguments go
## to otl_POST().
.tol_mrca <- function(ott_ids = NULL, node_ids = NULL, ...) {
    if (is.null(ott_ids) && is.null(node_ids)) {
        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " (or both).")
    }
    q <- list()
    if (!is.null(ott_ids)) {
        q$ott_ids <- check_ott_ids(ott_ids)
    }
    if (!is.null(node_ids)) {
        check_node_ids(node_ids)
        q$node_ids <- node_ids
    }
    mrca <- otl_POST(path = "tree_of_life/mrca", body = q, ...)
    mrca
}
# ignoring 'include_lineage' for subtree below. arguson only
##' @importFrom jsonlite unbox
##' @importFrom httr content
## POST to "tree_of_life/subtree" for the node identified by exactly one
## of `ott_id` or `node_id`. `label_format`, when supplied, is validated
## with check_label_format(). Extra arguments go to otl_POST().
.tol_subtree <- function(ott_id = NULL, node_id = NULL,
                         label_format = NULL, ...) {
    has_ott <- !is.null(ott_id)
    has_node <- !is.null(node_id)
    if (!has_ott && !has_node) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
    }
    if (has_ott && has_node) {
        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"), ", not both.")
    }

    if (has_ott) {
        ott_id <- check_ott_ids(ott_id)
        if (length(ott_id) != 1) {
            stop("Please provide a single ", sQuote("ott_id"))
        }
        q <- list(ott_id = jsonlite::unbox(ott_id))
    } else {
        if (!check_valid_node_id(node_id)) {
            stop("Argument ", sQuote("node_id"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
        }
        q <- list(node_id = jsonlite::unbox(node_id))
    }

    if (!is.null(label_format)) {
        if (!check_label_format(label_format)) {
            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
                 sQuote("id"), ", or ", sQuote("name_and_id"))
        }
        q$label_format <- jsonlite::unbox(label_format)
    }

    subtree <- otl_POST(path = "tree_of_life/subtree", body = q, ...)
    subtree
}
##' @importFrom httr content
## POST to "tree_of_life/induced_subtree" with the supplied `ott_ids`
## and/or `node_ids`. At least two identifiers in total are required.
## `label_format`, when supplied, is validated with check_label_format().
## Extra arguments go to otl_POST().
.tol_induced_subtree <- function(ott_ids = NULL, node_ids = NULL,
                                 label_format = NULL, ...) {
    if (is.null(ott_ids) && is.null(node_ids)) {
        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " (or both).")
    }

    q <- list()
    if (!is.null(label_format)) {
        if (!check_label_format(label_format)) {
            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
                 sQuote("id"), ", or ", sQuote("name_and_id"))
        }
        q$label_format <- jsonlite::unbox(label_format)
    }
    if (!is.null(ott_ids)) {
        ott_ids <- check_ott_ids(ott_ids)
        q$ott_ids <- ott_ids
    }
    if (!is.null(node_ids)) {
        check_node_ids(node_ids)
        q$node_ids <- node_ids
    }

    n_ids <- length(ott_ids) + length(node_ids)
    if (n_ids < 2) {
        stop("At least two valid ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " must be provided.")
    }

    induced <- otl_POST("tree_of_life/induced_subtree", body = q, ...)
    induced
}
rotl/R/match_names.R 0000644 0001775 0000144 00000035003 12707472154 014367 0 ustar deepayan users ## internal function that match the arguments provided to the correct
## row number in the data frame representing the Open Tree Taxonomy
## for a series of matched names.
## Validate the selector arguments and translate the selected row into
## the corresponding entry of the "original_order" attribute.
##
## response:   object carrying an "original_order" attribute (error if
##             the attribute is absent).
## row_number, taxon_name, ott_id: exactly one must be supplied.
##
## Returns a single element of "original_order"; errors if the selector
## is invalid, cannot be found, or resolves to more than one element.
check_args_match_names <- function(response, row_number, taxon_name, ott_id) {
    orig_order <- attr(response, "original_order")
    if (is.null(orig_order)) {
        stop(sQuote(substitute(response)), " was not created using ",
             sQuote("tnrs_match_names"))
    }

    if (missing(row_number) && missing(taxon_name) && missing(ott_id)) {
        stop("You must specify one of ", sQuote("row_number"), ", ",
             sQuote("taxon_name"), " or ", sQuote("ott_id"))
    } else if (!missing(row_number) && missing(taxon_name) && missing(ott_id)) {
        if (!is.numeric(row_number))
            stop(sQuote("row_number"), " must be a numeric.")
        if (!all(row_number %in% orig_order)) {
            stop(sQuote("row_number"), " is not a valid row number.")
        }
        i <- orig_order[row_number]
    } else if (missing(row_number) && !missing(taxon_name) && missing(ott_id)) {
        if (!is.character(taxon_name))
            stop(sQuote("taxon_name"), " must be a character.")
        ## search strings are matched against the lower-cased name
        i <- orig_order[match(tolower(taxon_name), response$search_string)]
        if (any(is.na(i)))
            stop("Can't find ", taxon_name)
    } else if (missing(row_number) && missing(taxon_name) && !missing(ott_id)) {
        if (!check_numeric(ott_id))
            stop(sQuote("ott_id"), " must look like a number.")
        i <- orig_order[match(ott_id, response$ott_id)]
        if (any(is.na(i))) stop("Can't find ", ott_id)
    } else {
        stop("You must use only one of ",
             sQuote("row_number"), ", ",
             sQuote("taxon_name"),
             " or ", sQuote("ott_id"), ".")
    }

    if (length(i) > 1)
        stop("You must supply a single element for each argument.")
    i
}
## Resolve the selector (exactly one of `row_number`, `taxon_name`,
## `ott_id`) to a row index of `response`. Validity of the values is
## checked separately by check_args_match_names().
match_row_number <- function(response, row_number, taxon_name, ott_id) {
    supplied <- c(row_number = !missing(row_number),
                  taxon_name = !missing(taxon_name),
                  ott_id = !missing(ott_id))

    if (!any(supplied)) {
        stop("You must specify one of ", sQuote("row_number"), " ",
             sQuote("taxon_name"), " ", sQuote("ott_id"))
    }
    if (sum(supplied) > 1) {
        stop("You must use only one of ", sQuote("row_number"),
             " ", sQuote("taxon_name"), " ", sQuote("ott_id"))
    }

    i <- switch(names(supplied)[supplied],
                row_number = row_number,
                taxon_name = match(tolower(taxon_name),
                                   response[["search_string"]]),
                ott_id = match(ott_id, response[["ott_id"]]))

    if (length(i) > 1) {
        stop("You must supply a single element for each argument.")
    }
    i
}
##' Taxonomic names may have different meanings in different taxonomic
##' contexts, as the same genus name can be applied to animals and
##' plants for instance. Additionally, the meaning of a taxonomic name
##' may have changed throughout its history, and may have referred to a
##' different taxon in the past. In such cases, a given name might
##' have multiple matches in the Open Tree Taxonomy. These functions
##' allow users to inspect (and update) alternative meaning of a given
##' name and its current taxonomic status according to the Open Tree
##' Taxonomy.
##'
##' To inspect alternative taxonomic meanings of a given name, you
##' need to provide the object resulting from a call to the
##' tnrs_match_names function, as well as one of either the row number
##' corresponding to the name in this object, the name itself (as used
##' in the original query), or the ott_id listed for this name.
##'
##' To update one of the names, you also need to provide the row number
##' in which the name to be replaced appears or its ott id.
##'
##' @title Inspect and Update alternative matches for a name returned
##' by tnrs_match_names
##' @param response an object generated by the
##' \code{\link{tnrs_match_names}} function
##' @param row_number the row number corresponding to the name to
##' inspect
##' @param taxon_name the taxon name corresponding to the name to
##' inspect
##' @param ott_id the ott id corresponding to the name to inspect
##' @param ... currently ignored
##' @return a data frame
##' @seealso \code{\link{tnrs_match_names}}
##' @examples
##' \dontrun{
##' matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus"))
##' inspect(matched_names, taxon_name="diadema")
##' new_matched_names <- update(matched_names, taxon_name="diadema",
##' new_ott_id = 631176)
##' new_matched_names
##' }
##' @export
##' @rdname match_names
inspect.match_names <- function(response, row_number, taxon_name, ott_id, ...) {
    res_idx <- check_args_match_names(response, row_number, taxon_name, ott_id)
    row_idx <- match_row_number(response, row_number, taxon_name, ott_id)

    if (!attr(response, "has_original_match")[row_idx]) {
        ## nothing was matched for this name: just show its row
        summary_match <- response[row_idx, ]
        return(summary_match)
    }

    ## rebuild the summary of all matches from the stored API response
    original <- attr(response, "original_response")
    summary_match <- build_summary_match(original, res_id = res_idx,
                                         match_id = NULL,
                                         initial_creation = FALSE)
    summary_match
}
##' @export
##' @rdname match_names
inspect <- function(response, ...) {
    UseMethod("inspect")
}
##' @param object an object created by \code{\link{tnrs_match_names}}
##' @param new_row_number the row number in the output of
##' \code{\link{inspect}} to replace the taxa specified by
##' \code{row_number}, \code{taxon_name}, or \code{ott_id}.
##' @param new_ott_id the ott id of the taxon to replace the taxa
##' specified by \code{row_number}, \code{taxon_name}, or
##' \code{ott_id}.
##' @export
##' @rdname match_names
##' @importFrom stats update
update.match_names <- function(object, row_number, taxon_name, ott_id,
                               new_row_number, new_ott_id, ...) {
    response <- object
    res_idx <- check_args_match_names(response, row_number, taxon_name, ott_id)
    row_idx <- match_row_number(response, row_number, taxon_name, ott_id)
    res <- attr(response, "original_response")

    if (!attr(response, "has_original_match")[row_idx]) {
        warning("There is no match for this name, ",
                "so there is nothing to replace it with.")
        return(response)
    }

    candidates <- res$results[[res_idx]]

    ## row of `response` that will be overwritten
    if (!missing(row_number)) {
        rnb <- row_number
    } else if (!missing(taxon_name)) {
        rnb <- match(tolower(taxon_name), response$search_string)
    } else if (!missing(ott_id)) {
        rnb <- match(ott_id, response$ott_id)
    }

    ## index of the replacement among the candidate matches
    has_new_row <- !missing(new_row_number)
    has_new_id <- !missing(new_ott_id)
    if (!has_new_row && !has_new_id) {
        stop("You must specify either ", sQuote("new_row_number"),
             " or ", sQuote("new_ott_id"))
    }
    if (has_new_row && has_new_id) {
        stop("You must use only one of ", sQuote("new_row_number"),
             " or ", sQuote("new_ott_id"))
    }
    if (has_new_row) {
        if (! new_row_number %in% seq_along(candidates$matches)) {
            stop(sQuote("new_row_number"), " is not a valid row number.")
        }
        match_idx <- new_row_number
    } else {
        all_ott_id <- sapply(candidates[["matches"]],
                             function(m) .tax_ott_id(m[["taxon"]]))
        match_idx <- match(new_ott_id, all_ott_id)
        if (any(is.na(match_idx))) stop("Can't find ", new_ott_id)
    }
    if (length(match_idx) > 1) {
        stop("You must supply a single element for each argument")
    }

    summ_match <- summary_row_factory(res, res_id = res_idx, match_id = match_idx)
    response[rnb, ] <- summ_match
    attr(response, "match_id")[rnb] <- match_idx
    response
}
## Access the elements for a given match:
## is_synonym, score, nomenclature_code, is_approximate_match, taxon
get_list_element <- function(response, i, list_name) {
    ## all candidate matches returned for the i-th queried name
    matches <- response[["results"]][[i]][["matches"]]
    lapply(matches, function(m) m[[list_name]])
}
## Build an accessor for objects returned by tnrs_match_names(): the
## returned function extracts the element named `list_name` from the
## matches stored in the "original_response" attribute of `tax`.
##
## With none of `row_number`, `taxon_name`, `ott_id` supplied, it works
## on every row flagged in the "has_original_match" attribute and keeps,
## for each, the match selected by the "match_id" attribute. With one of
## them supplied, it returns the element for that row only.
match_names_method_factory <- function(list_name) {
function(tax, row_number, taxon_name, ott_id, ...) {
response <- tax
res <- attr(response, "original_response")
no_args <- all(c(missing(row_number), missing(taxon_name),
missing(ott_id)))
if (no_args) {
## positions (in the original response) of the names that have a match
res_i <- attr(response, "original_order")[attr(response, "has_original_match")]
ret <- lapply(res_i, function(i) {
get_list_element(res, i, list_name)
})
names(ret) <- sapply(res_i, function(i) {
get_list_element(res, i, "matched_name")[[1]]
})
## ret is already in the correct order so we can use a sequence
## to extract the correct element
ret <- mapply(function(x, i) {
ret[[x]][i]
}, seq_along(ret), attr(response, "match_id")[attr(response, "has_original_match")])
## flatten only when every selected element has length one
if (all(sapply(ret, length) == 1)) {
ret <- unlist(ret, use.names = TRUE)
}
} else {
i <- check_args_match_names(response, row_number, taxon_name, ott_id)
j <- match_row_number(response, row_number, taxon_name, ott_id)
if (attr(response, "has_original_match")[j]) {
ret <- get_list_element(res, i, list_name)[attr(response, "match_id")[j]]
} else {
## no match for this name: return a placeholder whose fields are NA,
## except `name`, which carries the search string
ret <- list(ott_id = NA_character_,
name = response[["search_string"]][j],
unique_name = NA_character_,
rank = NA_character_,
tax_sources = NA_character_,
flags = NA_character_,
synonyms = NA_character_,
is_suppressed = NA_character_)
ret <- list(ret)
}
}
ret
}
}
## Build a 'match_names' method: the returned function retrieves the
## "taxon" element of the selected match(es), applies the accessor `.f`
## to each, names the results with the taxon's unique name, and tags the
## result via add_otl_class().
match_names_taxon_method_factory <- function(.f) {
    function(tax, row_number, taxon_name, ott_id, ...) {
        taxon_getter <- match_names_method_factory("taxon")
        taxa <- taxon_getter(tax, row_number = row_number,
                             taxon_name = taxon_name,
                             ott_id = ott_id)
        res <- lapply(taxa, .f)
        names(res) <- vapply(taxa, .tax_unique_name, character(1))
        res <- add_otl_class(res, .f)
        res
    }
}
##' \code{rotl} provides a collection of functions that allows users
##' to extract relevant information from an object generated by
##' \code{\link{tnrs_match_names}} function.
##'
##' These methods optionally accept one of the arguments
##' \code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve
##' the corresponding information for one of the matches in the object
##' returned by the \code{\link{tnrs_match_names}} function.
##'
##' If these arguments are not provided, these methods can return
##' information for the matches currently listed in the object
##' returned by \code{\link{tnrs_match_names}}.
##'
##' @title \code{ott_id} and \code{flags} for taxonomic names matched
##' by \code{tnrs_match_names}
##' @param tax an object returned by \code{\link{tnrs_match_names}}
##' @param row_number the row number corresponding to the name for
##' which to list the synonyms
##' @param taxon_name the taxon name corresponding to the name for
##' which to list the synonyms
##' @param ott_id the ott id corresponding to the name for which to
##' list the synonyms
##' @param ... currently ignored
##' @return A list of the ott ids or flags for the taxonomic names
##' matched with \code{\link{tnrs_match_names}}, for either one or
##' all the names.
##' @examples
##' \dontrun{
##' rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus"))
##' rsp$ott_id # ott id for match currently in use
##' ott_id(rsp) # similar as above but elements are named
##'
##' ## flags() is useful for instance to determine if a taxon is extinct
##' flags(rsp, taxon_name="Tyrannosaurus")
##' }
##' @export
##' @rdname match_names-methods
## Both methods are produced by match_names_taxon_method_factory() from
## the corresponding .tax_* accessor.
ott_id.match_names <- match_names_taxon_method_factory(.tax_ott_id)
##' @export
##' @rdname match_names-methods
flags.match_names <- match_names_taxon_method_factory(.tax_flags)
##' When querying the Taxonomic Name Resolution Services for a
##' particular taxonomic name, the API returns as possible matches all
##' names that include the queried name as a possible synonym. This
##' function allows you to explore other synonyms for an accepted
##' name, and allows you to determine why the name you queried is
##' returning an accepted synonym.
##'
##' To list synonyms for a given taxonomic name, you need to provide
##' the object resulting from a call to the
##' \code{\link{tnrs_match_names}} function, as well as one of either
##' the row number corresponding to the name in this object, the name
##' itself (as used in the original query), or the ott_id listed for
##' this name. Otherwise, the synonyms for all the currently matched
##' names are returned.
##'
##' @title List the synonyms for a given name
##' @param tax a data frame generated by the
##' \code{\link{tnrs_match_names}} function
##' @param row_number the row number corresponding to the name for
##' which to list the synonyms
##' @param taxon_name the taxon name corresponding to the name for
##' which to list the synonyms
##' @param ott_id the ott id corresponding to the name for which to
##' list the synonyms
##' @param ... currently ignored
##' @return a list whose elements are all synonym names (as vectors of
##' character) for the taxonomic names that match the query (the
##' names of the elements of the list).
##' @examples
##' \dontrun{
##' echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia"))
##' ## These 3 calls are identical
##' synonyms(echino, taxon_name="Acanthaster")
##' synonyms(echino, row_number=2)
##' synonyms(echino, ott_id=337928)
##' }
##' @export
## synonyms() method for match_names objects, built by handing the
## `.tax_synonyms` accessor to match_names_taxon_method_factory().
synonyms.match_names <- match_names_taxon_method_factory(.tax_synonyms)
## The remaining match_names methods follow the same recipe: each one is
## the result of passing a single `.tax_*` accessor to the factory. The
## factory and the accessors are defined elsewhere in the package;
## presumably the generated methods accept the same row_number /
## taxon_name / ott_id selectors as synonyms() -- confirm against the
## factory.
##' @export
tax_sources.match_names <- match_names_taxon_method_factory(.tax_sources)
##' @export
tax_rank.match_names <- match_names_taxon_method_factory(.tax_rank)
##' @export
is_suppressed.match_names <- match_names_taxon_method_factory(.tax_is_suppressed)
##' @export
unique_name.match_names <- match_names_taxon_method_factory(.tax_unique_name)
##' @export
tax_name.match_names <- match_names_taxon_method_factory(.tax_name)
rotl/R/studies.R 0000644 0001775 0000144 00000052641 13003750634 013567 0 ustar deepayan users ##' Return the list of study properties that can be used to search
##' studies and trees used in the synthetic tree.
##'
##' The list returned has 2 elements \code{tree_properties} and
##' \code{studies_properties}. Each of these elements lists additional
##' arguments to customize the API request properties that can be used
##' to search for trees and studies that are contributing to the
##' synthetic tree. The definitions of these properties are available
##' from
##' \url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON}
##'
##' @title Properties of the Studies
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A list of the study properties that can be used to find
##' studies and trees that are contributing to the synthetic tree.
##' @seealso \code{\link{studies_find_trees}}
##' @export
##' @examples
##' \dontrun{
##' all_the_properties <- studies_properties()
##' unlist(all_the_properties$tree_properties)
##' }
studies_properties <- function(...) {
    ## Ask the API wrapper for the raw property lists, then collapse
    ## each top-level element into a plain vector.
    lapply(.studies_properties(...), unlist)
}
##' Return the identifiers of studies that match given properties
##'
##' @title Find a Study
##' @param exact Should exact matching be used? (logical, default
##' \code{FALSE})
##' @param property The property to be searched on (character)
##' @param value The property value to be searched on (character)
##' @param detailed If \code{TRUE} (default), the function will return
##' a data frame that summarizes information about the study (see
##' \sQuote{Value}). Otherwise, it only returns the study
##' identifiers.
##' @param verbose Should the output include all metadata (logical
##' default \code{FALSE})
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return If \code{detailed=TRUE}, the function returns a data frame
##' listing the study id (\code{study_ids}), the number of trees
##' associated with this study (\code{n_trees}), the tree ids (at
##' most 5) associated with the studies (\code{tree_ids}), the
##' tree id that is a candidate for the synthetic tree if any
##' (\code{candidate}), the year of publication of the study
##' (\code{study_year}), the title of the publication for the
##' study (\code{title}), and the DOI (Digital Object Identifier)
##' for the study (\code{study_doi}).
##'
##' If \code{detailed=FALSE}, the function returns a data frame
##' with a single column containing the study identifiers.
##' @seealso \code{\link{studies_properties}} which lists properties
##' against which the studies can be
##' searched. \code{\link{list_trees}} that returns a list for all
##' tree ids associated with a study.
##' @export
##' @examples
##' \dontrun{
##' ## To match a study for which the identifier is already known
##' one_study <- studies_find_studies(property="ot:studyId", value="pg_719")
##' list_trees(one_study)
##'
##' ## To find studies pertaining to Mammals
##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
##' value="mammalia")
##' ## To extract the tree identifiers for each of the studies
##' list_trees(mammals)
##' ## ... or for a given study
##' list_trees(mammals, "ot_308")
##'
##' ## Just the identifiers without other information about the studies
##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
##' value="mammalia", detailed=FALSE)
##' }
studies_find_studies <- function(property=NULL, value=NULL, verbose=FALSE,
                                 exact=FALSE, detailed = TRUE, ...) {
    api_res <- .studies_find_studies(property = property, value = value,
                                     verbose = verbose, exact = exact, ...)
    ## One identifier per matched study.
    ids <- vapply(api_res[["matched_studies"]],
                  function(study) study[["ot:studyId"]],
                  character(1))
    if (!detailed) {
        ## Bare listing: a one-column data frame that carries the raw
        ## API response as metadata and a hint about `detailed = TRUE`.
        out <- data.frame(study_ids = ids, stringsAsFactors = FALSE)
        attr(out, "found_trees") <- paste("If you want to get a list of the",
                                          "trees associated with the studies,",
                                          "use", sQuote("detailed = TRUE"))
        class(out) <- c("study_ids", class(out))
        attr(out, "metadata") <- api_res
    } else {
        ## Detailed listing: delegate to summarize_meta() (defined
        ## elsewhere in the package).
        out <- summarize_meta(ids)
    }
    class(out) <- c("matched_studies", class(out))
    out
}
##' @export
print.study_ids <- function(x, ...) {
    ## The formatted copy is what gets printed, not `x` itself
    ## (presumably so that printing does not dispatch back to this
    ## method -- confirm against the other print methods of the package).
    print(format(x), ...)
    ## By convention print methods return their argument invisibly, so
    ## the object can keep flowing through a pipeline or an assignment.
    invisible(x)
}
##' Return a list of studies for which trees match a given set of
##' properties
##'
##' The list of possible values to be used as values for the argument
##' \code{property} can be found using the function
##' \code{\link{studies_properties}}.
##'
##' @title Find Trees
##' @param property The property to be searched on (character)
##' @param value The property-value to be searched on (character)
##' @param verbose Should the output include all metadata? (logical,
##' default \code{FALSE})
##' @param exact Should exact matching be used for the value?
##' (logical, default \code{FALSE})
##' @param detailed Should a detailed report be provided? If
##' \code{TRUE} (default), the output will include metadata about
##' the study that include trees matching the property. Otherwise,
##' only information about the trees will be provided.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A data frame that summarizes the trees found (and their
##' associated studies) for the requested criteria. If a study has
##' more than 5 trees, the \code{tree_ids} of the first ones will
##' be shown, followed by \code{...} to indicate that more are
##' present.
##'
##' If \code{detailed=FALSE}, the data frame will include the
##' study ids of the study (\code{study_ids}), the number of trees
##' in this study that match the search criteria
##' (\code{n_matched_trees}), the tree ids that match the search
##' criteria (\code{match_tree_ids}).
##'
##' If \code{detailed=TRUE}, in addition to the fields listed
##' above, the data frame will also contain the total number of
##' trees associated with the study (\code{n_trees}), all the tree
##' ids associated with the study (\code{tree_ids}), the tree id
##' that is a potential candidate for inclusion in the synthetic
##' tree (if any) (\code{candidate}), the year the study was
##' published (\code{study_year}), the title of the study
##' (\code{title}), the DOI for the study (\code{study_doi}).
##'
##' @seealso \code{\link{studies_properties}} which lists properties
##' the studies can be searched on. \code{\link{list_trees}} for
##' listing the trees that match the query.
##' @export
##' @importFrom stats setNames
##' @examples
##' \dontrun{
##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
##' detailed=FALSE)
##' ## summary of the trees and associated studies that match this criterion
##' res
##' ## With metadata about the studies (default)
##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
##' detailed=TRUE)
##' ## The list of trees for each study that match the search criteria
##' list_trees(res)
##' ## the trees for a given study
##' list_trees(res, study_id = "pg_2769")
##' }
studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
                               exact=FALSE, detailed = TRUE, ...) {
    .res <- .studies_find_trees(property = property, value = value,
                                verbose = verbose, exact = exact, ...)
    matched <- .res[["matched_studies"]]
    study_ids <- vapply(matched,
                        function(x) x[["ot:studyId"]],
                        character(1))
    n_matched_trees <- vapply(matched,
                              function(x) length(x[["matched_trees"]]),
                              numeric(1))
    ## Always build character vectors here (never an empty list), so
    ## that a study without matched trees, or a query without any
    ## match, still yields well-typed results.
    match_tree_ids <- lapply(matched, function(x) {
        as.character(unlist(lapply(x[["matched_trees"]],
                                   function(y) y[["nexson_id"]]),
                            use.names = FALSE))
    })
    ## All the tree ids are reported (no truncation): the trees are what
    ## the user is after. vapply() guarantees a character vector even
    ## when nothing matched; with sapply() the empty case was a list()
    ## and data.frame() silently dropped the `match_tree_ids` column.
    tree_str <- vapply(match_tree_ids,
                       function(x) paste(x, collapse = ", "),
                       character(1))
    res <- data.frame(study_ids, n_matched_trees, match_tree_ids = tree_str,
                      stringsAsFactors = FALSE)
    if (detailed) {
        meta <- summarize_meta(study_ids)
        # the next bit seems really slow (JWB)
        res <- merge(meta, res)
        attr(res, "metadata") <- attr(meta, "metadata")
    } else {
        attr(res, "metadata") <- .res
    }
    ## Per-study list of matched tree ids, named by study id.
    attr(res, "found_trees") <- stats::setNames(match_tree_ids, study_ids)
    class(res) <- c("matched_studies", class(res))
    res
}
##' Returns the trees associated with a given study
##'
##' If \code{file_format} is missing, the function returns an object
##' of the class \code{phylo} from the \code{\link[ape]{ape}} package
##' (default), or an object of the class \code{nexml} from the
##' \code{RNeXML} package.
##'
##' Otherwise \code{file_format} can be either \code{newick},
##' \code{nexus}, \code{nexml} or \code{json}, and the function will
##' generate a file of the selected format. In this case, a file name
##' needs to be provided using the argument \code{file}. If a file
##' with the same name already exists, it will be silently
##' overwritten.
##'
##' @title Get all the trees associated with a particular study
##' @param study_id the study ID for the study of interest (character)
##' @param object_format the class of the object the query should
##' return (either \code{phylo} or \code{nexml}). Ignored if
##' \code{file_format} is specified.
##' @param file_format the format of the file to be generated
##' (\code{newick}, \code{nexus}, \code{nexml} or \code{json}).
##' @param file the file name where the output of the function will be
##' saved.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return if \code{file_format} is missing, an object of class
##' \code{phylo} or \code{nexml}, otherwise a logical indicating
##' whether the file was successfully created.
##' @seealso \code{\link{get_study_meta}}
##' @export
##' @importFrom jsonlite toJSON
##' @examples
##' \dontrun{
##' that_one_study <- get_study(study_id="pg_719", object_format="phylo")
##' if (require(RNeXML)) { ## if RNeXML is installed get the object directly
##' nexml_study <- get_study(study_id="pg_719", object_format="nexml")
##' } else { ## otherwise write it to a file
##' get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml"))
##' }
##' }
get_study <- function(study_id = NULL, object_format = c("phylo", "nexml"),
                      file_format, file, ...) {
    object_format <- match.arg(object_format)
    if (!missing(file)) {
        ## Writing to disk: the file format has to be chosen explicitly.
        if (!missing(file_format)) {
            file_format <- match.arg(file_format, c("newick", "nexus", "nexml", "json"))
            ## `...` is forwarded here as it is in the in-memory
            ## branches below, so that request customizations also
            ## apply when the result is written to a file.
            res <- .get_study(study_id = study_id, format = file_format, ...)
            ## an existing file is silently replaced
            unlink(file)
            if (identical(file_format, "json")) {
                cat(jsonlite::toJSON(res), file=file)
            } else {
                cat(res, file=file)
            }
            return(invisible(file.exists(file)))
        } else {
            stop(sQuote("file_format"), " must be specified.")
        }
    } else if (identical(object_format, "phylo")) {
        file_format <- "newick"
        res <- .get_study(study_id = study_id, format=file_format, ...)
        res <- phylo_from_otl(res)
    } else if (identical(object_format, "nexml")) {
        file_format <- "nexml"
        res <- .get_study(study_id = study_id, format = file_format, ...)
        res <- nexml_from_otl(res)
    } else stop("Something is very wrong. Contact us.")
    res
}
##' Returns a specific tree from within a study
##'
##' @title Study Tree
##' @param study_id the identifier of a study (character)
##' @param tree_id the identifier of a tree within the study
##' @param object_format the class of the object to be returned
##' (default and currently only possible value \code{phylo} from
##' the \code{\link[ape]{ape}} package).
##' @param tip_label the format of the tip
##' labels. \dQuote{\code{original_label}} (default) returns the
##' original labels as provided in the study,
##' \dQuote{\code{ott_id}} labels are replaced by their ott IDs,
##' \dQuote{\code{ott_taxon_name}} labels are replaced by their
##' Open Tree Taxonomy taxon name.
##' @param file_format the format of the file to be generated
##' (\code{newick} default, \code{nexus}, or \code{json}).
##' @param file the file name where the output of the function will be
##' saved.
##' @param deduplicate logical (default \code{TRUE}). If the tree
##' returned by the study contains duplicated taxon names, should they
##' be made unique? It is normally illegal for NEXUS/Newick tree
##' strings to contain duplicated tip names. This is a workaround to
##' circumvent this requirement. If \code{TRUE}, duplicated tip labels
##' will be appended \code{_1}, \code{_2}, etc.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return if \code{file_format} is missing, an object of class
##' \code{phylo}, otherwise a logical indicating whether the file
##' was successfully created.
##' @export
##' @importFrom jsonlite toJSON
##' @examples
##' \dontrun{
##' tree <- get_study_tree(study_id="pg_1144", tree_id="tree2324")
##'
##' ## comparison of the first few tip labels depending on the options used
##' head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="original_label")$tip.label)
##' head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_id")$tip.label)
##' head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_taxon_name")$tip.label)
##' }
get_study_tree <- function(study_id = NULL, tree_id = NULL, object_format = c("phylo"),
                           tip_label = c("original_label", "ott_id", "ott_taxon_name"),
                           file_format, file, deduplicate = TRUE, ...) {
    object_format <- match.arg(object_format)
    tip_label <- match.arg(tip_label)
    ## Translate the user-facing choice into the value sent to the API.
    ## The keys must be spelled exactly like the values match.arg() can
    ## return: with a key that does not match, switch() returns NULL and
    ## the default choice would never be passed on explicitly.
    tip_label <- switch(tip_label,
                        original_label = "ot:originallabel",
                        ott_id = "ot:ottid",
                        ott_taxon_name = "ot:otttaxonname")
    if (!missing(file)) {
        ## Writing to disk: the file format has to be chosen explicitly.
        if (!missing(file_format)) {
            file_format <- match.arg(file_format, c("newick", "nexus", "json"))
            res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
                                   format=file_format, tip_label = tip_label, ...)
            ## an existing file is silently replaced
            unlink(file)
            if (identical(file_format, "json")) {
                cat(jsonlite::toJSON(res), file=file)
            } else {
                cat(res, file=file)
            }
            return(invisible(file.exists(file)))
        } else {
            stop(sQuote("file_format"), " must be specified.")
        }
    } else if (identical(object_format, "phylo")) {
        file_format <- "newick"
        res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
                               format=file_format, tip_label = tip_label, ...)
        res <- phylo_from_otl(res, dedup = deduplicate)
    } else stop("Something is very wrong. Contact us.")
    res
}
##' Retrieve metadata about a study in the Open Tree of Life datastore.
##'
##' \code{get_study_meta} returns a long list of attributes for the
##' studies that are contributing to the synthetic tree. To help with
##' the extraction of relevant information from this list, several
##' helper functions exist: \itemize{
##'
##' \item {get_tree_ids} { The identifiers of the trees
##' associated with the study }
##'
##' \item {get_publication} { The citation information of the
##' publication for the study. The DOI (or URL) for the study is
##' available as an attribute to the returned object (i.e.,
##' \code{attr(object, "DOI")} ) }.
##'
##' \item {candidate_for_synth} { The identifier of the tree(s) from
##' the study used in the synthetic tree. This is a subset of the
##' result of \code{get_tree_ids}. }
##'
##' \item {get_study_year} { The year of publication of the study. }
##'
##' }
##'
##' @title Study Metadata
##' @param study_id the study identifier (character)
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @param sm an object created by \code{get_study_meta}
##' @return named-list containing the metadata associated with the
##' study requested
##' @export
##' @examples
##' \dontrun{
##' req <- get_study_meta("pg_719")
##' get_tree_ids(req)
##' candidate_for_synth(req)
##' get_publication(req)
##' get_study_year(req)
##' }
get_study_meta <- function(study_id, ...) {
    ## Tag the raw metadata with its class and remember which study it
    ## describes, so that the print method can report it.
    structure(.get_study_meta(study_id = study_id, ...),
              class = "study_meta",
              study_id = study_id)
}
##' @export
print.study_meta <- function(x, ...) {
    ## Header line naming the study, then one line per element found
    ## under `$nexml`.
    cat("Metadata for OToL study ", attr(x, "study_id"), ". Contents:\n", sep="")
    cat(paste0(" $nexml$", names(x$nexml)), sep="\n")
    ## By convention print methods return their argument invisibly.
    invisible(x)
}
##' Retrieve subtree from a specific tree in the Open Tree of Life data store
##'
##' @title Study Subtree
##' @param study_id the study identifier (character)
##' @param tree_id the tree identifier (character)
##' @param object_format the class of the object returned by the
##' function (default, and currently only possibility \code{phylo}
##' from the \code{\link[ape]{ape}} package)
##' @param tip_label the format of the tip
##' labels. \dQuote{\code{original_label}} (default) returns the
##' original labels as provided in the study,
##' \dQuote{\code{ott_id}} labels are replaced by their ott IDs,
##' \dQuote{\code{ott_taxon_name}} labels are replaced by their
##' Open Tree Taxonomy taxon name.
##' @param file_format character, the file format to use to save the
##' results of the query (possible values, \sQuote{newick} or
##' \sQuote{nexus}).
##' @param file character, the path and file name where the output
##' should be written.
##' @param deduplicate logical (default \code{TRUE}). If the tree
##' returned by the study contains duplicated taxon names, should
##' they be made unique? It is normally illegal for NEXUS/Newick
##' tree strings to contain duplicated tip names. This is a
##' workaround to circumvent this requirement. If \code{TRUE},
##' duplicated tip labels will be appended \code{_1}, \code{_2},
##' etc.
##' @param subtree_id either a node id that specifies a subtree or
##' \dQuote{ingroup} which returns the ingroup for this subtree.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @export
##' @examples
##' \dontrun{
##' small_tr <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="node991044")
##' ingroup <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup")
##' nexus_file <- tempfile(fileext=".nex")
##' get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup", file=nexus_file,
##' file_format="nexus")
##' }
get_study_subtree <- function(study_id, tree_id, subtree_id, object_format=c("phylo"),
                              tip_label = c("original_label", "ott_id", "ott_taxon_name"),
                              file_format, file, deduplicate = TRUE, ...) {
    object_format <- match.arg(object_format)
    tip_label <- match.arg(tip_label)
    ## Translate the user-facing choice into the value sent to the API.
    ## The keys must be spelled exactly like the values match.arg() can
    ## return: with a key that does not match, switch() returns NULL and
    ## the default choice would never be passed on explicitly.
    tip_label <- switch(tip_label,
                        original_label = "ot:originallabel",
                        ott_id = "ot:ottid",
                        ott_taxon_name = "ot:otttaxonname")
    if (!missing(file)) {
        ## Writing to disk: the file format has to be chosen explicitly.
        if (!missing(file_format)) {
            file_format <- match.arg(file_format, c("newick", "nexus"))
            res <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
                                      subtree_id = subtree_id, format=file_format,
                                      tip_label = tip_label, ...)
            ## an existing file is silently replaced
            unlink(file)
            cat(res, file=file)
            return(invisible(file.exists(file)))
        } else {
            stop(sQuote("file_format"), " must be specified.")
        }
    } else if (identical(object_format, "phylo")) {
        file_format <- "newick"
        res <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
                                  subtree_id = subtree_id, format=file_format,
                                  tip_label = tip_label, ...)
        res <- phylo_from_otl(res, dedup = deduplicate)
        ## NeXML should be possible for both object_format and file_format but it seems there
        ## is something wrong with the server at this time (FM - 2015-06-07)
        ## } else if (identical(object_format, "nexml")) {
        ##   file_format <- "nexml"
        ##   res <- .get_study_subtree(study_id, tree_id, subtree_id, format=file_format)
        ##   res <- nexml_from_otl(res)
    } else stop("Something is very wrong. Contact us.")
    res
}
rotl/R/tree_to_labels.R 0000644 0001775 0000144 00000003014 12567643646 015102 0 ustar deepayan users ## Function to extract tip and edge labels from newick formatted strings
## useful when the tree is too small to be read in by ape/rncl.
## tr needs to be a newick formatted tree string
## - missing tips are removed (OK for OTL as it won't happen)
tree_to_labels <- function(tr, remove_quotes = TRUE) {
    ## Returns list(tip_label = , edge_label = ) extracted from the
    ## newick string `tr`.
    ## Count the parentheses; gregexpr() reports "no match" as -1.
    n_right <- unlist(gregexpr("\\)", tr))
    n_left <- unlist(gregexpr("\\(", tr))
    if (n_right[1] == -1) n_right <- 0 else n_right <- length(n_right)
    if (n_left[1] == -1) n_left <- 0 else n_left <- length(n_left)
    if (!identical(n_right, n_left)) {
        stop("invalid newick string, numbers of ( and ) don't match")
    }
    ## remove white spaces
    tr <- gsub("\\s+", "", tr)
    ## remove branch lengths, including signed values and scientific
    ## notation (e.g. ":1.5e-05"); otherwise the leftover exponent
    ## would end up glued to the label
    tr <- gsub(":[-+]?([0-9]+(\\.[0-9]*)?|\\.[0-9]+)([eE][-+]?[0-9]+)?", "", tr)
    ## TODO?: remove comments
    if (n_right < 1) {
        ## if only 1 tip
        tip_lbl <- gsub(";$", "", tr)
        edge_lbl <- character(0)
    } else {
        ## extract edge labels: whatever follows a closing parenthesis,
        ## up to the next "," or ";"
        edge_lbl <- unlist(strsplit(tr, ")", fixed = TRUE))
        edge_lbl <- grep("^[^\\(]", edge_lbl, value = TRUE)
        edge_lbl <- gsub("(,|;).*$", "", edge_lbl)
        edge_lbl <- edge_lbl[nzchar(edge_lbl)]
        ## extract tips: comma-separated pieces stripped of leading "("
        ## and of everything from the first ")" on
        tip_lbl <- unlist(strsplit(tr, ","))
        tip_lbl <- gsub("^\\(*", "", tip_lbl)
        tip_lbl <- gsub("\\).*$", "", tip_lbl)
        tip_lbl <- tip_lbl[nzchar(tip_lbl)]
    }
    if (remove_quotes) {
        tip_lbl <- gsub("^(\\\"|\\\')(.+)(\\\'|\\\")$", "\\2", tip_lbl)
    }
    list(tip_label = tip_lbl, edge_label = edge_lbl)
}
rotl/R/base.R 0000644 0001775 0000144 00000013213 12707501110 013002 0 ustar deepayan users otl_url <- function(dev=FALSE) {
if (dev) {
"https://devapi.opentreeoflife.org"
} else {
"https://api.opentreeoflife.org"
}
}
otl_version <- function(version) {
    ## Use the version supplied by the caller, "v3" otherwise.
    if (!missing(version)) {
        return(version)
    }
    "v3"
}
# Take a request object and return list (if JSON) or plain text (if another
# type)
##' @importFrom httr content
##' @importFrom jsonlite fromJSON
otl_parse <- function(req) {
    content_type <- req[["headers"]][["content-type"]]
    if (grepl("application/json", content_type)) {
        ## JSON responses are returned as nested lists (no simplification
        ## to vectors or data frames).
        json_txt <- httr::content(req, "text", encoding = "UTF-8")
        return(jsonlite::fromJSON(json_txt, simplifyVector = FALSE))
    }
    ## Anything else is returned as plain text; an empty body is an error.
    body_txt <- httr::content(req, as="text", encoding = "UTF-8")
    if (identical(body_txt, "")) {
        stop("No output to parse; check your query.", call. = FALSE)
    }
    body_txt
}
## Stop with an informative message when a parsed response carries an
## error report in its `description` element; otherwise return NULL
## invisibly. Non-list content (plain text) is never treated as an error.
otl_check_error <- function(cont) {
    if (!is.list(cont) || !("description" %in% names(cont))) {
        return(invisible(NULL))
    }
    desc <- cont[["description"]]
    ## The description can only be searched for fields when it is a list.
    desc_fields <- if (is.list(desc)) names(desc) else character(0)
    if ("Error" %in% desc_fields) {
        ## Report the text of the error, whichever capitalisation the
        ## field uses.
        detail <- desc[["error"]]
        if (is.null(detail)) detail <- desc[["Error"]]
        stop(paste("Error: ", detail, "\n", sep = ""))
    } else if ("message" %in% names(cont)) {
        ## Prefer the message attached to the description, fall back on
        ## the top-level one.
        detail <- if ("message" %in% desc_fields) {
            desc[["message"]]
        } else {
            cont[["message"]]
        }
        stop(paste("Message: ", detail, "\n", sep = ""))
    }
    invisible(NULL)
}
## Check the HTTP status of a response, then parse it and look for an
## error reported in its content. Returns the parsed content.
otl_check <- function(req) {
    if (req$status_code >= 400) {
        failure_body <- otl_parse(req)
        stop("HTTP failure: ", req$status_code, "\n", failure_body, call. = FALSE)
    }
    parsed <- otl_parse(req)
    otl_check_error(parsed)
    parsed
}
##' @importFrom httr GET
otl_GET <- function(path, url = otl_url(...), otl_v = otl_version(...), ...) {
## Send a GET request to `<url>/<otl_v>/<path>` and return the checked,
## parsed response (see otl_check()).
## NOTE: the defaults for `url` and `otl_v` are evaluated lazily and
## receive the same `...` that is forwarded to httr::GET().
req <- httr::GET(url, path=paste(otl_v, path, sep="/"), ...)
otl_check(req)
}
##' @importFrom jsonlite toJSON
##' @importFrom httr POST
otl_POST <- function(path, body, url = otl_url(...), otl_v = otl_version(...), ...) {
## Send a POST request to `<url>/<otl_v>/<path>` and return the checked,
## parsed response (see otl_check()). `body` must be a list.
## NOTE: the defaults for `url` and `otl_v` are evaluated lazily and
## receive the same `...` that is forwarded to httr::POST().
stopifnot(is.list(body))
## A non-empty list is serialised to JSON; an empty one is sent as an
## empty string.
body_json <- ifelse(length(body), jsonlite::toJSON(body), "")
req <- httr::POST(url,
path=paste(otl_v, path, sep="/"),
body=body_json, ...)
otl_check(req)
}
otl_formats <- function(format) {
    ## Map a format name (matched case-insensitively) to a file
    ## extension.
    fmt <- tolower(format)
    switch(fmt,
           nexus = ".nex",
           newick = ".tre",
           nexml = ".nexml",
           json = ".json",
           "") # anything else gets no extension = nex(j)son
}
## Keep only the ottId (the digits following "ott") of an OpenTree label
## (internal or terminal) and return it as a number
otl_ottid_from_label <- function(label) {
    ott_digits <- gsub("(.+[ _]ott)([0-9]+)", "\\2", label)
    as.numeric(ott_digits)
}
##' @importFrom rncl read_newick_phylo
phylo_from_otl <- function(res, dedup = FALSE) {
    ## Locate the newick string: either an element of a parsed response
    ## (`newick` or `subtree`), or the character string itself.
    if (is.list(res)) {
        if (!is.null(res$newick)) {
            tree <- res$newick
        } else if (!is.null(res$subtree)) {
            tree <- res$subtree
        } else {
            stop("Cannot find tree")
        }
    } else if (is.character(res)) {
        tree <- res
    } else stop("I don't know how to deal with this format.")
    ## Without any parenthesis there is no tree to build: only the tip
    ## label(s) are returned.
    if (!grepl("\\(", tree)) {
        return(tree_to_labels(tree)$tip_label)
    }
    ## The parser reads from a file. The temporary files are registered
    ## for removal right away so that they do not linger when parsing
    ## fails.
    fnm <- tempfile()
    on.exit(unlink(fnm), add = TRUE)
    cat(tree, file = fnm)
    if (!dedup) {
        return(rncl::read_newick_phylo(fnm))
    }
    dedup_tr <- deduplicate_labels(fnm)
    on.exit(unlink(dedup_tr), add = TRUE)
    rncl::read_newick_phylo(dedup_tr)
}
nexml_from_otl <- function(res) {
    ## RNeXML is an optional dependency: fail early when it is missing.
    if (!requireNamespace("RNeXML", quietly = TRUE)) {
        stop("The RNeXML package is needed to use the nexml file format")
    }
    ## The reader works on a file. The temporary file is registered for
    ## removal right away so that it does not linger when reading fails.
    fnm <- tempfile()
    on.exit(unlink(fnm), add = TRUE)
    cat(res, file=fnm)
    RNeXML::nexml_read(x=fnm)
}
## check if the argument provided looks like a number (can be coerced
## to integer/numeric). Always returns a single TRUE or FALSE (NULL and
## missing values give FALSE); more than one element is an error.
check_numeric <- function(x) {
    if (is.null(x)) {
        return(FALSE)
    }
    if (length(x) != 1) {
        stop("only 1 element should be provided")
    }
    if (is.numeric(x)) {
        ## NA, NaN and infinite values make the comparison NA: report
        ## them as "not a number" rather than leaking NA to the caller
        is_whole <- x %% 1 == 0
        return(!is.na(is_whole) && is_whole)
    }
    x <- as.character(x)
    if (is.na(x)) return(FALSE)
    grepl("^[0-9]+$", x)
}
## Validate a set of ott ids: they must be supplied (not NULL), free of
## NAs, and each of them must look like a number. The (possibly
## flattened) ids are returned.
check_ott_ids <- function(ott_ids) {
    if (is.null(ott_ids)) {
        stop("You must supply some OTT ids.")
    }
    if (inherits(ott_ids, "otl_ott_id")) {
        ## objects returned by the ott_id method become a plain vector
        ott_ids <- unlist(ott_ids)
    }
    if (any(is.na(ott_ids))) {
        stop("NAs are not allowed")
    }
    looks_numeric <- sapply(ott_ids, check_numeric)
    if (!all(looks_numeric)) {
        stop(sQuote("ott_ids"), " must look like numbers.")
    }
    ott_ids
}
## all nodes have a node_id (character, e.g. "ott12345" or
## "mrcaott123ott456"); TRUE when `x` is a single string starting that way
check_valid_node_id <- function(x) {
    if (length(x) != 1) {
        stop("only 1 element should be provided")
    }
    if (!is.character(x)) {
        return(FALSE)
    }
    grepl('^mrcaott\\d+ott\\d+', x) || grepl('^ott\\d+', x)
}
check_node_ids <- function(node_ids) {
    ## Nothing to validate when no node id is supplied.
    if (is.null(node_ids)) {
        return(invisible(NULL))
    }
    if (!is.character(node_ids)) {
        stop("Argument ", sQuote("node_ids"), " must be of type character.")
    }
    if (any(is.na(node_ids))) {
        stop("NAs are not allowed")
    }
    ## every id has to pass the single-id check
    valid <- sapply(node_ids, check_valid_node_id)
    if (!all(valid)) {
        stop(sQuote("node_ids"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
    }
    invisible(NULL)
}
# node labels for tree_of_life subtree and induced_subtree
# might also be useful for taxonomy queries
check_label_format <- function (x) {
    ## TRUE when `x` is one of the accepted node label formats.
    accepted <- c("name", "id", "name_and_id")
    if (!(x %in% accepted)) {
        return(FALSE)
    }
    TRUE
}
rotl/R/deduplicate_labels.R 0000644 0001775 0000144 00000002670 13003747740 015715 0 ustar deepayan users ## Create a vector (character) that contains the NEWICK tree strings
## found in a file
parse_newick <- function(file) {
    newick_lines <- readLines(file, warn = FALSE)
    ## a line can hold several ";"-terminated trees: split them up and
    ## trim the white space around each piece
    pieces <- lapply(strsplit(newick_lines, split = ";"),
                     function(x) gsub("^\\s+|\\s+$", "", x))
    pieces <- unlist(pieces)
    ## white space left inside a tree string becomes an underscore
    pieces <- gsub("\\s", "_", pieces)
    ## drop the empty pieces
    pieces[nchar(pieces) > 0]
}
## Internal function to be used by `deduplicate_labels` that:
## 1. identify duplicated labels
## 2. make them unique
## 3. replace the duplicated labels by their unique counterparts
dedup_lbl <- function(tr_str) {
## `tr_str` is a single tree string without its final ";" (the ";" is
## appended again on return).
tr_lbl <- tree_to_labels(tr_str, remove_quotes = TRUE)$tip_label
## make.unique() leaves the first occurrence of a label untouched and
## appends _1, _2, ... to the following ones.
tr_lbl_unq <- make.unique(tr_lbl, sep = "_")
if (!identical(tr_lbl, tr_lbl_unq)) {
## Labels are substituted one at a time, in order. sub() only replaces
## the first match, and a label that has already been renamed no longer
## matches, so each pass reaches the next occurrence still carrying the
## original name. The pattern requires the label to be delimited: "(" or
## "," (optionally followed by a quote) before it, and an optional quote
## followed by ":", ")" or "," after it. \Q...\E is meant to quote the
## label literally.
for (i in seq_along(tr_lbl)) {
tr_str <- sub(paste0("([\\(|,]\\'?)\\Q", tr_lbl[i], "\\E(\\'?[:|\\)|,])"),
paste0("\\1", tr_lbl_unq[i], "\\2"), tr_str)
}
## Tell the user which labels had to be modified.
warning("Some tip labels were duplicated and have been modified: ",
paste(tr_lbl[duplicated(tr_lbl)], collapse = ", "),
call. = FALSE)
}
paste0(tr_str, ";")
}
## Main function: takes a file with potentially duplicated tip labels
## and creates a new file with unique labels. Returns the path of the
## new (temporary) file, which holds one tree per line.
deduplicate_labels <- function(file) {
    deduped <- vapply(parse_newick(file), dedup_lbl, character(1),
                      USE.NAMES = FALSE)
    out_file <- tempfile()
    cat(deduped, file = out_file, sep = "\n")
    out_file
}
rotl/R/external_data.R 0000644 0001775 0000144 00000011646 12706240532 014722 0 ustar deepayan users ##' Get external identifiers for data associated with an Open Tree study
##'
##' Data associated with studies contributing to the Open Tree synthesis may
##' be available from other databases. In particular, trees and alignments
##' may be available from treebase and DNA sequences and bibliographic
##' information associated with a given study may be available from the NCBI.
##' This function retrieves that information for a given study.
##'
##' @param study_id An open tree study ID
##' @return A study_external_data object (which inherits from a list) which
##' contains some of the following.
##' @return doi, character, the DOI for the paper describing this study
##' @return external_data_url, character, a URL to an external data repository
##' (e.g. a treebase entry) if one exists.
##' @return pubmed_id character, the unique ID for this study in the NCBI's pubmed database
##' @return popset_ids character, vector of IDs for the NCBI's popset database
##' @return nucleotide_ids character, vector of IDs for the NCBI's nucleotide database
##' @seealso studies_find_studies (used to discover study IDs)
##' @importFrom httr parse_url
##' @importFrom rentrez entrez_search
##' @importFrom rentrez entrez_link
##' @examples
##' \dontrun{
##' flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae")
##' study_external_IDs(flies[2,]$study_ids)
##' }
##' @export
study_external_IDs <- function(study_id){
    study_meta <- get_study_meta(study_id)
    ## URL of the external data deposit, if the study declares one
    deposit_url <- study_meta[["nexml"]][["^ot:dataDeposit"]][["@href"]]
    ## the DOI is the path component of the publication URL
    doi_url <- attr(get_publication(study_meta), "DOI")
    doi <- parse_url(doi_url)$path
    pmid <- get_pmid(doi, study_id)
    ids <- list(doi = doi,
                pubmed_id = pmid,
                external_data_url = deposit_url)
    if (!is.null(pmid)) {
        ## NCBI records linked to the PubMed entry of the study
        popset <- entrez_link(dbfrom="pubmed", db="popset", id=pmid)
        ids$popset_ids <- popset[["links"]][["pubmed_popset"]]
        nuccore <- entrez_link(dbfrom="pubmed", db="nuccore", id=pmid)
        ids$nucleotide_ids <- nuccore[["links"]][["pubmed_nuccore"]]
    }
    structure(ids, class=c("study_external_data", "list"), id=study_id)
}
##' Get external identifiers for data associated with an Open Tree taxon
##'
##' The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This
##' function retrieves identifiers to external taxonomic records that have
##' contributed the rank, position and definition of a given Open Tree taxon.
##'
##' @param taxon_id An Open Tree taxon ID (ott id)
##' @return a data.frame in which each row represents a unique record in an
##' external database. The column "source" provides an abbreviated name for the
##' database, and "id" the unique ID for the record.
##' @seealso tnrs_match_names, which can be used to search for taxa by name.
##' @seealso taxonomy_taxon, for more information about a given taxon.
##' @examples
##' \dontrun{
##' gibbon_IDs <- taxon_external_IDs(712902)
##' }
##' @export
taxon_external_IDs <- function(taxon_id){
    info <- taxonomy_taxon_info(taxon_id)
    ## each taxonomic source is a "database:identifier" string
    src_strings <- unlist(info[[1]][["tax_sources"]])
    src_parts <- strsplit(src_strings, ":")
    ## one row per source, first the database then the identifier
    ids <- do.call(rbind.data.frame, src_parts)
    names(ids) <- c("source", "id")
    ids
}
#'@export
print.study_external_data <- function(x, ...){
    ## study_external_IDs() stores the study identifier in the "id"
    ## attribute; "study_id" is looked up first in case an object
    ## carries it under that name.
    study_id <- attr(x, "study_id", exact = TRUE)
    if (is.null(study_id)) {
        study_id <- attr(x, "id", exact = TRUE)
    }
    cat("External data identifiers for study", study_id, "\n")
    cat(" $doi: ", x[["doi"]], "\n")
    if(!is.null(x$pubmed_id)){
        cat(" $pubmed_id: ", x[["pubmed_id"]], "\n")
    }
    if(!is.null(x$popset_ids)){
        cat(" $popset_ids: vector of", length(x[["popset_ids"]]), "IDs \n")
    }
    if(!is.null(x$nucleotide_ids)){
        cat(" $nucleotide_ids: vector of", length(x[["nucleotide_ids"]]), "IDs\n")
    }
    ## The URL is NULL when the study declares no data deposit: guard
    ## the test so that printing such an object does not fail.
    data_url <- x[["external_data_url"]]
    if(length(data_url) == 1L && !is.na(data_url) && nchar(data_url) > 0){
        cat(" $external_data_url", data_url, "\n")
    }
    cat("\n")
    ## By convention print methods return their argument invisibly.
    invisible(x)
}
##Maybe include these functions to get summary information about a
## set of linked sequences?
#summarize_nucleotide_data <- function(id_vector){
# summs <- entrez_summary(db="nuccore", id=id_vector)
# interesting <- extract_from_esummary(summs, c("uid", "title", "slen", "organism", "completeness"), simplify=FALSE)
# do.call(rbind.data.frame, interesting)
#}
#
#summarize_popset_data <- function(id_vector){
# summs <- entrez_summary(db="popset", id=id_vector)
# interesting <- extract_from_esummary(summs, c("uid", "title"), simplify=FALSE)
# do.call(rbind.data.frame, interesting)
#}
#
# Internal helper (not exported): look up the PubMed ID matching a DOI.
# study_id is used only to build the warning text emitted when the search
# yields no PMID or more than one; in both of those cases NULL is returned.
get_pmid <- function(doi, study_id){
    pmids <- entrez_search(db="pubmed", term=paste0(doi, "[DOI]"))$ids
    n_found <- length(pmids)
    # Exactly one hit is the only usable outcome
    if(n_found == 1){
        return(pmids)
    }
    if(n_found == 0){
        warning("Could not find PMID for study'", study_id, "', skipping NCBI data")
    } else {
        warning("Found more than one PMID matching study'", study_id, "', skipping NCBI data")
    }
    NULL
}
rotl/R/tol.R 0000644 0001775 0000144 00000061625 13056043307 012707 0 ustar deepayan users
## Build a data frame (columns study_id, tree_id, git_sha) from the
## source_id_map element of an API result.
##
## tax: a list holding a "source_id_map" element, itself a list of lists.
## ...: unused.
##
## Stops with an error when "source_id_map" is absent. Entries of the map
## that carry none of the three fields contribute no row; when no entry
## contributes a row, an empty three-column data frame is returned.
.source_list <- function(tax, ...) {
    if (!"source_id_map" %in% names(tax)) {
        ## it should only be missing with tol_about when using
        ## include_source_list=FALSE
        stop("Make sure that your object has been created using ",
             sQuote("tol_about(include_source_list = TRUE)"))
    }
    fields <- c("study_id", "tree_id", "git_sha")
    rows <- lapply(tax[["source_id_map"]], function(x) {
        c(x[["study_id"]], x[["tree_id"]], x[["git_sha"]])
    })
    ## rbind() already ignores NULL rows; dropping them explicitly lets us
    ## detect the "nothing to bind" case, where naming the columns of the
    ## resulting zero-column data frame would otherwise fail.
    rows <- rows[lengths(rows) > 0]
    if (length(rows) == 0) {
        return(data.frame(study_id = character(0),
                          tree_id = character(0),
                          git_sha = character(0),
                          stringsAsFactors = FALSE))
    }
    tt <- do.call("rbind", rows)
    setNames(as.data.frame(tt, stringsAsFactors = FALSE), fields)
}
##' Basic information about the Open Tree of Life (the synthetic tree)
##'
##' @title Information about the Tree of Life
##'
##' @details Summary information about the current draft tree of life,
##' including information about the list of trees and the taxonomy
##' used to build it. The object returned by \code{tol_about} can
##' be passed to the taxonomy methods (\code{tax_name()},
##' \code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to
##' extract relevant taxonomic information for the root of the
##' synthetic tree.
##'
##' @param include_source_list Logical (default =
##' \code{FALSE}). Return an ordered list of source trees.
##' @param tax an object created with a call to \code{tol_about}.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return An invisible list of synthetic tree summary statistics:
##'
##' \itemize{
##'
##' \item {date_created} {String. The creation date of the tree.}
##'
##' \item {num_source_studies} {Integer. The number of studies
##' (publications) used as sources.}
##'
##' \item {num_source_trees} {The number of trees used as sources
##' (may be >1 tree per study).}
##'
##' \item {taxonomy_version} {The Open Tree Taxonomy version used
##' as a source.}
##'
##' \item {filtered_flags} {List. Taxa with these taxonomy flags were
##' not used in construction of the tree.}
##'
##' \item {root} {List. Describes the root node:}
##' \itemize{
##' \item {node_id} {String. The canonical identifier of the node.}
##'
##' \item {num_tips} {Numeric. The number of descendent tips.}
##'
##' \item {taxon} {A list of taxonomic properties:}
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).}
##'
##' \item {name} {String. The taxonomic name of the queried node.}
##'
##' \item {unique_name} {String. The string that uniquely
##' identifies the taxon in OTT.}
##'
##' \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##' \item {tax_sources} {List. A list of identifiers for taxonomic
##' sources, such as other taxonomies, that define taxa judged
##' equivalent to this taxon.}
##' }
##' }
##'
##' \item {source_list} {List. Present only if
##' \code{include_source_list} is \code{TRUE}. The sourceid
##' ordering is the precedence order for synthesis, with
##' relationships from earlier trees in the list having priority
##' over those from later trees in the list. See
##' \code{source_id_map} below for study details.}
##'
##' \item {source_id_map} {Named list of lists. Present only if
##' \code{include_source_list} is \code{TRUE}. Names correspond to
##' the \sQuote{sourceids} used in \code{source_list}
##' above. Source trees will have the following properties:}
##'
##' \itemize{
##' \item {git_sha} {String. The git SHA identifying a particular source
##' version.}
#'
##' \item {tree_id} {String. The tree id associated with the study id used.}
##'
##' \item {study_id} {String. The study identifier. Will typically include
##' a prefix ("pg_" or "ot_").}
##' }
##'
##' \item {synth_id} {The unique string for this version of the tree.}
##' }
##' @seealso \code{\link{source_list}} to explore the list of studies
##' used in the synthetic tree (see example).
##'
##' @examples
##' \dontrun{
##' res <- tol_about()
##' tax_sources(res)
##' ott_id(res)
##' studies <- source_list(tol_about(include_source_list=TRUE))}
##' @rdname tol_about
##' @export
tol_about <- function(include_source_list=FALSE, ...) {
    # Raw reply from the internal API wrapper
    about <- .tol_about(include_source_list=include_source_list, ...)
    # Prepend the S3 class so that the tol_summary methods get dispatched
    class(about) <- c("tol_summary", class(about))
    about
}
##' @export
print.tol_summary <- function(x, ...) {
    ## Print a human-readable summary of a "tol_summary" object.
    ##
    ## x:   the object to summarize.
    ## ...: unused, present for compatibility with the print() generic.
    ## Returns x invisibly, as print methods conventionally do.

    ## Plain if/else on a scalar; the presence test looks at the element
    ## names instead of coercing the whole list to an environment.
    has_source_list <- if ("source_list" %in% names(x)) "true" else "false"
    cat("\nOpenTree Synthetic Tree of Life.\n\n")
    cat("Tree version: ", x$synth_id, "\n", sep="")
    ## `$` matches partially, so this also picks up an element named
    ## taxonomy_version (the name used in the tol_about documentation).
    cat("Taxonomy version: ", x$taxonomy, "\n", sep="")
    cat("Constructed on: ", x$date_created, "\n", sep="")
    cat("Number of terminal taxa: ", x$root$num_tips, "\n", sep="")
    cat("Number of source trees: ", x$num_source_trees, "\n", sep="")
    cat("Number of source studies: ", x$num_source_studies, "\n", sep = "")
    cat("Source list present: ", has_source_list, "\n", sep="")
    cat("Root taxon: ", x$root$taxon$name, "\n", sep="")
    cat("Root ott_id: ", x$root$taxon$ott_id, "\n", sep="")
    cat("Root node_id: ", x$root$node_id, "\n", sep="")
    invisible(x)
}
## Build a method for tol_summary objects: the returned function applies the
## extractor .f to the root taxon of its argument, names the one-element
## result after that taxon's unique name, and passes it to add_otl_class().
tol_about_method_factory <- function(.f) {
    function(tax, ...) {
        root_taxon <- tax[["root"]][["taxon"]]
        out <- setNames(list(.f(root_taxon)),
                        .tax_unique_name(root_taxon))
        out <- add_otl_class(out, .f)
        out
    }
}
## S3 accessors for "tol_summary" objects. Each taxonomy accessor is built by
## tol_about_method_factory() from the matching internal extractor, while
## source_list() reuses .source_list() directly.
##' @export
##' @rdname tol_about
tax_rank.tol_summary <- tol_about_method_factory(.tax_rank)
##' @export
##' @rdname tol_about
tax_sources.tol_summary <- tol_about_method_factory(.tax_sources)
##' @export
##' @rdname tol_about
unique_name.tol_summary <- tol_about_method_factory(.tax_unique_name)
##' @export
##' @rdname tol_about
tax_name.tol_summary <- tol_about_method_factory(.tax_name)
##' @export
##' @rdname tol_about
ott_id.tol_summary <- tol_about_method_factory(.tax_ott_id)
##' @export
##' @rdname source_list
source_list.tol_summary <- .source_list
##' Most Recent Common Ancestor for a set of nodes
##'
##' @title MRCA of taxa from the synthetic tree
##'
##' @details Get the MRCA of a set of nodes on the current synthetic
##' tree. Accepts any combination of node ids and ott ids as
##' input. Returns information about the most recent common
##' ancestor (MRCA) node as well as the most recent taxonomic
##' ancestor (MRTA) node (the closest taxonomic node to the MRCA
##' node in the synthetic tree; the MRCA and MRTA may be the same
##' node). If they are the same, the taxonomic information will be
##' in the \code{mrca} slot, otherwise they will be in the
##' \code{nearest_taxon} slot of the list. If any of the specified
##' nodes is not in the synthetic tree an error will be returned.
##'
##' Taxonomic methods (\code{tax_sources()}, \code{ott_id()},
##' \code{unique_name()}, ...) are available on the objects
##' returned by \code{tol_mrca()}. If the MRCA node is MRTA, the
##' name of the object returned by these methods will start with
##' \sQuote{ott}, otherwise it will start with \sQuote{mrca}.
##'
##' @param ott_ids Numeric vector. The ott ids for which the MRCA is desired.
##' @param node_ids Character vector. The node ids for which the MRCA is desired.
##' @param tax an object returned by \code{tol_mrca()}.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return An invisible list of the MRCA node properties:
##'
##' \itemize{
##'
##' \item {mrca} {List of node properties.}
##'
##' \itemize{
##' \item {node_id} {String. The canonical identifier of the node.}
##'
##' \item {num_tips} {Numeric. The number of descendent tips.}
##'
##' \item {taxon} {A list of taxonomic properties. Only returned if
##' the queried node is a taxon. (If the node is not a taxon, a
##' \code{nearest_taxon} list is returned (see below)).}
##'
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
##'
##' \item {name} {String. The taxonomic name of the queried node.}
##'
##' \item {unique_name} {String. The string that uniquely
##' identifies the taxon in OTT.}
##'
##' \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##' \item {tax_sources} {List. A list of identifiers for taxonomic
##' sources, such as other taxonomies, that define taxa judged
##' equivalent to this taxon.}
##' }
##'
##' The following properties list support/conflict for the node across
##' synthesis source trees. All properties involve sourceid keys and
##' nodeid values (see \code{source_id_map} below). Not all properties
##' are present for every node.
##'
##' \item {partial_path_of} {List. The edge below this synthetic tree node
##' is compatible with the edge below each of these input tree nodes (one
##' per tree). Each returned element is reported as sourceid:nodeid.}
##'
##' \item {supported_by} {List. Input tree nodes (one per tree) that support
##' this synthetic tree node. Each returned element is reported as
##' sourceid:nodeid.}
##'
##' \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
##' to this synthetic tree node (via an exact mapping, or the input tree
##' terminal may be the only terminal descended from this synthetic tree node).
##' Each returned element is reported as sourceid:nodeid.}
##'
##' \item {conflicts_with} {Named list of lists. Names correspond to
##' sourceid keys. Each list contains input tree node ids (one or more per tree)
##' that conflict with this synthetic node.}
##' }
##'
##' \item {nearest_taxon} {A list of taxonomic properties of the nearest rootward
##' taxon node to the MRCA node. Only returned if the MRCA node is not a taxon
##' (otherwise the \code{taxon} list above is returned).}
##'
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
##'
##' \item {name} {String. The taxonomic name of the queried node.}
##'
##' \item {unique_name} {String. The string that uniquely
##' identifies the taxon in OTT.}
##'
##' \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##' \item {tax_sources} {List. A list of identifiers for taxonomic
##' sources, such as other taxonomies, that define taxa judged
##' equivalent to this taxon.}
##' }
##'
##' \item {source_id_map} {Named list of lists. Names correspond to the
##' sourceid keys used in the support/conflict properties of the \code{mrca}
##' list above. Source trees will have the following properties:}
##'
##' \itemize{
##' \item {git_sha} {The git SHA identifying a particular source
##' version.}
##'
##' \item {tree_id} {The tree id associated with the study id used.}
##'
##' \item {study_id} {The study identifier. Will typically include
##' a prefix ("pg_" or "ot_").}
##' }
##' The only sourceid that does not correspond to a source tree is the taxonomy,
##' which will have the name "ott"+`taxonomy_version`, and the value is the
##' ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
##' appear in \code{supported_by}.
##'
##' }
##'
##' @examples
##' \dontrun{
##' birds_mrca <- tol_mrca(ott_ids=c(412129, 536234))
##' ott_id(birds_mrca)
##' tax_sources(birds_mrca)}
##' @rdname tol_mrca
##' @export
tol_mrca <- function(ott_ids=NULL, node_ids=NULL, ...) {
    # Raw reply from the internal API wrapper
    mrca <- .tol_mrca(ott_ids=ott_ids, node_ids=node_ids, ...)
    # Prepend the S3 class so that the tol_mrca methods get dispatched
    class(mrca) <- c("tol_mrca", class(mrca))
    mrca
}
##' @export
print.tol_mrca <- function(x, ...) {
    ## Summary of an MRCA result: node id and tip count first, then either
    ## the taxon attached to the MRCA node or, failing that, the nearest taxon.
    mrca_node <- x$mrca
    cat("\nOpenTree MRCA node.\n\n")
    cat("Node id: ", mrca_node$node_id, "\n", sep="")
    cat("Number of terminal descendants: ", mrca_node$num_tips, "\n", sep="")
    if (!is_taxon(x[["mrca"]][["taxon"]])) {
        nearest <- x$nearest_taxon
        cat("Is taxon: FALSE\n")
        cat("Nearest taxon:\n")
        cat(" Name: ", nearest$name, "\n", sep="")
        cat(" ott id: ", nearest$ott_id, "\n", sep="")
    } else {
        mrca_taxon <- mrca_node$taxon
        cat("Is taxon: TRUE\n")
        cat("Name: ", mrca_taxon$name, "\n", sep="")
        cat("ott id: ", mrca_taxon$ott_id, "\n", sep="")
    }
}
## Build a method for tol_mrca objects: the returned function applies the
## extractor .f to the MRCA's own taxon when is_taxon() accepts it, and to
## the nearest_taxon element otherwise. Which one was used is recorded in
## the "taxon_type" attribute of the result.
tol_mrca_method_factory <- function(.f) {
    function(tax, ...) {
        mrca_taxon <- tax[["mrca"]][["taxon"]]
        if (is_taxon(mrca_taxon)) {
            taxon <- mrca_taxon
            taxon_type <- "mrca"
        } else {
            taxon <- tax[["nearest_taxon"]]
            taxon_type <- "nearest_taxon"
        }
        res <- list(.f(taxon))
        names(res) <- .tax_unique_name(taxon)
        attr(res, "taxon_type") <- taxon_type
        res <- add_otl_class(res, .f)
        res
    }
}
## S3 accessors for "tol_mrca" objects. Each taxonomy accessor is built by
## tol_mrca_method_factory() from the matching internal extractor, while
## source_list() reuses .source_list() directly.
##' @export
##' @rdname tol_mrca
tax_sources.tol_mrca <- tol_mrca_method_factory(.tax_sources)
##' @export
##' @rdname tol_mrca
unique_name.tol_mrca <- tol_mrca_method_factory(.tax_unique_name)
##' @export
##' @rdname tol_mrca
tax_name.tol_mrca <- tol_mrca_method_factory(.tax_name)
##' @export
##' @rdname tol_mrca
tax_rank.tol_mrca <- tol_mrca_method_factory(.tax_rank)
##' @export
##' @rdname tol_mrca
ott_id.tol_mrca <- tol_mrca_method_factory(.tax_ott_id)
##' @export
##' @rdname tol_mrca
source_list.tol_mrca <- .source_list
##' Extract a subtree from the synthetic tree from an Open Tree node id.
##'
##' @title Extract a subtree from the synthetic tree
##'
##' @details Given a node, return the subtree of the synthetic tree descended
##' from that node. The start node may be specified using either a node id
##' or an ott id, but not both. If the specified node is not in the
##' synthetic tree an error will be returned. There is a size limit of
##' 25000 tips for this method.
##'
##' @param ott_id Numeric. The ott id of the node in the tree that should
##' serve as the root of the tree returned.
##' @param node_id Character. The node id of the node in the tree that should
##' serve as the root of the tree returned.
##' @param label_format Character. Defines the label type; one of
##' \dQuote{\code{name}}, \dQuote{\code{id}}, or
##' \dQuote{\code{name_and_id}} (the default).
##' @param file If specified, the function will write the subtree to a
##' file in newick format.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return If no value is specified to the \code{file} argument
##' (default), a phylogenetic tree of class \code{phylo}.
##' Otherwise, the function returns invisibly a logical indicating
##' whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' res <- tol_subtree(ott_id=241841)}
##' @export
tol_subtree <- function(ott_id=NULL, node_id=NULL, label_format=NULL,
                        file, ...) {
    subtree <- .tol_subtree(ott_id=ott_id, node_id=node_id,
                            label_format=label_format, ...)
    # No file requested: hand back the converted tree object
    if (missing(file)) {
        phy <- phylo_from_otl(subtree)
        return(phy)
    }
    # Otherwise replace any existing file with the newick string and report
    # (invisibly) whether the file now exists
    unlink(file)
    cat(subtree$newick, file=file)
    invisible(file.exists(file))
}
##' Return the induced subtree on the synthetic tree that relates a list of nodes.
##'
##' @title Subtree from the Open Tree of Life
##'
##' @details Return a tree with tips corresponding to the nodes identified in
##' the input set that is consistent with the topology of the current
##' synthetic tree. This tree is equivalent to the minimal subtree
##' induced on the draft tree by the set of identified nodes.
##'
##' @param ott_ids Numeric vector. OTT ids indicating nodes to be used
##' as tips in the induced tree.
##' @param node_ids Character vector. Node ids indicating nodes to be used
##' as tips in the induced tree.
##' @param label_format Character. Defines the label type; one of
##' \dQuote{\code{name}}, \dQuote{\code{id}}, or
##' \dQuote{\code{name_and_id}} (the default).
##' @param file If specified, the function will write the subtree to a
##' file in newick format.
##' @param ... additional arguments to customize the API call (see
##' \code{\link{rotl}} for more information).
##'
##' @return If no value is specified to the \code{file} argument
##' (default), a phylogenetic tree of class \code{phylo}.
##'
##' Otherwise, the function returns invisibly a logical indicating
##' whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710))
##' tree_file <- tempfile(fileext=".tre")
##' tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710),
##' file=tree_file)}
##' @export
tol_induced_subtree <- function(ott_ids=NULL, node_ids=NULL, label_format=NULL,
                                file, ...) {
    # Fetch the induced subtree for the requested nodes; either return it as
    # a tree object or, when `file` is supplied, write its newick string to
    # that file.
    res <- .tol_induced_subtree(ott_ids=ott_ids, node_ids=node_ids,
                                label_format=label_format, ...)
    if (!missing(file)) {
        # Remove any previous file so that file.exists() reflects this write
        unlink(file)
        cat(res$newick, file=file)
        # Returned invisibly, as documented and as tol_subtree() does
        return(invisible(file.exists(file)))
    } else {
        phy <- phylo_from_otl(res)
        return(phy)
    }
}
##' Strip OTT ids from tip labels
##' @param tip_labels a character vector containing tip labels (most
##' likely the \code{tip.label} element from a tree returned by
##' \code{\link{tol_induced_subtree}}
##' @param remove_underscores logical (defaults to FALSE). If set to
##' TRUE underscores in tip labels are converted to spaces
##' @return A character vector containing the contents of
##' \code{tip_labels} with any OTT ids removed.
##'
##' @examples
##' \dontrun{
##' genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio")
##' tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710))
##' tr$tip.label %in% genera
##' tr$tip.label <- strip_ott_ids(tr$tip.label)
##' tr$tip.label %in% genera}
##'@export
strip_ott_ids <- function(tip_labels, remove_underscores=FALSE){
    # Drop a trailing "_ott<digits>" suffix from every label
    labels <- sub("_ott\\d+$", "", tip_labels)
    if(!remove_underscores){
        return(labels)
    }
    # Optionally turn the remaining underscores into spaces
    gsub("_", " ", labels)
}
##' Get summary information about a node in the synthetic tree
##'
##' @title Node info
##'
##' @details Returns summary information about a node in the graph. The
##' node of interest may be specified using either a node id or a
##' taxon id, but not both. If the specified node or OTT id is not
##' in the graph, an error will be returned.
##'
##' If the argument \code{include_lineage=TRUE} is used, you can
##' use \code{tax_lineage()} or \code{tol_lineage} to return the
##' taxonomic information or the node information for all the
##' ancestors to this node, down to the root of the tree.
##'
##'
##' @param ott_id Numeric. The OpenTree taxonomic identifier.
##' @param node_id Character. The OpenTree node identifier.
##' @param include_lineage Logical (default = FALSE). Whether to return the
##' lineage of the node from the synthetic tree.
##' @param ... additional arguments to customize the API call (see
##' ?rotl for more information)
##'
##' @return \code{tol_node_info} returns an invisible list of summary
##' information about the queried node:
##'
##' \itemize{
##'
##' \item {node_id} {String. The canonical identifier of the node.}
##'
##' \item {num_tips} {Numeric. The number of descendent tips.}
##'
##' \item {taxon} {A list of taxonomic properties. Only returned if
##' the queried node is a taxon. Each source has:}
##'
##' \itemize{
##' \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
##'
##' \item {name} {String. The taxonomic name of the queried node.}
##'
##' \item {unique_name} {String. The string that uniquely
##' identifies the taxon in OTT.}
##'
##' \item {rank} {String. The taxonomic rank of the taxon in OTT.}
##'
##' \item {tax_sources} {List. A list of identifiers for taxonomic
##' sources, such as other taxonomies, that define taxa judged
##' equivalent to this taxon.}
##' }
##'
##' The following properties list support/conflict for the node across
##' synthesis source trees. All properties involve sourceid keys and
##' nodeid values (see \code{source_id_map} below).
##'
##' \item {partial_path_of} {List. The edge below this synthetic tree node
##' is compatible with the edge below each of these input tree nodes (one
##' per tree). Each returned element is reported as sourceid:nodeid.}
##'
##' \item {supported_by} {List. Input tree nodes (one per tree) that support
##' this synthetic tree node. Each returned element is reported as
##' sourceid:nodeid.}
##'
##' \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
##' to this synthetic tree node (via an exact mapping, or the input tree
##' terminal may be the only terminal descended from this synthetic tree node).
##' Each returned element is reported as sourceid:nodeid.}
##'
##' \item {conflicts_with} {Named list of lists. Names correspond to
##' sourceid keys. Each list contains input tree node ids (one or more per tree)
##' that conflict with this synthetic node.}
##'
##' \item {source_id_map} {Named list of lists. Names correspond to the
##' sourceid keys used in the 4 properties above. Source trees will have the
##' following properties:}
##'
##' \itemize{
##' \item {git_sha} {The git SHA identifying a particular source
##' version.}
##'
##' \item {tree_id} {The tree id associated with the study id used.}
##'
##' \item {study_id} {The study identifier. Will typically include
##' a prefix ("pg_" or "ot_").}
##' }
##' The only sourceid that does not correspond to a source tree is the taxonomy,
##' which will have the name "ott"+`taxonomy_version`, and the value is the
##' ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
##' appear in \code{supported_by}.
##'
##' }
##'
##' \code{tol_lineage} and \code{tax_lineage} return data
##' frames. \code{tol_lineage} indicates for each ancestor its
##' node identifier, the number of tips descending from that
##' node, and whether it corresponds to a taxonomic level.
##'
##' @examples
##' \dontrun{
##' birds <- tol_node_info(ott_id=81461, include_lineage=TRUE)
##' source_list(birds)
##' tax_rank(birds)
##' ott_id(birds)
##' tax_lineage(birds)
##' tol_lineage(birds)}
##' @export
tol_node_info <- function(ott_id=NULL, node_id=NULL, include_lineage=FALSE, ...) {
    # Raw reply from the internal API wrapper
    node <- .tol_node_info(ott_id=ott_id, node_id=node_id,
                           include_lineage=include_lineage, ...)
    # Prepend the S3 class so that the tol_node methods get dispatched
    class(node) <- c("tol_node", class(node))
    node
}
## Build a method for tol_node objects: the returned function applies the
## extractor .f to the "taxon" element of its argument, names the one-element
## result after that taxon's unique name, and passes it to add_otl_class().
tol_node_method_factory <- function(.f) {
    function(tax, ...) {
        node_taxon <- tax[["taxon"]]
        res <- list(.f(node_taxon))
        names(res) <- .tax_unique_name(node_taxon)
        res <- add_otl_class(res, .f)
        res
    }
}
##' @export
print.tol_node <- function(x, ...) {
    ## Summary of a node: id and tip count first, then the taxon details
    ## when is_taxon() accepts the node's "taxon" element.
    cat("\nOpenTree node.\n\n")
    cat("Node id: ", x$node_id, "\n", sep="")
    cat("Number of terminal descendants: ", x$num_tips, "\n", sep="")
    if (!is_taxon(x[["taxon"]])) {
        cat("Is taxon: FALSE\n")
    } else {
        node_taxon <- x$taxon
        cat("Is taxon: TRUE\n")
        cat("Name: ", node_taxon$name, "\n", sep="")
        cat("Rank: ", node_taxon$rank, "\n", sep="")
        cat("ott id: ", node_taxon$ott_id, "\n", sep="")
    }
}
## S3 accessors for "tol_node" objects. Each taxonomy accessor is built by
## tol_node_method_factory() from the matching internal extractor, while
## source_list() reuses .source_list() directly.
##' @export
##' @param tax an object returned by \code{tol_node_info}.
##' @rdname tol_node_info
tax_rank.tol_node <- tol_node_method_factory(.tax_rank)
##' @export
##' @rdname tol_node_info
tax_sources.tol_node <- tol_node_method_factory(.tax_sources)
##' @export
##' @rdname tol_node_info
unique_name.tol_node <- tol_node_method_factory(.tax_unique_name)
##' @export
##' @rdname tol_node_info
tax_name.tol_node <- tol_node_method_factory(.tax_name)
##' @export
##' @rdname tol_node_info
ott_id.tol_node <- tol_node_method_factory(.tax_ott_id)
##' @export
##' @rdname tol_node_info
source_list.tol_node <- .source_list
##' @export
##' @rdname tol_node_info
tax_lineage.tol_node <- function(tax, ...) {
    check_lineage(tax)
    ## One entry per lineage element: the result of build_lineage() on its
    ## taxon when it has one, NULL otherwise.
    taxon_rows <- lapply(tax[["lineage"]], function(node) {
        if (!exists("taxon", node)) {
            return(NULL)
        }
        build_lineage(node[["taxon"]])
    })
    as.data.frame(do.call("rbind", taxon_rows), stringsAsFactors = FALSE)
}
##' @export
##' @rdname tol_node_info
tol_lineage.tol_node <- function(tax, ...) {
    check_lineage(tax)
    ## Describe one lineage element as a named vector. The three values are
    ## combined with c(), so they end up sharing a single atomic type.
    describe_node <- function(node) {
        c("node_id" = node[["node_id"]],
          "num_tips" = node[["num_tips"]],
          "is_taxon" = exists("taxon", node))
    }
    node_rows <- lapply(tax[["lineage"]], describe_node)
    as.data.frame(do.call("rbind", node_rows), stringsAsFactors = FALSE)
}
rotl/R/studies-methods.R 0000644 0001775 0000144 00000005203 12602532772 015225 0 ustar deepayan users
### list_trees -----------------------------------------------------------------
##' List tree ids in objects returned by
##' \code{\link{studies_find_studies}} and
##' \code{\link{studies_find_trees}}.
##'
##' \code{list_trees} returns all trees associated with a particular
##' study when used on an object returned by
##' \code{\link{studies_find_studies}}, but only the trees that match
##' the search criteria when used on objects returned by
##' \code{\link{studies_find_trees}}.
##'
##' @param matched_studies an object created by
##' \code{studies_find_trees} or \code{studies_find_studies}.
##' @param study_id a \code{study_id} listed in the object returned by
##' \code{studies_find_trees}
##' @param ... Currently unused
##' @return \code{list_trees} returns a list of the tree_ids for each
##' study that match the requested criteria. If a \code{study_id}
##' is provided, then only the trees for this study are returned
##' as a vector.
##' @seealso \code{\link{studies_find_studies}} and
##' \code{\link{studies_find_trees}}. The help for these functions
##' has examples demonstrating the use of \code{list_trees}.
##' @export
list_trees <- function(matched_studies, ...) {
    UseMethod("list_trees")
}
##' @rdname list_trees
##' @export
list_trees.matched_studies <- function(matched_studies, study_id, ...) {
    trees_by_study <- attr(matched_studies, "found_trees")
    ## Without a study_id the trees of every study are returned
    if (missing(study_id)) {
        return(trees_by_study)
    }
    ## Reject ids that do not name one of the studies
    if (!study_id %in% names(trees_by_study)) {
        stop(sQuote(study_id), " isn't a valid id.")
    }
    trees_by_study[[study_id]]
}
##' @export
##' @rdname get_study_meta
## S3 generics for extracting pieces of a study metadata object; each one
## simply dispatches on the class of `sm`.
get_tree_ids <- function(sm) UseMethod("get_tree_ids")
##' @export
##' @rdname get_study_meta
get_publication <- function(sm) UseMethod("get_publication")
##' @export
##' @rdname get_study_meta
candidate_for_synth <- function(sm) UseMethod("candidate_for_synth")
##' @export
##' @rdname get_study_meta
get_study_year <- function(sm) UseMethod("get_study_year")
##' @export
##' @rdname get_study_meta
get_tree_ids.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    ## The first entry of ^ot:treesElementOrder is used to index treesById
    first_group <- nexml[["^ot:treesElementOrder"]][[1]]
    tree_group <- nexml[["treesById"]][[first_group]]
    unlist(tree_group[["^ot:treeElementOrder"]])
}
##' @export
##' @rdname get_study_meta
get_publication.study_meta <- function(sm) {
    nexml <- sm[["nexml"]]
    reference <- nexml[["^ot:studyPublicationReference"]]
    ## The publication link travels along as the "DOI" attribute
    doi_link <- nexml[["^ot:studyPublication"]][["@href"]]
    attr(reference, "DOI") <- doi_link
    reference
}
##' @export
##' @rdname get_study_meta
candidate_for_synth.study_meta <- function(sm) {
    ## Flatten the ^ot:candidateTreeForSynthesis element into a vector
    candidates <- sm[["nexml"]][["^ot:candidateTreeForSynthesis"]]
    unlist(candidates)
}
##' @export
##' @rdname get_study_meta
get_study_year.study_meta <- function(sm) {
    ## The year is returned exactly as stored under ^ot:studyYear
    nexml <- sm[["nexml"]]
    nexml[["^ot:studyYear"]]
}
rotl/R/rotl-package.R 0000644 0001775 0000144 00000003740 12554042726 014462 0 ustar deepayan users ##' An Interface to the Open Tree of Life API
##'
##' The Open Tree of Life is an NSF funded project that is generating
##' an online, comprehensive phylogenetic tree for 1.8 million
##' species. \code{rotl} provides an interface that allows you to
##' query and retrieve the parts of the tree of life that are of
##' interest to you.
##'
##' \code{rotl} provides functions for most of the end points the API
##' provides. The documentation of the API is available at:
##' \url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs}
##'
##' @section Customizing API calls:
##'
##' All functions that use API end points can take 2 arguments to
##' customize the API call and are passed as \code{...} arguments.
##'
##' \itemize{
##'
##' \item{ \code{otl_v} } { This argument controls which version
##' of the API your call is using. The default value for this
##' argument is a call to the non-exported function
##' \code{otl_version()} which returns the current version of the
##' Open Tree of Life APIs (v2).}
##'
##' \item{ \code{dev_url} } { This argument controls whether to use
##' the development version of the API. By default, \code{dev_url}
##' is set to \code{FALSE}, using \code{dev_url = TRUE} in your
##' function calls will use the development version.}
##'
##' }
##'
##' For example, to use the development version of the API, you
##' could use: \code{tnrs_match_names("anas", dev_url=TRUE)}
##'
##' Additional arguments can also be passed to the
##' \code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods.
##'
##'
##' @section Acknowledgments:
##'
##' This package was started during the Open Tree of Life
##' \href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon}
##' organized by OpenTree, the NESCent Hackathon Interoperability
##' Phylogenetic group, and Arbor.
##'
##' @name rotl
##' @docType package
##' @import ape
NULL
rotl/vignettes/ 0000755 0001775 0000144 00000000000 13056407503 013565 5 ustar deepayan users rotl/vignettes/data_mashups.Rmd 0000644 0001775 0000144 00000017643 12706240532 016713 0 ustar deepayan users ---
title: "Connecting data to Open Tree trees"
author: "David Winter"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Connecting data to Open Tree trees}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
## Combining data from OToL and other sources.
One of the major goals of `rotl` is to help users combine data from other
sources with the phylogenetic trees in the Open Tree database. This
document gives examples of some of the ways in which a user might connect data
to trees from Open Tree.
## Get Open Tree IDs to match your data.
Let's say you have a dataset where each row represents a measurement taken from
one species, and your goal is to put these measurements in some phylogenetic
context. Here's a small example: the best estimate of the mutation rate for a
set of unicellular Eukaryotes along with some other property of those species
which might explain the mutation rate:
```{r, data}
csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
mu <- read.csv(csv_path, stringsAsFactors=FALSE)
mu
```
If we want to get a tree for these species we need to start by finding the
unique ID for each of these species in the Open Tree database. We can use the
Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do
that we should see if any of the taxonomic contexts, which can be used to narrow
a search and avoid conflicts between different codes, apply to our group of species:
```{r, context}
library(rotl)
tnrs_contexts()
```
Hmm, none of those groups contain all of our species. In this case we can
search using the `All life` context and the function `tnrs_match_names`:
```{r, match}
taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
knitr::kable(taxon_search)
```
Good, all of our species are known to Open Tree. Note, though, that one of the names
is a synonym. _Saccharomyces pombe_ is an older name for what is now called
_Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name
Resolution Service is designed to deal with these problems (and similar ones
like misspellings), but it is always a good idea to check the results of
`tnrs_match_names` closely to ensure the results are what you expect.
In this case we have a good ID for each of our species so we can move on. Before
we do that, let's ensure we can match up our original data to the Open Tree
names and IDs by adding them to our `data.frame`:
```{r, munge}
mu$ott_name <- taxon_search$unique_name
mu$ott_id <- taxon_search$ott_id
```
## Find a tree with your taxa
Now let's find a tree. There are two possible options here: we can search for
published studies that include our taxa or we can use the 'synthetic tree' from
Open Tree. We can try both approaches.
### Published trees
Before we can search for published studies or trees, we should check out the
list of properties we can use to perform such searches:
```{r, properties}
studies_properties()
```
We have `ottIds` for our taxa, so let's use those IDs to search for trees that
contain them. Starting with our first species _Tetrahymena thermophila_ we can
use `studies_find_trees` to do this search.
```{r taxon_count}
studies_find_trees(property="ot:ottId", value="180195")
```
Well... that's not very promising. We can repeat that process for all of the IDs
to see if the other species are better represented.
```{r, all_taxa_count}
## Search the published trees once per OTT id
hits <- lapply(mu$ott_id, studies_find_trees, property = "ot:ottId", detailed = FALSE)
## vapply() (rather than sapply()) guarantees exactly one numeric count per taxon
vapply(hits, function(x) sum(x[["n_matched_trees"]]), numeric(1))
```
OK, most of our species are not in any of the published trees available. You can
help fix this sort of problem by [making sure you submit your published trees to
Open Tree](https://tree.opentreeoflife.org/curator).
### A part of the synthesis tree
Thankfully, we can still use the complete Tree of Life made from the
combined results of all of the published trees and taxonomies that go into Open
Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs.
Using the default arguments you can get a tree object into your R session:
```{r subtree, fig.width=7, fig.height=4}
tr <- tol_induced_subtree(ott_ids=mu$ott_id)
plot(tr)
```
### Connect your data to the tips of your tree
Now that we have a tree for our species, how can we use the tree and the data
together?
The package `phylobase` provides an object class called `phylo4d`, which is
designed to represent a phylogeny and data associated with its tips. In order to
get our tree and data into one of these objects we have to make sure the labels
in the tree and in our data match exactly. That's not quite the case at the
moment (tree labels have underscores and IDs appended):
```{r, match_names}
mu$ott_name[1]
tr$tip.label[4]
```
`rotl` provides a convenience function `strip_ott_ids` to deal with these.
```{r, sub}
tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
tr$tip.label %in% mu$ott_name
```
Ok, now that the tip labels match, we can make a new dataset. The `phylo4d()`
function matches tip labels to the row names of a `data.frame`, so let's make
a new dataset that contains just the relevant data and has row names to match
the tree:
```{r phylobase}
library(phylobase)
mu_numeric <- mu[,c("mu", "pop.size", "genome.size")]
rownames(mu_numeric) <- mu$ott_name
tree_data <- phylo4d(tr, mu_numeric)
```
And now we can plot the data and the tree together
```{r, fig.width=7, fig.height=5}
plot(tree_data)
```
## Find external data associated with studies, trees and taxa from Open Tree
In the above example we looked for a tree that related species in another dataset.
Now we will go the other way, and try to find data associated with Open Tree records
in other databases.
### Get external data from a study
Let's imagine you were interested in extending or reproducing the results of a
published study. If that study is included in Open Tree you can find it via
`studies_find_studies` or `studies_find_trees` and retrieve the published trees
with `get_study`. `rotl` will also help you find external data. The function
`study_external_IDs` retrieves the DOI for a given study, and uses that to
gather some more data:
```{r}
extra_data <- study_external_IDs("pg_1980")
extra_data
```
Here the returned object contains an `external_data_url` (in this case a link to
the study in Treebase), a pubmed ID for the paper and a vector of IDs for the
NCBI's nucleotide database. The packages `treebase` and `rentrez` provide
functions to make use of these IDs within R.
As an example, let's use `rentrez` to download the first two DNA sequences and
print them.
```{r}
library(rentrez)
seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
cat(seqs)
```
You could further process these sequences in R with the function `read.dna` from
`ape` or save them to disk by specifying a file name with `cat`.
### Find an OTT taxon in another taxonomic database
It is also possible to map an Open Tree taxon to a record in another taxonomic
database. For instance, if we wanted to search for data about one of the tips of
the sub-tree we fetched in the example above we could do so using
`taxon_external_IDs`:
```{r}
## _Tetrahymena thermophila_ ("Tt") is the first species in `mu`
Tt_ids <- taxon_external_IDs(mu$ott_id[1])
Tt_ids
```
A user could then use `rgbif` to find locality records using the gbif ID or
`rentrez` to get genetic or bibliometric data about it from the NCBI's databases.
## What next
The demonstration gets you to the point of visualizing your data in a
phylogenetic context. But there's a lot more you can do with this sort of data in R.
For instance, you could use packages like `ape`, `caper`, `phytools` and
`MCMCglmm` to perform phylogenetic comparative analyses of your data. You could
gather more data on your species using packages that connect to
trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from
the US National Phenology Network. You could also use `rentrez` to find genetic
data for each of your species, and use that data to generate branch lengths for
the phylogeny.
rotl/vignettes/meta-analysis.Rmd 0000644 0001775 0000144 00000022407 13056077352 017012 0 ustar deepayan users ---
title: "Using the Open Tree synthesis in a comparative analysis"
author: "David Winter"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
## Phylogenetic Comparative Methods
The development of phylogenetic comparative methods has made phylogenies an
important source of data in fields as diverse as ecology, genomics and medicine.
Comparative methods can be used to investigate patterns in the evolution of
traits or the diversification of lineages. In other cases a phylogeny is treated
as a "nuisance parameter", allowing the autocorrelation created by the shared
evolutionary history of the different species included to be controlled for.
In many cases finding a tree that relates the species for which trait data are
available is a rate-limiting step in such comparative analyses. Here we show
how the synthetic tree provided by Open Tree of Life (and made available in R via
`rotl`) can help to fill this gap.
## A phylogenetic meta-analysis
To demonstrate the use of `rotl` in a comparative analysis, we will partially
reproduce the results of [Rutkowska _et al_ 2014](https://doi.org/10.1111/jeb.12282).
Very briefly, this study is a meta-analysis summarising the results of multiple
studies testing for systematic differences in the size of eggs which contain
male and female offspring. Such a difference might mean that birds invest more
heavily in one sex than the other.
Because this study involves data from 51 different species, Rutkowska _et al_
used a phylogenetic comparative approach to account for the shared evolutionary
history among some of the studied-species.
### Gather the data
If we are going to reproduce this analysis, we will first need to gather the
data. Thankfully, the data is available as supplementary material from the
publisher's website. We can collect the data using `fulltext` (with the
paper's DOI as input) and read it into memory with `readxl`:
```{r egg_data, cache=TRUE}
library(rotl)
## `readxl` and `fulltext` are optional: test for them with requireNamespace()
## (no attaching, no warning when absent) and call their functions by namespace.
## Without them, fall back on the copy of the data shipped with `rotl`.
if (requireNamespace("readxl", quietly = TRUE) &&
    requireNamespace("fulltext", quietly = TRUE)) {
  doi <- "10.1111/jeb.12282"
  xl_file <- fulltext::ft_get_si(doi, 1, save.name = "egg.xls")
  egg_data <- readxl::read_excel(xl_file)
} else {
  egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
}
head(egg_data)
```
The most important variable in this dataset is `Zr`, which is a [normalized
effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference
in size between eggs that contain males and females. Values close to zero come
from studies that found the sex of an egg's inhabitant had little effect on its size,
while large positive or negative values correspond to studies with substantial
sex biases (towards males and females respectively). Since this is a
meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot)
with effect size on the y-axis and precision (the inverse of the sample
standard error) on the x-axis. Here we calculate precision from the sample
variance (`VZr`):
```{r eggs_in_a_funnel, fig.width=6, fig.height=3}
## Funnel plot: precision (1/SE, computed from the sampling variance VZr) on
## the x-axis against effect size (Zr) on the y-axis
plot(1 / sqrt(egg_data$VZr), egg_data$Zr, pch = 16,
     ylab = "Effect size (Zr)",
     xlab = "Precision (1/SE)",
     main = "Effect sizes for sex bias in egg size among 51 bird species")
```
In order to use this data later on we need to first convert it to a standard
`data.frame`. We can also convert the `animal` column (the species names) to
lower case which will make it easier to match names later on:
```{r, clean_eggs}
egg_data <- as.data.frame(egg_data)
egg_data$animal <- tolower(egg_data$animal)
```
### Find the species in OTT
We can use the OTL synthesis tree to relate these species. To do so we first need to
find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the
Taxonomic Name Resolution Service function `tnrs_match_names`:
```{r, birds, cache=TRUE}
## spell out `context_name` in full rather than relying on partial argument matching
taxa <- tnrs_match_names(unique(egg_data$animal), context_name = "Animals")
head(taxa)
```
All of these species are in OTT, but a few of them go by different names in the
Open Tree than we have in our data set. Because the tree `rotl` fetches
will have Open Tree names, we need to create a named vector that maps the names
we have for each species to the names Open Tree uses for them:
```{r bird_map}
taxon_map <- structure(taxa$search_string, names=taxa$unique_name)
```
Now we can use this map to retrieve "data set names" from "OTT names":
```{r odd_duck}
taxon_map["Anser caerulescens"]
```
### Get a tree
Now we can get the tree. There are really too many tips here to show nicely, so
we will leave them out of this plot
```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'}
tr <- tol_induced_subtree(taxa$ott_id)
plot(tr, show.tip.label=FALSE)
```
There are a few things to note here. First, the tree has no branch lengths.
At present this is true for the whole of the Open Tree synthetic tree. Some
comparative methods require either branch lengths or an ultrametric tree. Before
you can use one of those methods you will need to get a tree with branch
lengths. You could try looking for published trees made available by the Open
Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths
from the topology of a phylogeny returned by `tol_induced_subtree`, perhaps by
downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on
nodes of known-age using the penalized likelihood method in `ape::chronos`.
In this case, we will use only the topology of the tree as input to our
comparative analysis, so we can skip these steps.
Second, the tip labels contain OTT IDs, which means they will not perfectly
match the species names in our dataset or the taxon map that we created earlier:
```{r tip_lab}
tr$tip.label[1:4]
```
Finally, the tree contains node labels for those nodes that match a higher taxonomic
group, and empty character vectors (`""`) for all other nodes. Some
comparative methods either do not expect node labels at all, or require all
labeled nodes to have a unique name (meaning multiple "empty" labels will cause
an error).
We can deal with all these details easily. `rotl` provides the convenience
function `strip_ott_ids` to remove the extra information from the tip labels.
With the IDs removed, we can use our taxon map to replace the tip labels in the tree
with the species names from our dataset.
```{r clean_tips}
otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
tr$tip.label <- taxon_map[ otl_tips ]
```
Finally, we can remove the node labels by setting the `node.label` attribute of
the tree to `NULL`.
```{r remove_nodes}
tr$node.label <- NULL
```
### Perform the meta-analysis
Now we have data and a tree, and we know the names in the tree match the ones in
the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a
Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis,
and we will do the same. Of course, to properly analyse these data you would
take some care in deciding on the appropriate priors to use and inspect the
results carefully. In this case, we are really interested in using this as a
demonstration, so we will just run a simple model.
Specifically we will fit a model where the only variable that might explain the
values of `Zr` is the random factor `animal`, which corresponds to the
phylogenetic relationships among species. We also provide `VZr` as the measurement
error variance, effectively adding extra weight to the results of more powerful
studies. Here's how we specify and fit that model with `MCMCglmm`:
```{r model}
## `quietly` is spelled out in full; `quiet=` only worked through partial
## argument matching
library(MCMCglmm, quietly = TRUE)
## fix the RNG seed so the MCMC run is reproducible
set.seed(123)

## priors for the residual (R) and random-effect (G) variance structures
pr <- list(
  R = list(V = 1, nu = 0.002),
  G = list(G1 = list(V = 1, nu = 0.002))
)

## intercept-only model with `animal` as the random factor (linked to the
## tree via `pedigree`) and VZr supplied as the measurement error variance
model <- MCMCglmm(Zr ~ 1, random = ~animal,
                  pedigree = tr,
                  mev = egg_data$VZr,
                  prior = pr,
                  data = egg_data,
                  verbose = FALSE)
```
Now that we have a result we can find out how much phylogenetic signal exists
for sex-biased differences in egg-size. In a multi-level model we can use variance
components to look at this, specifically the proportion of the total variance
that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's
calculate the _H_ for this model:
```{r PhyH}
var_comps <- colMeans(model$VCV )
var_comps["animal"] / sum(var_comps)
```
It appears there is almost no phylogenetic signal to the data.
The relationships among species explain much less than one percent of the total
variance in the data. If you were wondering, Rutkowska _et al_. report a similar result,
even after adding more predictors to their model most of the variance in `Zr`
was left unexplained.
## What other comparative methods can I use in R?
Here we have demonstrated just one comparative analysis that you might do in R.
There are an ever-growing number of packages that allow an ever-growing number
of analyses to be performed in R. Some "classics" like ancestral state
reconstruction, phylogenetic independent contrasts and lineage through time plots
are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree`
provide extensions to these methods. The [CRAN Phylogenetics Taskview](https://CRAN.R-project.org/view=Phylogenetics)
gives a good idea of the diversity of packages and analyses that can be
completed in R.
rotl/vignettes/how-to-use-rotl.Rmd 0000644 0001775 0000144 00000025531 13056073735 017232 0 ustar deepayan users ---
title: "How to use rotl?"
author: "François Michonneau"
date: "`r Sys.Date()`"
output:
rmarkdown::html_vignette:
css: vignette.css
vignette: >
%\VignetteIndexEntry{How to use rotl?}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
`rotl` provides an interface to the Open Tree of Life (OTL) API and allows users
to query the API, retrieve parts of the Tree of Life and integrate these parts
with other R packages.
The OTL API provides services to access:
* the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is
a combination of **the OTL taxonomy** and the **source trees** (studies)
* the **Taxonomic name resolution services** a.k.a. TNRS: the methods for
resolving taxonomic names to the internal identifiers used by the TOL and the
GOL (the `ott ids`).
* the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the
synthesis of the different taxonomies used as a backbone of the TOL when no
studies are available.
* the **Studies** containing the source trees used to build the TOL, and
extracted from the scientific literature.
In `rotl`, each of these services corresponds to functions with different
prefixes:
| Service | `rotl` prefix |
|---------------|---------------|
| Tree of Life | `tol_` |
| TNRS | `tnrs_` |
| Taxonomy | `taxonomy_` |
| Studies | `studies_` |
`rotl` also provides a few other functions and methods that can be used to
extract relevant information from the objects returned by these functions.
## Demonstration of a basic workflow
The most common use for `rotl` is probably to start from a list of species and
get the relevant parts of the tree for these species. This is a two step
process:
1. the species names need to be matched to their `ott_id` (the Open Tree
Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
1. these `ott_id` will then be used to retrieve the relevant parts of the Tree
of Life.
### Step 1: Matching taxonomy to the `ott_id`
Let's start by doing a search on a diverse group of taxa: a tree frog (genus
_Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus
(genus _Nautilus_).
```{r}
library(rotl)
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
```
It's always a good idea to check that the resolved names match what you
intended:
`r knitr::kable(resolved_names)`
The column `unique_name` sometimes indicates the higher taxonomic level
associated with the name. The column `number_matches` indicates the number of
`ott_id` that correspond to a given name. In this example, our search on
_Diadema_ returns 2 matches, and the one returned by default is indeed the sea
urchin that we want for our query. The argument `context_name` allows you to
limit the taxonomic scope of your search. _Diadema_ is also the genus name of a
fungus. To ensure that our search is limited to animal names, we could do:
```{r}
resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
```
If you are trying to build a tree with deeply divergent taxa that the argument
`context_name` cannot fix, see "How to change the ott ids assigned to my taxa?"
in the FAQ below.
### Step 2: Getting the tree corresponding to our taxa
Now that we have the correct `ott_id` for our taxa, we can ask for the tree
using the `tol_induced_subtree()` function. By default, the object returned by
`tol_induced_subtree` is a phylo object (from the
[ape](https://cran.r-project.org/package=ape) package), so we can plot it
directly.
```{r, fig.width=7, fig.height=4}
my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
plot(my_tree, no.margin=TRUE)
```
## FAQ
### How to change the ott ids assigned to my taxa?
If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to
your name (e.g., because of synonymy) and changing the `context_name` does not
help, you can use the function `inspect`. This function takes the object
resulting from `tnrs_match_names()`, and either the row number, the taxon name
(you used in your search in lowercase), or the `ott_id` returned by the initial
query.
To illustrate this, let's re-use the previous query but this time pretending that
we are interested in the fungus _Diadema_ and not the sea urchin:
```{r}
taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
resolved_names <- tnrs_match_names(taxa)
resolved_names
inspect(resolved_names, taxon_name = "diadema")
```
In our case, we want the second row in this data frame to replace the
information that initially matched for _Diadema_. We can now use the `update()`
function, to change to the correct taxa (the fungus not the sea urchin):
```{r}
resolved_names <- update(resolved_names, taxon_name = "diadema",
new_row_number = 2)
## we could also have used the ott_id to replace this taxon:
## resolved_names <- update(resolved_names, taxon_name = "diadema",
## new_ott_id = 4930522)
```
And now our `resolved_names` data frame includes the taxon we want:
`r knitr::kable(resolved_names)`
### How do I know that the taxa I'm asking for is the correct one?
The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns
taxonomic information about the taxa. This output can be passed to some helper
functions to extract the relevant information. Let's illustrate this with our
_Diadema_ example:
```{r}
diadema_info <- taxonomy_taxon_info(631176)
tax_rank(diadema_info)
synonyms(diadema_info)
tax_name(diadema_info)
```
In some cases, it might also be useful to investigate the taxonomic tree
descending from an `ott_id` to check that it's the correct taxon and to
determine the species included in the Open Tree Taxonomy:
```{r}
diadema_tax_tree <- taxonomy_subtree(631176)
diadema_tax_tree
```
By default, this function returns all taxa (including self, and internal)
descending from this `ott_id` but it is also possible to return a `phylo` object.
### How do I get the tree for a particular taxonomic group?
If you are looking to get the tree for a particular taxonomic group, you need to
first identify it by its node id or ott id, and then use the `tol_subtree()`
function:
```{r, fig.width=7, fig.height=4}
mono_id <- tnrs_match_names("Monotremata")
mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
plot(mono_tree)
```
### How do I find trees from studies focused on my favourite taxa?
The function `studies_find_studies()` allows the user to search for studies
matching specific criteria. The function `studies_properties()` returns the
list of properties that can be used in the search.
```{r}
furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
furry_ids <- furry_studies$study_ids
```
Now that we know the `study_id`, we can ask for the meta data information
associated with this study:
```{r}
furry_meta <- get_study_meta("pg_2550")
get_publication(furry_meta) ## The citation for the source of the study
get_tree_ids(furry_meta) ## This study has 10 trees associated with it
candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
```
Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with
all the trees associated with this particular study, while
`get_study_tree("pg_2550", "tree5513")` would return one of these trees.
### The tree returned by the API has duplicated tip labels, how can I work around it?
You may encounter the following error message:
```
Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
angulata) has already been encountered in this tree. Duplication of taxa in a
tree is prohibited.
```
This message occurs as duplicate labels are not allowed in the NEXUS format and
it is strictly enforced by the part of the code used by `rotl` to import the
trees in memory.
If you use a version of `rotl` more recent than 0.4.1, this should not happen by
default for the function `get_study_tree`. If it happens with another function,
please [let us know](https://github.com/ropensci/rotl/issues).
The easiest way to work around this is to save the tree in a file, and use APE
to read it in memory:
```{r, eval=FALSE}
## Write the tree to a session temporary file: a hard-coded "/tmp" path does
## not exist on every platform
tree_file <- tempfile(fileext = ".tre")
get_study_tree(study_id = "pg_710", tree_id = "tree1277",
               tip_label = "ott_taxon_name", file = tree_file,
               file_format = "newick")
## read the newick file back in with ape
tr <- ape::read.tree(file = tree_file)
```
### How do I get the higher taxonomy for a given taxa?
If you encounter a taxon name you are not familiar with, it might be useful to
obtain its higher taxonomy to see where it fits in the tree of life. We can
combine several taxonomy methods to extract this information easily.
```{r}
giant_squid <- tnrs_match_names("Architeuthis")
tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
```
### Why are OTT IDs discovered with `rotl` missing from an induced subtree?
Some taxonomic names that can be retrieved through the taxonomic name
resolution service are not part of the Open Tree's synthesis tree. These are
usually traditional higher-level taxa that have been found to be paraphyletic.
For instance, if you wanted to fetch a tree relating the three birds that go
into a [Turducken](https://en.wikipedia.org/wiki/Turducken) as well as the pork
used for stuffing, you might search for the turkey, duck, chicken, and pork
genera:
```{r}
turducken <- c("Meleagris", "Anas", "Gallus", "Sus")
## spell out `context_name` in full rather than relying on partial argument matching
taxa <- tnrs_match_names(turducken, context_name = "Animals")
taxa
```
We have the OTT ids for each genus, however, if we tried to get the induced
subtree from these results, we would get an error:
```{r, error=TRUE}
tr <- tol_induced_subtree(ott_id(taxa))
```
As the error message suggests, some of the taxa are not found in the synthetic
tree. This occurs for 2 main reasons: either the taxon is invalid, or it is part
of a group that is not monophyletic in the synthetic tree. There are two ways to
get around this issue: (1) removing the taxa that are not part of the Open Tree;
(2) using the complete species name.
#### Removing the taxa missing from the synthetic tree
To help with this situation, `rotl` provides a way to identify the OTT ids that
are not part of the synthetic tree. The function `is_in_tree()` takes the output
of the `ott_id()` function and returns a vector of logical indicating whether
the taxa are part of the synthetic tree. We can then use this vector to keep only the taxa that appear in the synthetic tree:
```{r}
in_tree <- is_in_tree(ott_id(taxa))
in_tree
tr <- tol_induced_subtree(ott_id(taxa)[in_tree])
```
#### Using the full taxonomic names
The best way to avoid these problems is to specify complete species names
(species being the lowest level of classification in the Open Tree taxonomy, they
are guaranteed to be monophyletic):
```{r, fig.width=7, fig.height=4}
turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus", "Sus scrofa")
## spell out `context_name` in full rather than relying on partial argument matching
taxa <- tnrs_match_names(turducken_spp, context_name = "Animals")
tr <- tol_induced_subtree(ott_id(taxa))
plot(tr)
```
rotl/vignettes/vignette.css 0000644 0001775 0000144 00000007266 12647504570 016146 0 ustar deepayan users body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#header {
text-align: center;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: a single thin top line. The former `border-style: solid`
   was dead code, since the `border: none` shorthand resets it. */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
/*white-space: pre-wrap; /* Wrap long lines */
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Framed images. Only the effective border colour (#CCCCCC) is kept; the
   earlier duplicate `border` declaration was overridden by it. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
/* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css
Colours from https://gist.github.com/robsimmons/1172277 */
code > span.kw { color: #555; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal (decimal values) */
code > span.bn { color: #d14; } /* BaseN */
code > span.fl { color: #d14; } /* Float */
code > span.ch { color: #d14; } /* Char */
code > span.st { color: #d14; } /* String */
code > span.co { color: #888888; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* OtherToken */
code > span.al { color: #ff0000; font-weight: bold; } /* AlertToken */
code > span.fu { color: #900; font-weight: bold; } /* Function calls */
code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */
rotl/README.md 0000644 0001775 0000144 00000012632 13056116500 013032 0 ustar deepayan users
[](https://travis-ci.org/ropensci/rotl) [](https://ci.appveyor.com/project/fmichonneau/rotl) [](https://codecov.io/github/ropensci/rotl?branch=master) [](http://www.r-pkg.org/pkg/rotl) [](http://www.r-pkg.org/pkg/rotl) [](http://depsy.org/package/r/rotl)
An R interface to Open Tree API
===============================
`rotl` is an R package to interact with the Open Tree of Life data APIs. It was initially developed as part of the [NESCENT/OpenTree/Arbor hackathon](http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/).
Client libraries to interact with the Open Tree of Life API also exist for [Python](https://github.com/OpenTreeOfLife/pyopentree) and [Ruby](https://github.com/SpeciesFileGroup/bark).
Installation
------------
The current stable version is available from CRAN, and can be installed by typing the following at the prompt in R:
``` r
install.packages("rotl")
```
If you want to test the development version, you first need to install [ghit](https://github.com/cloudyr/ghit) (`ghit` is a more lightweight version of [devtools](https://github.com/hadley/devtools) if your sole purpose is to install packages that are hosted on GitHub).
``` r
install.packages("ghit")
```
Then you can install `rotl` using:
``` r
library(ghit) # or library(devtools)
install_github("ropensci/rotl")
```
Vignettes
---------
There are three vignettes:
- Start by checking out the "How to use `rotl`?" by typing: `vignette("how-to-use-rotl", package="rotl")` after installing the package.
- Then explore how you can use `rotl` with other packages to combine your data with trees from the Open Tree of Life project by typing: `vignette("data_mashups", package="rotl")`.
- The vignette "Using the Open Tree Synthesis in a comparative analysis" demonstrates how you can reproduce an analysis of a published paper by downloading the tree they used, and data from the supplementary material: `vignette("meta-analysis", package="rotl")`.
The vignettes are also available from CRAN: [How to use `rotl`?](https://cran.r-project.org/package=rotl/vignettes/how-to-use-rotl.html), [Data mashups](https://cran.r-project.org/package=rotl/vignettes/data_mashups.html), and [Using the Open Tree synthesis in a comparative analysis](https://cran.r-project.org/package=rotl/vignettes/meta-analysis.html).
Quick start
-----------
### Get a little bit of the big Open Tree tree
Taxonomic names are represented in the Open Tree by numeric identifiers, the `ott_ids` (Open Tree Taxonomy identifiers). To extract a portion of a tree from the Open Tree, you first need to find `ott_ids` for a set of names using the `tnrs_match_names` function:
``` r
library(rotl)
```
## Warning: package 'rotl' was built under R version 3.4.0
``` r
apes <- c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo")
(resolved_names <- tnrs_match_names(apes))
```
## search_string unique_name approximate_match ott_id is_synonym flags
## 1 pan Pan FALSE 417957 FALSE
## 2 pongo Pongo FALSE 417949 FALSE
## 3 pan Pan FALSE 417957 FALSE
## 4 gorilla Gorilla FALSE 417969 FALSE
## 5 hoolock Hoolock FALSE 712902 FALSE
## 6 homo Homo FALSE 770309 FALSE
## number_matches
## 1 2
## 2 2
## 3 2
## 4 1
## 5 1
## 6 1
Now we can get the tree with just those tips:
``` r
tr <- tol_induced_subtree(ott_ids=ott_id(resolved_names))
plot(tr)
```

The code above can be summarized in a single pipe:
``` r
library(magrittr)
## or expressed as a pipe:
c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo") %>%
tnrs_match_names %>%
ott_id %>%
tol_induced_subtree %>%
plot
```

Versioning
----------
Starting with v3.0.0 of the package, the major and minor version numbers (the first 2 digits of the version number) will be matched to those of the API. The patch number (the 3rd digit of the version number) will be used to reflect bug fixes and other changes that are independent from changes to the API.
`rotl` can be used to access other versions of the API (if they are available) but most likely the high level functions will not work. Instead, you will need to parse the output yourself using the "raw" returns from the unexported low-level functions (all prefixed with a `.`). For instance to use the `tnrs/match_names` endpoint for `v2` of the API:
``` r
rotl:::.tnrs_match_names(c("pan", "pango", "gorilla", "hoolock", "homo"), otl_v="v2")
```
### Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
[rOpenSci](http://ropensci.org)
rotl/MD5 0000644 0001775 0000144 00000011575 13056457107 012103 0 ustar deepayan users bef13507a27f905c474b5ac48e21dc51 *DESCRIPTION
41ba68f62ff5f58f05f96a0dbd08b282 *LICENSE
1b572ebec7a85682af97cf46a8954c85 *NAMESPACE
f3ff4e9fdc6e6eaf6b8b101d5f9dec82 *NEWS.md
9a7c4a54d4f95926b66ed6e06476df86 *R/api-collections.R
9bd92e4fd5729c1c914772ea6051c0d5 *R/api-studies.R
cf7e3dc77115551f7a666294970d090c *R/api-taxonomy.R
08064573a536c31daf9a479eec383e25 *R/api-tnrs.R
9049ace110a1de5d23d81cc09e1022b5 *R/api-tol.R
f1d6f185eb969bcb30e8fa01828389b9 *R/base.R
6286c2d7b82011d166cf7e0b68391d82 *R/deduplicate_labels.R
71769e61f96290be93a642ed5e7e1bf1 *R/external_data.R
67afa5276ae39353db1be4c9748202e4 *R/is_in_tree.R
8f6a526c44396e5afe7a980ea0116d39 *R/match_names.R
dc83309cfcec969f308dc5ef128b24c8 *R/methods.R
885012a0fef7e432f1e3ab49ac05a621 *R/rotl-package.R
ecec63dfc2513887fcc35f62ad8bbf10 *R/studies-methods.R
e3689dd313ea6b632450c24f11ec56e8 *R/studies-utils.R
c2833f9705fb7e369e1678a37d87250b *R/studies.R
f1f397f5739142664c28e4068893be34 *R/tax_utils.R
3c36224fa41a50053831efb15ff2675c *R/taxonomy.R
2b2d1c28d1e4b229a59780ba94059a47 *R/tnrs.R
a884ca1e7339f1d913dc058a4d4a3653 *R/tol.R
cadefaffbca9725850fbc2218753179a *R/tree_to_labels.R
41bc3a21bc29333d534950c8e8ac0084 *README.md
f9b8ccf74106a49197ffc1dbc4e04e75 *build/vignette.rds
f0fd4ea89eb96fe8746602e268fbf502 *inst/CITATION
319582efa0a202fed973883555cf0e51 *inst/doc/data_mashups.R
64c62d69bc9cec722c7f26e6d025ef35 *inst/doc/data_mashups.Rmd
11af7fe6047e439efb39b4eaace09c9b *inst/doc/data_mashups.html
4f8c602b56730ad3c3a97caaf33d4719 *inst/doc/how-to-use-rotl.R
8001eb5bddaaf93285424d383c3195f3 *inst/doc/how-to-use-rotl.Rmd
1b8e7ff8dc6a4dd14cab710f443bc71d *inst/doc/how-to-use-rotl.html
b5e8ba63b31d0baf609e763d168b8b14 *inst/doc/meta-analysis.R
ef518f65da0362f9bf5b5fefa5329cd0 *inst/doc/meta-analysis.Rmd
171d4496da0b82d1f9dbf22220fd2d91 *inst/doc/meta-analysis.html
09d93b49b2b38f2495761ea86e134006 *inst/extdata/egg.csv
5e5eb20ee387ba7192c62e629a3383f2 *inst/extdata/protist_mutation_rates.csv
f30954a01d7420beb303c1916ea08971 *man/get_study.Rd
d261a5dcd926401050929be39c1ed44d *man/get_study_meta.Rd
9297cffa642515239d15c2efebdce3a2 *man/get_study_subtree.Rd
f60b580ce8808e50d0479e4d22c6779d *man/get_study_tree.Rd
7376b3c8fbc5c5d83c06e9c2756fbb35 *man/is_in_tree.Rd
8572338a55159b4b45eea44191cbc82f *man/list_trees.Rd
e0386442684f2019f009dbd9dc75b9b3 *man/match_names-methods.Rd
ce1a6b4c35d7ca5209b664938b081d7b *man/match_names.Rd
9f9c58fcedf44fecf5cfc9741ef54a3d *man/rotl.Rd
ea8938c2220f0b358930fbaa9b9af738 *man/source_list.Rd
6310137f447dbe11a8bda6f2a1c1ed40 *man/strip_ott_ids.Rd
5b3f7b8629b906dc79ac5d1de9e11255 *man/studies_find_studies.Rd
ddf9261b772dbcf6846955b1d34b1683 *man/studies_find_trees.Rd
b52b8d1f1701bf1d7d1b4549d56ccc90 *man/studies_properties.Rd
76ee971ef937ff9fe23ceca701b35ce6 *man/study_external_IDs.Rd
39e2c7bacba5befea2f3d8bc6be223a9 *man/synonyms.match_names.Rd
ccb54bd7485bddbe4ee59b4e117a0b21 *man/tax_lineage.Rd
0c0a51d7f1b9365e27f9c84b8095a0da *man/taxon_external_IDs.Rd
5f58b0e589b08000432acb5656fc2be2 *man/taxonomy-methods.Rd
f72255eacbb9eadb1fa2eb513557afaa *man/taxonomy_about.Rd
5fa0e3a349e2d596e24ce621c567497b *man/taxonomy_mrca.Rd
5b43cd3d8577fc8f9df28a7f1276972c *man/taxonomy_subtree.Rd
40ef74245553816685e5c7b07096ec8b *man/taxonomy_taxon_info.Rd
c01395cc8ca421b4e607c110fd714a53 *man/tnrs_contexts.Rd
a16c0de937a7de364fbff3a7e1581c44 *man/tnrs_infer_context.Rd
a3e1aa967d58fadf26e62d458ed075e5 *man/tnrs_match_names.Rd
0399a2876a415a962c1c35f52ad418b6 *man/tol_about.Rd
34b418796c5f85fbfecbe1b663e86197 *man/tol_induced_subtree.Rd
69cc652562e542dc2572068edbe28d6c *man/tol_mrca.Rd
16bd284ab4cf10574bcd7700d074266d *man/tol_node_info.Rd
a520ea68a2b1b4943a0941832761beae *man/tol_subtree.Rd
43c8ae96b072c5ffa02ee1476ab517af *tests/test-all.R
8aa44593dfd95bba9fc05fe726562a4f *tests/testthat/test-API.R
0978d0c6f16459de48230f0ca7242888 *tests/testthat/test-api-studies.R
35dc779abfea3d163251ad3ba9681b06 *tests/testthat/test-api-taxonomy.R
620a4b277bc83dc51d84c7dc4416b32d *tests/testthat/test-api-tnrs.R
eec189b1b610d1fab12164c96ba55ff5 *tests/testthat/test-api-tol.R
9a25dd598f4420dc68f2280ce323c367 *tests/testthat/test-base.R
91e80d240d100527a3496b9c2040c771 *tests/testthat/test-deduplicate_labels.R
51c9a683157c0ca97ca7fa7656dbf4a2 *tests/testthat/test-external.R
47dd8941c039e76be1a91266434680b4 *tests/testthat/test-match_names.R
4ca3762a30b73adc31d149a81e15c3a4 *tests/testthat/test-studies.R
d4eb07768af70ce855fbad1323a0994e *tests/testthat/test-taxonomy.R
57bcbfabee0c58d895dce12a81166027 *tests/testthat/test-tnrs.R
da7d30c00521dcf6ebaa16a1ddd5dc50 *tests/testthat/test-tol.R
85a49232f1097e80d7f059bea79d885d *tests/testthat/test-tree_to_labels.R
fb4b19651907e0fdb26e6f4c0581af83 *tests/tree_of_life.json
64c62d69bc9cec722c7f26e6d025ef35 *vignettes/data_mashups.Rmd
8001eb5bddaaf93285424d383c3195f3 *vignettes/how-to-use-rotl.Rmd
ef518f65da0362f9bf5b5fefa5329cd0 *vignettes/meta-analysis.Rmd
da8cb33974fa673158fea8ccf2d8bbac *vignettes/vignette.css
rotl/build/ 0000755 0001775 0000144 00000000000 13056407503 012654 5 ustar deepayan users rotl/build/vignette.rds 0000644 0001775 0000144 00000000473 13056407503 015217 0 ustar deepayan users R=O0uPh%>J{22PUH T$6d%ۑeC86C.{^<'D$#xل$df33]n=lS#ӭTISR&Xjr^H! 7xIt l-*Ԫ\ՓͦAPV2K0L3d,W^;b ;!!ING~Ywwl{V<0L`SWLU
Me~avljphrtTB3%̋ۻDyʹִUa?f_b rotl/DESCRIPTION 0000644 0001775 0000144 00000002415 13056457107 013272 0 ustar deepayan users Package: rotl
Title: Interface to the 'Open Tree of Life' API
Version: 3.0.3
Authors@R: c(
person("Francois", "Michonneau", role=c("aut", "cre"),
email="francois.michonneau@gmail.com"),
person("Joseph", "Brown", role="aut"),
person("David", "Winter", role="aut"))
Description: An interface to the 'Open Tree of Life' API to retrieve
phylogenetic trees, information about studies used to assemble the synthetic
tree, and utilities to match taxonomic names to 'Open Tree identifiers'. The
'Open Tree of Life' aims at assembling a comprehensive phylogenetic tree for all
named species.
URL: https://github.com/ropensci/rotl
BugReports: https://github.com/ropensci/rotl/issues
Depends: R (>= 3.1.1)
Imports: httr, jsonlite, assertthat (>= 0.1), rncl (>= 0.6.0), ape,
rentrez
License: BSD_2_clause + file LICENSE
Suggests: knitr (>= 1.12), rmarkdown (>= 0.7), testthat, RNeXML,
phylobase, MCMCglmm, fulltext (>= 0.1.6), readxl
VignetteBuilder: knitr
LazyData: true
RoxygenNote: 6.0.1
NeedsCompilation: no
Packaged: 2017-03-04 00:50:11 UTC; francois
Author: Francois Michonneau [aut, cre],
Joseph Brown [aut],
David Winter [aut]
Maintainer: Francois Michonneau <francois.michonneau@gmail.com>
Repository: CRAN
Date/Publication: 2017-03-04 07:27:19
rotl/man/ 0000755 0001775 0000144 00000000000 13056407503 012330 5 ustar deepayan users rotl/man/rotl.Rd 0000644 0001775 0000144 00000003702 12670356150 013603 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rotl-package.R
\docType{package}
\name{rotl}
\alias{rotl}
\alias{rotl-package}
\title{An Interface to the Open Tree of Life API}
\description{
The Open Tree of Life is an NSF funded project that is generating
an online, comprehensive phylogenetic tree for 1.8 million
species. \code{rotl} provides an interface that allows you to
query and retrieve the parts of the tree of life that are of
interest to you.
}
\details{
\code{rotl} provides functions for most of the end points the API
provides. The documentation of the API is available at:
\url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs}
}
\section{Customizing API calls}{
All functions that use API end points can take 2 arguments to
customize the API call and are passed as \code{...} arguments.
\itemize{
\item{ \code{otl_v} } { This argument controls which version
of the API your call is using. The default value for this
argument is a call to the non-exported function
\code{otl_version()} which returns the current version of the
Open Tree of Life APIs (v3).}
\item{ \code{dev_url} } { This argument controls whether to use
the development version of the API. By default, \code{dev_url}
is set to \code{FALSE}, using \code{dev_url = TRUE} in your
function calls will use the development version.}
}
For example, to use the development version of the API, you
could use: \code{tnrs_match_names("anas", dev_url=TRUE)}
Additional arguments can also be passed to the
\code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods.
}
\section{Acknowledgments}{
This package was started during the Open Tree of Life
\href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon}
organized by OpenTree, the NESCent Hackathon Interoperability
Phylogenetic group, and Arbor.
}
rotl/man/tnrs_match_names.Rd 0000644 0001775 0000144 00000006064 13055075704 016155 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tnrs.R
\name{tnrs_match_names}
\alias{tnrs_match_names}
\title{Match names to the Open Tree Taxonomy}
\usage{
tnrs_match_names(names = NULL, context_name = NULL,
do_approximate_matching = TRUE, ids = NULL, include_suppressed = FALSE,
...)
}
\arguments{
\item{names}{taxon names to be queried. Currently limited to
10,000 names for exact matches and 2,500 names for approximate
matches (character vector)}
\item{context_name}{name of the taxonomic context to be searched
(length-one character vector). Must match (case sensitive) one
of the values returned by \code{\link{tnrs_contexts}}.}
\item{do_approximate_matching}{A logical indicating whether or not
to perform approximate string (a.k.a. \dQuote{fuzzy})
matching. Using \code{FALSE} will greatly improve
speed. Default, however, is \code{TRUE}.}
\item{ids}{A vector of ids to use for identifying names. These
will be assigned to each name in the names array. If ids is
provided, then ids and names must be identical in length.}
\item{include_suppressed}{Ordinarily, some quasi-taxa, such as
incertae sedis buckets and other non-OTUs, are suppressed from
TNRS results. If this parameter is true, these quasi-taxa are
allowed as possible TNRS results.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A data frame summarizing the results of the query. The
original query output is appended as an attribute to the
returned object (and can be obtained using \code{attr(object,
"original_response")}).
}
\description{
Match taxonomic names to the Open Tree Taxonomy.
}
\details{
Accepts one or more taxonomic names and returns information about
potential matches for these names to known taxa in the Open Tree
Taxonomy.
This service uses taxonomic contexts to disambiguate homonyms and
misspelled names; a context may be specified using the
\code{context_name} argument. If no context is specified, then the
context will be inferred (i.e., the shallowest taxonomic context
that contains all unambiguous names in the input). Taxonomic
contexts are uncontested higher taxa that have been selected to
allow limits to be applied to the scope of TNRS searches
(e.g. 'match names only within flowering plants'). Once a context
has been identified (either user-specified or inferred), all taxon
name matches will be performed only against taxa within that
context. For a list of available taxonomic contexts, see
\code{\link{tnrs_contexts}}.
A name is considered unambiguous if it is not a synonym and has
only one exact match to any taxon name in the entire taxonomy.
Several functions listed in the \sQuote{See also} section can be
used to inspect and manipulate the object generated by this
function.
}
\examples{
\dontrun{
deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
"chordata", "hemichordata"))
}
}
\seealso{
\code{\link{inspect.match_names}},
\code{\link{update.match_names}},
\code{\link{synonyms.match_names}}.
}
rotl/man/taxonomy_taxon_info.Rd 0000644 0001775 0000144 00000004445 13055075704 016733 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/taxonomy.R
\name{taxonomy_taxon_info}
\alias{taxonomy_taxon_info}
\alias{tax_rank.taxon_info}
\alias{tax_name.taxon_info}
\alias{unique_name.taxon_info}
\alias{synonyms.taxon_info}
\alias{ott_id.taxon_info}
\alias{tax_sources.taxon_info}
\alias{is_suppressed.taxon_info}
\alias{flags.taxon_info}
\title{Taxon information}
\usage{
taxonomy_taxon_info(ott_ids, include_children = FALSE,
include_lineage = FALSE, include_terminal_descendants = FALSE, ...)
\method{tax_rank}{taxon_info}(tax, ...)
\method{tax_name}{taxon_info}(tax, ...)
\method{unique_name}{taxon_info}(tax, ...)
\method{synonyms}{taxon_info}(tax, ...)
\method{ott_id}{taxon_info}(tax, ...)
\method{tax_sources}{taxon_info}(tax, ...)
\method{is_suppressed}{taxon_info}(tax, ...)
\method{flags}{taxon_info}(tax, ...)
}
\arguments{
\item{ott_ids}{the ott ids of the taxon of interest (numeric or
character containing only numbers)}
\item{include_children}{whether to include information about all
the children of this taxon. Default \code{FALSE}.}
\item{include_lineage}{whether to include information about all
the higher level taxa that include the \code{ott_ids}.
Default \code{FALSE}.}
\item{include_terminal_descendants}{whether to include the list of
terminal \code{ott_ids} contained in the \code{ott_ids}
provided.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
\item{tax}{an object generated by the \code{taxonomy_taxon_info}
function}
}
\value{
\code{taxonomy_taxon_info} returns a list detailing
information about the taxa. \code{tax_rank} and
\code{tax_name} return a vector. \code{synonyms} returns a
list whose elements are the synonyms for each of the
\code{ott_id} requested.
}
\description{
Information about taxa.
}
\details{
Given a vector of ott ids, \code{taxonomy_taxon_info} returns
information about the specified taxa.
The functions \code{tax_rank}, \code{tax_name}, and
\code{synonyms} can extract this information from an object
created by \code{taxonomy_taxon_info()}.
}
\examples{
\dontrun{
req <- taxonomy_taxon_info(ott_id=515698)
tax_rank(req)
tax_name(req)
synonyms(req)
}
}
\seealso{
\code{\link{tnrs_match_names}} to obtain \code{ott_id}
from a taxonomic name.
}
rotl/man/tnrs_contexts.Rd 0000644 0001775 0000144 00000002005 13055075704 015534 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tnrs.R
\name{tnrs_contexts}
\alias{tnrs_contexts}
\title{TNRS contexts}
\usage{
tnrs_contexts(...)
}
\arguments{
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
Returns invisibly a list for each major clade (e.g.,
animals, microbes, plants, fungi, life) whose elements
contain the possible contexts.
}
\description{
This function returns a list of pre-defined taxonomic contexts
(i.e. clades) which can be used to limit the scope of tnrs
queries.
}
\details{
Taxonomic contexts are available to limit the scope of TNRS
searches. These contexts correspond to uncontested higher taxa
such as 'Animals' or 'Land plants'. This service returns a list
containing all available taxonomic context names, which may be
used as input (via the \code{context_name} argument in other
functions) to limit the search scope of other services including
\code{\link{tnrs_match_names}}.
}
rotl/man/tol_about.Rd 0000644 0001775 0000144 00000007660 13055075704 014623 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tol.R
\name{tol_about}
\alias{tol_about}
\alias{tax_rank.tol_summary}
\alias{tax_sources.tol_summary}
\alias{unique_name.tol_summary}
\alias{tax_name.tol_summary}
\alias{ott_id.tol_summary}
\title{Information about the Tree of Life}
\usage{
tol_about(include_source_list = FALSE, ...)
\method{tax_rank}{tol_summary}(tax, ...)
\method{tax_sources}{tol_summary}(tax, ...)
\method{unique_name}{tol_summary}(tax, ...)
\method{tax_name}{tol_summary}(tax, ...)
\method{ott_id}{tol_summary}(tax, ...)
}
\arguments{
\item{include_source_list}{Logical (default =
\code{FALSE}). Return an ordered list of source trees.}
\item{...}{additional arguments to customize the API call (see
\code{\link{rotl}} for more information).}
\item{tax}{an object created with a call to \code{tol_about}.}
}
\value{
An invisible list of synthetic tree summary statistics:
\itemize{
\item {date_created} {String. The creation date of the tree.}
\item {num_source_studies} {Integer. The number of studies
(publications) used as sources.}
\item {num_source_trees} {The number of trees used as sources
(may be >1 tree per study).}
\item {taxonomy_version} {The Open Tree Taxonomy version used
as a source.}
\item {filtered_flags} {List. Taxa with these taxonomy flags were
not used in construction of the tree.}
\item {root} {List. Describes the root node:}
\itemize{
\item {node_id} {String. The canonical identifier of the node.}
\item {num_tips} {Numeric. The number of descendent tips.}
\item {taxon} {A list of taxonomic properties:}
\itemize{
\item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).}
\item {name} {String. The taxonomic name of the queried node.}
\item {unique_name} {String. The string that uniquely
identifies the taxon in OTT.}
\item {rank} {String. The taxonomic rank of the taxon in OTT.}
\item {tax_sources} {List. A list of identifiers for taxonomic
sources, such as other taxonomies, that define taxa judged
equivalent to this taxon.}
}
}
\item {source_list} {List. Present only if
\code{include_source_list} is \code{TRUE}. The sourceid
ordering is the precedence order for synthesis, with
relationships from earlier trees in the list having priority
over those from later trees in the list. See
\code{source_id_map} below for study details.}
\item {source_id_map} {Named list of lists. Present only if
\code{include_source_list} is \code{TRUE}. Names correspond to
the \sQuote{sourceids} used in \code{source_list}
above. Source trees will have the following properties:}
\itemize{
\item {git_sha} {String. The git SHA identifying a particular source
version.}
\item {tree_id} {String. The tree id associated with the study id used.}
\item {study_id} {String. The study identifier. Will typically include
a prefix ("pg_" or "ot_").}
}
\item {synth_id} {The unique string for this version of the tree.}
}
}
\description{
Basic information about the Open Tree of Life (the synthetic tree)
}
\details{
Summary information about the current draft tree of life,
including information about the list of trees and the taxonomy
used to build it. The object returned by \code{tol_about} can
be passed to the taxonomy methods (\code{tax_name()},
\code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to
extract relevant taxonomic information for the root of the
synthetic tree.
}
\examples{
\dontrun{
res <- tol_about()
tax_sources(res)
ott_id(res)
studies <- source_list(tol_about(include_source_list=TRUE))}
}
\seealso{
\code{\link{source_list}} to explore the list of studies
used in the synthetic tree (see example).
}
rotl/man/get_study_subtree.Rd 0000644 0001775 0000144 00000004303 13055075704 016362 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{get_study_subtree}
\alias{get_study_subtree}
\title{Study Subtree}
\usage{
get_study_subtree(study_id, tree_id, subtree_id, object_format = c("phylo"),
tip_label = c("original_label", "ott_id", "ott_taxon_name"), file_format,
file, deduplicate = TRUE, ...)
}
\arguments{
\item{study_id}{the study identifier (character)}
\item{tree_id}{the tree identifier (character)}
\item{subtree_id}{either a node id that specifies a subtree or
\dQuote{ingroup} which returns the ingroup for this subtree.}
\item{object_format}{the class of the object returned by the
function (default, and currently only possibility \code{phylo}
from the \code{\link[ape]{ape}} package)}
\item{tip_label}{the format of the tip
labels. \dQuote{\code{original_label}} (default) returns the
original labels as provided in the study,
\dQuote{\code{ott_id}} labels are replaced by their ott IDs,
\dQuote{\code{ott_taxon_name}} labels are replaced by their
Open Tree Taxonomy taxon name.}
\item{file_format}{character, the file format to use to save the
results of the query (possible values, \sQuote{newick} or
\sQuote{nexus}).}
\item{file}{character, the path and file name where the output
should be written.}
\item{deduplicate}{logical (default \code{TRUE}). If the tree
returned by the study contains duplicated taxon names, should
they be made unique? It is normally illegal for NEXUS/Newick
tree strings to contain duplicated tip names. This is a
workaround to circumvent this requirement. If \code{TRUE},
duplicated tip labels will be appended \code{_1}, \code{_2},
etc.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\description{
Retrieve subtree from a specific tree in the Open Tree of Life data store
}
\examples{
\dontrun{
small_tr <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="node991044")
ingroup <- get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup")
nexus_file <- tempfile(fileext=".nex")
get_study_subtree(study_id="pg_1144", tree_id="tree5800", subtree_id="ingroup", file=nexus_file,
file_format="nexus")
}
}
rotl/man/tol_induced_subtree.Rd 0000644 0001775 0000144 00000003225 13055075704 016646 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tol.R
\name{tol_induced_subtree}
\alias{tol_induced_subtree}
\title{Subtree from the Open Tree of Life}
\usage{
tol_induced_subtree(ott_ids = NULL, node_ids = NULL, label_format = NULL,
file, ...)
}
\arguments{
\item{ott_ids}{Numeric vector. OTT ids indicating nodes to be used
as tips in the induced tree.}
\item{node_ids}{Character vector. Node ids indicating nodes to be used
as tips in the induced tree.}
\item{label_format}{Character. Defines the label type; one of
\dQuote{\code{name}}, \dQuote{\code{id}}, or
\dQuote{\code{name_and_id}} (the default).}
\item{file}{If specified, the function will write the subtree to a
file in newick format.}
\item{...}{additional arguments to customize the API call (see
\code{\link{rotl}} for more information).}
}
\value{
If no value is specified to the \code{file} argument
(default), a phylogenetic tree of class \code{phylo}.
Otherwise, the function returns invisibly a logical indicating
whether the file was successfully created.
}
\description{
Return the induced subtree on the synthetic tree that relates a list of nodes.
}
\details{
Return a tree with tips corresponding to the nodes identified in
the input set that is consistent with the topology of the current
synthetic tree. This tree is equivalent to the minimal subtree
induced on the draft tree by the set of identified nodes.
}
\examples{
\dontrun{
res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710))
tree_file <- tempfile(fileext=".tre")
tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710),
file=tree_file)}
}
rotl/man/get_study_meta.Rd 0000644 0001775 0000144 00000003736 13055075704 015650 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies-methods.R, R/studies.R
\name{get_tree_ids}
\alias{get_tree_ids}
\alias{get_publication}
\alias{candidate_for_synth}
\alias{get_study_year}
\alias{get_tree_ids.study_meta}
\alias{get_publication.study_meta}
\alias{candidate_for_synth.study_meta}
\alias{get_study_year.study_meta}
\alias{get_study_meta}
\title{Study Metadata}
\usage{
get_tree_ids(sm)
get_publication(sm)
candidate_for_synth(sm)
get_study_year(sm)
\method{get_tree_ids}{study_meta}(sm)
\method{get_publication}{study_meta}(sm)
\method{candidate_for_synth}{study_meta}(sm)
\method{get_study_year}{study_meta}(sm)
get_study_meta(study_id, ...)
}
\arguments{
\item{sm}{an object created by \code{get_study_meta}}
\item{study_id}{the study identifier (character)}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
named-list containing the metadata associated with the
study requested
}
\description{
Retrieve metadata about a study in the Open Tree of Life datastore.
}
\details{
\code{get_study_meta} returns a long list of attributes for the
studies that are contributing to the synthetic tree. To help with
the extraction of relevant information from this list, several
helper functions exist: \itemize{
\item {get_tree_ids} { The identifiers of the trees
associated with the study }
\item {get_publication} { The citation information of the
publication for the study. The DOI (or URL) for the study is
available as an attribute to the returned object (i.e.,
\code{attr(object, "DOI")} ). }
\item {candidate_for_synth} { The identifier of the tree(s) from
the study used in the synthetic tree. This is a subset of the
result of \code{get_tree_ids}. }
\item {get_study_year} { The year of publication of the study. }
}
}
\examples{
\dontrun{
req <- get_study_meta("pg_719")
get_tree_ids(req)
candidate_for_synth(req)
get_publication(req)
get_study_year(req)
}
}
rotl/man/match_names-methods.Rd 0000644 0001775 0000144 00000003556 13055075704 016553 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/match_names.R, R/methods.R
\name{ott_id.match_names}
\alias{ott_id.match_names}
\alias{flags.match_names}
\alias{flags}
\title{\code{ott_id} and \code{flags} for taxonomic names matched
by \code{tnrs_match_names}}
\usage{
\method{ott_id}{match_names}(tax, row_number, taxon_name, ott_id, ...)
\method{flags}{match_names}(tax, row_number, taxon_name, ott_id, ...)
flags(tax, ...)
}
\arguments{
\item{tax}{an object returned by \code{\link{tnrs_match_names}}}
\item{row_number}{the row number corresponding to the name for
which to list the synonyms}
\item{taxon_name}{the taxon name corresponding to the name for
which to list the synonyms}
\item{ott_id}{the ott id corresponding to the name for which to
list the synonyms}
\item{...}{currently ignored}
}
\value{
A list of the ott ids or flags for the taxonomic names
matched with \code{\link{tnrs_match_names}}, for either one or
all the names.
}
\description{
\code{rotl} provides a collection of functions that allows users
to extract relevant information from an object generated by
\code{\link{tnrs_match_names}} function.
}
\details{
These methods optionally accept one of the arguments
\code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve
the corresponding information for one of the matches in the object
returned by the \code{\link{tnrs_match_names}} function.
If these arguments are not provided, these methods can return
information for the matches currently listed in the object
returned by \code{\link{tnrs_match_names}}.
}
\examples{
\dontrun{
rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus"))
rsp$ott_id # ott id for match currently in use
ott_id(rsp) # similar as above but elements are named
## flags() is useful for instance to determine if a taxon is extinct
flags(rsp, taxon_name="Tyrannosaurus")
}
}
rotl/man/taxonomy-methods.Rd 0000644 0001775 0000144 00000001657 13055075704 016152 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{tax_rank}
\alias{tax_rank}
\alias{ott_id}
\alias{synonyms}
\alias{tax_sources}
\alias{is_suppressed}
\alias{unique_name}
\alias{tax_name}
\title{Methods for Taxonomy}
\usage{
tax_rank(tax, ...)
ott_id(tax, ...)
synonyms(tax, ...)
tax_sources(tax, ...)
is_suppressed(tax, ...)
unique_name(tax, ...)
tax_name(tax, ...)
}
\arguments{
\item{tax}{an object returned by \code{\link{taxonomy_taxon_info}},
\code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}}
\item{...}{additional arguments (see
\code{\link{tnrs_match_names}})}
}
\description{
Methods for dealing with objects containing taxonomic information
(Taxonomy, TNRS endpoints)
}
\details{
This is the page for the generic methods. See the help pages for
\code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and
\code{\link{tnrs_match_names}} for more information.
}
rotl/man/tax_lineage.Rd 0000644 0001775 0000144 00000001633 13055075704 015105 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R, R/taxonomy.R
\name{tax_lineage}
\alias{tax_lineage}
\alias{tax_lineage.taxon_info}
\title{Lineage of a taxon}
\usage{
tax_lineage(tax, ...)
\method{tax_lineage}{taxon_info}(tax, ...)
}
\arguments{
\item{tax}{an object created by \code{\link{taxonomy_taxon_info}}
using the argument \code{include_lineage=TRUE}.}
\item{...}{additional arguments (currently unused).}
}
\value{
A list with one slot per taxon that contains a data frame
with 3 columns: the taxonomy rank, the name, and unique name
for all taxa included in the lineage of the taxon up to the
root of the tree.
}
\description{
Extract the lineage information (higher taxonomy) from an object
returned by \code{\link{taxonomy_taxon_info}}.
}
\details{
The object passed to this function must have been created using
the argument \code{include_lineage=TRUE}.
}
rotl/man/list_trees.Rd 0000644 0001775 0000144 00000002441 13055075704 015000 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies-methods.R
\name{list_trees}
\alias{list_trees}
\alias{list_trees.matched_studies}
\title{List tree ids in objects returned by
\code{\link{studies_find_studies}} and
\code{\link{studies_find_trees}}.}
\usage{
list_trees(matched_studies, ...)
\method{list_trees}{matched_studies}(matched_studies, study_id, ...)
}
\arguments{
\item{matched_studies}{an object created by
\code{studies_find_trees} or \code{studies_find_studies}.}
\item{...}{Currently unused}
\item{study_id}{a \code{study_id} listed in the object returned by
\code{studies_find_trees}}
}
\value{
\code{list_trees} returns a list of the tree_ids for each
study that match the requested criteria. If a \code{study_id}
is provided, then only the trees for this study are returned
as a vector.
}
\description{
\code{list_trees} returns all trees associated with a particular
study when used on an object returned by
\code{\link{studies_find_studies}}, but only the trees that match
the search criteria when used on objects returned by
\code{\link{studies_find_trees}}.
}
\seealso{
\code{\link{studies_find_studies}} and
\code{\link{studies_find_trees}}. The help pages for these functions
have examples demonstrating the use of \code{list_trees}.
}
rotl/man/study_external_IDs.Rd 0000644 0001775 0000144 00000002564 13055075704 016442 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/external_data.R
\name{study_external_IDs}
\alias{study_external_IDs}
\title{Get external identifiers for data associated with an Open Tree study}
\usage{
study_external_IDs(study_id)
}
\arguments{
\item{study_id}{An open tree study ID}
}
\value{
A study_external_data object (which inherits from a list) which
contains some of the following.
doi, character, the DOI for the paper describing this study
external_data_url, character, a URL to an external data repository
(e.g. a treebase entry) if one exists.
pubmed_id character, the unique ID for this study in the NCBI's pubmed database
popset_ids character, vector of IDs for the NCBI's popset database
nucleotide_ids character, vector of IDs for the NCBI's nucleotide database
}
\description{
Data associated with studies contributing to the Open Tree synthesis may
be available from other databases. In particular, trees and alignments
may be available from treebase and DNA sequences and bibliographic
information associated with a given study may be available from the NCBI.
This function retrieves that information for a given study.
}
\examples{
\dontrun{
flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae")
study_external_IDs(flies[2,]$study_ids)
}
}
\seealso{
studies_find_studies (used to discover study IDs)
}
rotl/man/taxonomy_subtree.Rd 0000644 0001775 0000144 00000004366 13055075704 016242 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/taxonomy.R
\name{taxonomy_subtree}
\alias{taxonomy_subtree}
\title{Taxonomy subtree}
\usage{
taxonomy_subtree(ott_id = NULL, output_format = c("taxa", "newick", "phylo",
"raw"), label_format = NULL, file, ...)
}
\arguments{
\item{ott_id}{The ott id of the taxon of interest.}
\item{output_format}{the format of the object to be returned. See
the \sQuote{Value} section.}
\item{label_format}{Character. Defines the label type; one of
\dQuote{\code{name}}, \dQuote{\code{id}}, or
\dQuote{\code{name_and_id}} (the default).}
\item{file}{the file name where to save the output of the
function. Ignored unless \code{output_format} is set to
\dQuote{\code{newick}}.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
If the \code{file} argument is missing: \itemize{
\item{\dQuote{\code{taxa}}} { a list of the taxa names
(species) in slot \code{tip_label}, and higher-level taxonomy
(e.g., families, genera) in slot \code{edge_label}, descending
from the taxa corresponding to the \code{ott_id} provided. }
\item{\dQuote{\code{newick}}} { a character vector containing
the newick formatted string corresponding to the taxonomic
subtree for the \code{ott_id} provided. }
\item{\dQuote{\code{phylo}}} { an object of the class
\code{phylo} from the \code{\link[ape]{ape}} package. }
\item{\dQuote{\code{raw}}} { the direct output from the API,
i.e., a list with an element named \sQuote{newick} that
contains the subtree as a newick formatted string. }
}
If a \code{file} argument is provided (and
\code{output_format} is set to \dQuote{\code{newick}}), a
logical indicating whether the file was successfully created.
}
\description{
Given an ott id, return the inclusive taxonomic subtree descended
from the specified taxon.
}
\details{
If the output of this function is exported to a file, the only
possible value for the \code{output_format} argument is
\dQuote{\code{newick}}. If the file provided already exists, it
will be silently overwritten.
}
\examples{
\dontrun{
req <- taxonomy_subtree(ott_id=515698)
plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
}
}
rotl/man/tol_node_info.Rd 0000644 0001775 0000144 00000011572 13055075704 015446 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R, R/tol.R
\name{tol_lineage}
\alias{tol_lineage}
\alias{tol_node_info}
\alias{tax_rank.tol_node}
\alias{tax_sources.tol_node}
\alias{unique_name.tol_node}
\alias{tax_name.tol_node}
\alias{ott_id.tol_node}
\alias{source_list.tol_node}
\alias{tax_lineage.tol_node}
\alias{tol_lineage.tol_node}
\title{Node info}
\usage{
tol_lineage(tax, ...)
tol_node_info(ott_id = NULL, node_id = NULL, include_lineage = FALSE, ...)
\method{tax_rank}{tol_node}(tax, ...)
\method{tax_sources}{tol_node}(tax, ...)
\method{unique_name}{tol_node}(tax, ...)
\method{tax_name}{tol_node}(tax, ...)
\method{ott_id}{tol_node}(tax, ...)
\method{source_list}{tol_node}(tax, ...)
\method{tax_lineage}{tol_node}(tax, ...)
\method{tol_lineage}{tol_node}(tax, ...)
}
\arguments{
\item{tax}{an object returned by \code{tol_node_info}.}
\item{...}{additional arguments to customize the API call (see
?rotl for more information)}
\item{ott_id}{Numeric. The OpenTree taxonomic identifier.}
\item{node_id}{Character. The OpenTree node identifier.}
\item{include_lineage}{Logical (default = FALSE). Whether to return the
lineage of the node from the synthetic tree.}
}
\value{
\code{tol_node_info} returns an invisible list of summary
information about the queried node:
\itemize{
\item {node_id} {String. The canonical identifier of the node.}
\item {num_tips} {Numeric. The number of descendent tips.}
\item {taxon} {A list of taxonomic properties. Only returned if
the queried node is a taxon. Each source has:}
\itemize{
\item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
\item {name} {String. The taxonomic name of the queried node.}
\item {unique_name} {String. The string that uniquely
identifies the taxon in OTT.}
\item {rank} {String. The taxonomic rank of the taxon in OTT.}
\item {tax_sources} {List. A list of identifiers for taxonomic
sources, such as other taxonomies, that define taxa judged
equivalent to this taxon.}
}
The following properties list support/conflict for the node across
synthesis source trees. All properties involve sourceid keys and
nodeid values (see \code{source_id_map} below).
\item {partial_path_of} {List. The edge below this synthetic tree node
is compatible with the edge below each of these input tree nodes (one
per tree). Each returned element is reported as sourceid:nodeid.}
\item {supported_by} {List. Input tree nodes (one per tree) that support
this synthetic tree node. Each returned element is reported as
sourceid:nodeid.}
\item {terminal} {List. Input tree nodes (one per tree) that are equivalent
to this synthetic tree node (via an exact mapping, or the input tree
terminal may be the only terminal descended from this synthetic tree node).
Each returned element is reported as sourceid:nodeid.}
\item {conflicts_with} {Named list of lists. Names correspond to
sourceid keys. Each list contains input tree node ids (one or more per tree)
that conflict with this synthetic node.}
\item {source_id_map} {Named list of lists. Names correspond to the
sourceid keys used in the 4 properties above. Source trees will have the
following properties:}
\itemize{
\item {git_sha} {The git SHA identifying a particular source
version.}
\item {tree_id} {The tree id associated with the study id used.}
\item {study_id} {The study identifier. Will typically include
a prefix ("pg_" or "ot_").}
}
The only sourceid that does not correspond to a source tree is the taxonomy,
which will have the name "ott"+`taxonomy_version`, and the value is the
ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
appear in \code{supported_by}.
}
\code{tol_lineage} and \code{tax_lineage} return data
frames. \code{tol_lineage} indicates for each ancestor its
node identifier, the number of tips descending from that
node, and whether it corresponds to a taxonomic level.
}
\description{
Get summary information about a node in the synthetic tree
}
\details{
Returns summary information about a node in the graph. The
node of interest may be specified using either a node id or a
taxon id, but not both. If the specified node or OTT id is not
in the graph, an error will be returned.
If the argument \code{include_lineage=TRUE} is used, you can
use \code{tax_lineage()} or \code{tol_lineage} to return the
taxonomic information or the node information for all the
ancestors to this node, down to the root of the tree.
}
\examples{
\dontrun{
birds <- tol_node_info(ott_id=81461, include_lineage=TRUE)
source_list(birds)
tax_rank(birds)
ott_id(birds)
tax_lineage(birds)
tol_lineage(birds)}
}
rotl/man/tnrs_infer_context.Rd 0000644 0001775 0000144 00000001771 13055075704 016545 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tnrs.R
\name{tnrs_infer_context}
\alias{tnrs_infer_context}
\title{Infer the taxonomic context from a list of names}
\usage{
tnrs_infer_context(names = NULL, ...)
}
\arguments{
\item{names}{Vector of taxon names.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A list including the context name, the context ott id and
possibly the names in the query that have an ambiguous
taxonomic meaning in the query.
}
\description{
Return a taxonomic context given a list of taxonomic names
}
\details{
Find the least inclusive taxonomic context that includes all the
unambiguous names in the input set. Unambiguous names are names
with exact matches to non-homonym taxa. Ambiguous names (those
without exact matches to non-homonym taxa) are indicated in
results.
}
\examples{
\dontrun{
res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
}
}
rotl/man/source_list.Rd 0000644 0001775 0000144 00000001511 13055075704 015153 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R, R/tol.R
\name{source_list}
\alias{source_list}
\alias{source_list.tol_summary}
\title{List of studies used in the Tree of Life}
\usage{
source_list(tax, ...)
\method{source_list}{tol_summary}(tax, ...)
}
\arguments{
\item{tax}{a list containing a \code{source_id_map} slot.}
\item{...}{additional arguments (currently unused)}
}
\value{
a data frame
}
\description{
Retrieve the detailed information for the list of studies used in
the Tree of Life.
}
\details{
This function takes the object resulting from
\code{tol_about(study_list = TRUE)}, \code{tol_mrca()},
\code{tol_node_info()}, and returns a data frame listing the
\code{tree_id}, \code{study_id} and \code{git_sha} for the
studies currently included in the Tree of Life.
}
rotl/man/taxonomy_about.Rd 0000644 0001775 0000144 00000002125 13055075704 015672 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/taxonomy.R
\name{taxonomy_about}
\alias{taxonomy_about}
\title{Information about the Open Tree Taxonomy}
\usage{
taxonomy_about(...)
}
\arguments{
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A list with the following properties:
\itemize{
\item {weburl} {String. The release page for this version
of the taxonomy.}
\item {author} {String. The author string.}
\item {name} {String. The name of the taxonomy.}
\item {source} {String. The full identifying information for
this version of the taxonomy.}
\item {version} {String. The version number of the taxonomy.}
}
}
\description{
Summary information about the Open Tree Taxonomy (OTT)
}
\details{
Return metadata and information about the taxonomy
itself. Currently, the available metadata is fairly sparse, but
includes (at least) the version, and the location from which the
complete taxonomy source files can be downloaded.
}
\examples{
\dontrun{
taxonomy_about()
}
}
rotl/man/studies_find_trees.Rd 0000644 0001775 0000144 00000005615 13055075704 016513 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{studies_find_trees}
\alias{studies_find_trees}
\title{Find Trees}
\usage{
studies_find_trees(property = NULL, value = NULL, verbose = FALSE,
exact = FALSE, detailed = TRUE, ...)
}
\arguments{
\item{property}{The property to be searched on (character)}
\item{value}{The property-value to be searched on (character)}
\item{verbose}{Should the output include all metadata? (logical,
default \code{FALSE})}
\item{exact}{Should exact matching be used for the value?
(logical, default \code{FALSE})}
\item{detailed}{Should a detailed report be provided? If
\code{TRUE} (default), the output will include metadata about
the studies that include trees matching the property. Otherwise,
only information about the trees will be provided.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A data frame that summarizes the trees found (and their
associated studies) for the requested criteria. If a study has
more than 5 trees, the \code{tree_ids} of the first ones will
be shown, followed by \code{...} to indicate that more are
present.
If \code{detailed=FALSE}, the data frame will include the
study ids of the study (\code{study_ids}), the number of trees
in this study that match the search criteria
(\code{n_matched_trees}), the tree ids that match the search
criteria (\code{match_tree_ids}).
If \code{detailed=TRUE}, in addition to the fields listed
above, the data frame will also contain the total number of
trees associated with the study (\code{n_trees}), all the tree
ids associated with the study (\code{tree_ids}), the tree id
that is a potential candidate for inclusion in the synthetic
tree (if any) (\code{candidate}), the year the study was
published (\code{study_year}), the title of the study
(\code{title}), the DOI for the study (\code{study_doi}).
}
\description{
Return a list of studies for which trees match a given set of
properties
}
\details{
The list of possible values to be used as values for the argument
\code{property} can be found using the function
\code{\link{studies_properties}}.
}
\examples{
\dontrun{
res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
detailed=FALSE)
## summary of the trees and associated studies that match this criterion
res
## With metadata about the studies (default)
res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophilia",
detailed=TRUE)
## The list of trees for each study that match the search criteria
list_trees(res)
## the trees for a given study
list_trees(res, study_id = "pg_2769")
}
}
\seealso{
\code{\link{studies_properties}} which lists properties
the studies can be searched on. \code{\link{list_trees}} for
listing the trees that match the query.
}
rotl/man/match_names.Rd 0000644 0001775 0000144 00000005002 13055075704 015076 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/match_names.R
\name{inspect.match_names}
\alias{inspect.match_names}
\alias{inspect}
\alias{update.match_names}
\title{Inspect and Update alternative matches for a name returned
by tnrs_match_names}
\usage{
\method{inspect}{match_names}(response, row_number, taxon_name, ott_id, ...)
inspect(response, ...)
\method{update}{match_names}(object, row_number, taxon_name, ott_id,
new_row_number, new_ott_id, ...)
}
\arguments{
\item{response}{an object generated by the
\code{\link{tnrs_match_names}} function}
\item{row_number}{the row number corresponding to the name to
inspect}
\item{taxon_name}{the taxon name corresponding to the name to
inspect}
\item{ott_id}{the ott id corresponding to the name to inspect}
\item{...}{currently ignored}
\item{object}{an object created by \code{\link{tnrs_match_names}}}
\item{new_row_number}{the row number in the output of
\code{\link{inspect}} to replace the taxa specified by
\code{row_number}, \code{taxon_name}, or \code{ott_id}.}
\item{new_ott_id}{the ott id of the taxon to replace the taxa
specified by \code{row_number}, \code{taxon_name}, or
\code{ott_id}.}
}
\value{
a data frame
}
\description{
Taxonomic names may have different meanings in different taxonomic
contexts, as the same genus name can be applied to animals and
plants for instance. Additionally, the meaning of a taxonomic name
may have changed throughout its history, and may have referred to a
different taxon in the past. In such cases, a given name might
have multiple matches in the Open Tree Taxonomy. These functions
allow users to inspect (and update) alternative meanings of a given
name and its current taxonomic status according to the Open Tree
Taxonomy.
}
\details{
To inspect alternative taxonomic meanings of a given name, you
need to provide the object resulting from a call to the
tnrs_match_names function, as well as one of either the row number
corresponding to the name in this object, the name itself (as used
in the original query), or the ott_id listed for this name.
To update one of the names, you also need to provide the row number
in which the name to be replaced appears or its ott id.
}
\examples{
\dontrun{
matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus"))
inspect(matched_names, taxon_name="diadema")
new_matched_names <- update(matched_names, taxon_name="diadema",
new_ott_id = 631176)
new_matched_names
}
}
\seealso{
\code{\link{tnrs_match_names}}
}
rotl/man/studies_properties.Rd 0000644 0001775 0000144 00000002203 13055075704 016553 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{studies_properties}
\alias{studies_properties}
\title{Properties of the Studies}
\usage{
studies_properties(...)
}
\arguments{
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A list of the study properties that can be used to find
studies and trees that are contributing to the synthetic tree.
}
\description{
Return the list of study properties that can be used to search
studies and trees used in the synthetic tree.
}
\details{
The list returned has 2 elements \code{tree_properties} and
\code{studies_properties}. Each of these elements lists the
properties that can be used to search for trees and studies that
are contributing to the synthetic tree. The definitions of these
properties are available from
\url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON}
}
\examples{
\dontrun{
all_the_properties <- studies_properties()
unlist(all_the_properties$tree_properties)
}
}
\seealso{
\code{\link{studies_find_trees}}
}
rotl/man/get_study.Rd 0000644 0001775 0000144 00000003744 13055075704 014641 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{get_study}
\alias{get_study}
\title{Get all the trees associated with a particular study}
\usage{
get_study(study_id = NULL, object_format = c("phylo", "nexml"), file_format,
file, ...)
}
\arguments{
\item{study_id}{the study ID for the study of interest (character)}
\item{object_format}{the class of the object the query should
return (either \code{phylo} or \code{nexml}). Ignored if
\code{file_format} is specified.}
\item{file_format}{the format of the file to be generated
(\code{newick}, \code{nexus}, \code{nexml} or \code{json}).}
\item{file}{the file name where the output of the function will be
saved.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
if \code{file_format} is missing, an object of class
\code{phylo} or \code{nexml}, otherwise a logical indicating
whether the file was successfully created.
}
\description{
Returns the trees associated with a given study
}
\details{
If \code{file_format} is missing, the function returns an object
of the class \code{phylo} from the \code{\link[ape]{ape}} package
(default), or an object of the class \code{nexml} from the
\code{RNeXML} package.
Otherwise \code{file_format} can be either \code{newick},
\code{nexus}, \code{nexml} or \code{json}, and the function will
generate a file of the selected format. In this case, a file name
needs to be provided using the argument \code{file}. If a file
with the same name already exists, it will be silently
overwritten.
}
\examples{
\dontrun{
that_one_study <- get_study(study_id="pg_719", object_format="phylo")
if (require(RNeXML)) { ## if RNeXML is installed get the object directly
nexml_study <- get_study(study_id="pg_719", object_format="nexml")
} else { ## otherwise write it to a file
get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml"))
}
}
}
\seealso{
\code{\link{get_study_meta}}
}
rotl/man/tol_mrca.Rd 0000644 0001775 0000144 00000013434 13055075704 014427 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tol.R
\name{tol_mrca}
\alias{tol_mrca}
\alias{tax_sources.tol_mrca}
\alias{unique_name.tol_mrca}
\alias{tax_name.tol_mrca}
\alias{tax_rank.tol_mrca}
\alias{ott_id.tol_mrca}
\alias{source_list.tol_mrca}
\title{MRCA of taxa from the synthetic tree}
\usage{
tol_mrca(ott_ids = NULL, node_ids = NULL, ...)
\method{tax_sources}{tol_mrca}(tax, ...)
\method{unique_name}{tol_mrca}(tax, ...)
\method{tax_name}{tol_mrca}(tax, ...)
\method{tax_rank}{tol_mrca}(tax, ...)
\method{ott_id}{tol_mrca}(tax, ...)
\method{source_list}{tol_mrca}(tax, ...)
}
\arguments{
\item{ott_ids}{Numeric vector. The ott ids for which the MRCA is desired.}
\item{node_ids}{Character vector. The node ids for which the MRCA is desired.}
\item{...}{additional arguments to customize the API call (see
\code{\link{rotl}} for more information).}
\item{tax}{an object returned by \code{tol_mrca()}.}
}
\value{
An invisible list of the MRCA node properties:
\itemize{
\item {mrca} {List of node properties.}
\itemize{
\item {node_id} {String. The canonical identifier of the node.}
\item {num_tips} {Numeric. The number of descendent tips.}
\item {taxon} {A list of taxonomic properties. Only returned if
the queried node is a taxon. (If the node is not a taxon, a
\code{nearest_taxon} list is returned (see below)).}
\itemize{
\item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
\item {name} {String. The taxonomic name of the queried node.}
\item {unique_name} {String. The string that uniquely
identifies the taxon in OTT.}
\item {rank} {String. The taxonomic rank of the taxon in OTT.}
\item {tax_sources} {List. A list of identifiers for taxonomic
sources, such as other taxonomies, that define taxa judged
equivalent to this taxon.}
}
The following properties list support/conflict for the node across
synthesis source trees. All properties involve sourceid keys and
nodeid values (see \code{source_id_map} below). Not all properties
are present for every node.
\item {partial_path_of} {List. The edge below this synthetic tree node
is compatible with the edge below each of these input tree nodes (one
per tree). Each returned element is reported as sourceid:nodeid.}
\item {supported_by} {List. Input tree nodes (one per tree) that support
this synthetic tree node. Each returned element is reported as
sourceid:nodeid.}
\item {terminal} {List. Input tree nodes (one per tree) that are equivalent
to this synthetic tree node (via an exact mapping, or the input tree
terminal may be the only terminal descended from this synthetic tree node).
Each returned element is reported as sourceid:nodeid.}
\item {conflicts_with} {Named list of lists. Names correspond to
sourceid keys. Each list contains input tree node ids (one or more per tree)
that conflict with this synthetic node.}
}
\item {nearest_taxon} {A list of taxonomic properties of the nearest rootward
taxon node to the MRCA node. Only returned if the MRCA node is not a taxon
(otherwise the \code{taxon} list above is returned).}
\itemize{
\item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
\item {name} {String. The taxonomic name of the queried node.}
\item {unique_name} {String. The string that uniquely
identifies the taxon in OTT.}
\item {rank} {String. The taxonomic rank of the taxon in OTT.}
\item {tax_sources} {List. A list of identifiers for taxonomic
sources, such as other taxonomies, that define taxa judged
equivalent to this taxon.}
}
\item {source_id_map} {Named list of lists. Names correspond to the
sourceid keys used in the support/conflict properties of the \code{mrca}
list above. Source trees will have the following properties:}
\itemize{
\item {git_sha} {The git SHA identifying a particular source
version.}
\item {tree_id} {The tree id associated with the study id used.}
\item {study_id} {The study identifier. Will typically include
a prefix ("pg_" or "ot_").}
}
The only sourceid that does not correspond to a source tree is the taxonomy,
which will have the name "ott"+`taxonomy_version`, and the value is the
ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
appear in \code{supported_by}.
}
}
\description{
Most Recent Common Ancestor for a set of nodes
}
\details{
Get the MRCA of a set of nodes on the current synthetic
tree. Accepts any combination of node ids and ott ids as
input. Returns information about the most recent common
ancestor (MRCA) node as well as the most recent taxonomic
ancestor (MRTA) node (the closest taxonomic node to the MRCA
node in the synthetic tree; the MRCA and MRTA may be the same
node). If they are the same, the taxonomic information will be
in the \code{mrca} slot, otherwise they will be in the
\code{nearest_taxon} slot of the list. If any of the specified
nodes is not in the synthetic tree an error will be returned.
Taxonomic methods (\code{tax_sources()}, \code{ott_id()},
\code{unique_name()}, ...) are available on the objects
returned by \code{tol_mrca()}. If the MRCA node is MRTA, the
name of the object returned by these methods will start with
\sQuote{ott}, otherwise it will start with \sQuote{mrca}.
}
\examples{
\dontrun{
birds_mrca <- tol_mrca(ott_ids=c(412129, 536234))
ott_id(birds_mrca)
tax_sources(birds_mrca)}
}
rotl/man/synonyms.match_names.Rd 0000644 0001775 0000144 00000003467 13055075704 017011 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/match_names.R
\name{synonyms.match_names}
\alias{synonyms.match_names}
\title{List the synonyms for a given name}
\usage{
\method{synonyms}{match_names}(tax, row_number, taxon_name, ott_id, ...)
}
\arguments{
\item{tax}{a data frame generated by the
\code{\link{tnrs_match_names}} function}
\item{row_number}{the row number corresponding to the name for
which to list the synonyms}
\item{taxon_name}{the taxon name corresponding to the name for
which to list the synonyms}
\item{ott_id}{the ott id corresponding to the name for which to
list the synonyms}
\item{...}{currently ignored}
}
\value{
a list whose elements are all synonym names (as vectors of
character) for the taxonomic names that match the query (the
names of the elements of the list).
}
\description{
When querying the Taxonomic Name Resolution Services for a
particular taxonomic name, the API returns as possible matches all
names that include the queried name as a possible synonym. This
function allows you to explore other synonyms for an accepted
name, and allows you to determine why the name you queried is
returning an accepted synonym.
}
\details{
To list synonyms for a given taxonomic name, you need to provide
the object resulting from a call to the
\code{\link{tnrs_match_names}} function, as well as one of either
the row number corresponding to the name in this object, the name
itself (as used in the original query), or the ott_id listed for
this name. Otherwise, the synonyms for all the currently matched
names are returned.
}
\examples{
\dontrun{
echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia"))
## These 3 calls are identical
synonyms(echino, taxon_name="Acanthaster")
synonyms(echino, row_number=2)
synonyms(echino, ott_id=337928)
}
}
rotl/man/tol_subtree.Rd 0000644 0001775 0000144 00000003017 13055075704 015152 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tol.R
\name{tol_subtree}
\alias{tol_subtree}
\title{Extract a subtree from the synthetic tree}
\usage{
tol_subtree(ott_id = NULL, node_id = NULL, label_format = NULL, file, ...)
}
\arguments{
\item{ott_id}{Numeric. The ott id of the node in the tree that should
serve as the root of the tree returned.}
\item{node_id}{Character. The node id of the node in the tree that should
serve as the root of the tree returned.}
\item{label_format}{Character. Defines the label type; one of
\dQuote{\code{name}}, \dQuote{\code{id}}, or
\dQuote{\code{name_and_id}} (the default).}
\item{file}{If specified, the function will write the subtree to a
file in newick format.}
\item{...}{additional arguments to customize the API call (see
\code{\link{rotl}} for more information).}
}
\value{
If no value is specified to the \code{file} argument
(default), a phylogenetic tree of class \code{phylo}.
Otherwise, the function returns invisibly a logical indicating
whether the file was successfully created.
}
\description{
Extract a subtree from the synthetic tree from an Open Tree node id.
}
\details{
Given a node, return the subtree of the synthetic tree descended
from that node. The start node may be specified using either a node id
or an ott id, but not both. If the specified node is not in the
synthetic tree an error will be returned. There is a size limit of
25000 tips for this method.
}
\examples{
\dontrun{
res <- tol_subtree(ott_id=241841)}
}
rotl/man/taxon_external_IDs.Rd 0000644 0001775 0000144 00000001676 13055075704 016426 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/external_data.R
\name{taxon_external_IDs}
\alias{taxon_external_IDs}
\title{Get external identifiers for data associated with an Open Tree taxon}
\usage{
taxon_external_IDs(taxon_id)
}
\arguments{
\item{taxon_id}{An Open Tree taxon ID (ott id)}
}
\value{
a data.frame in which each row represents a unique record in an
external database. The column "source" provides an abbreviated name for the
database, and "id" the unique ID for the record.
}
\description{
The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This
function retrieves identifiers to external taxonomic records that have
contributed the rank, position and definition of a given Open Tree taxon.
}
\examples{
\dontrun{
gibbon_IDs <- taxon_external_IDs(712902)
}
}
\seealso{
tnrs_match_names, which can be used to search for taxa by name.
taxonomy_taxon, for more information about a given taxon.
}
rotl/man/studies_find_studies.Rd 0000644 0001775 0000144 00000004762 13055075704 017053 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{studies_find_studies}
\alias{studies_find_studies}
\title{Find a Study}
\usage{
studies_find_studies(property = NULL, value = NULL, verbose = FALSE,
exact = FALSE, detailed = TRUE, ...)
}
\arguments{
\item{property}{The property to be searched on (character)}
\item{value}{The property value to be searched on (character)}
\item{verbose}{Should the output include all metadata? (logical,
default \code{FALSE})}
\item{exact}{Should exact matching be used? (logical, default
\code{FALSE})}
\item{detailed}{If \code{TRUE} (default), the function will return
a data frame that summarizes information about the study (see
\sQuote{Value}). Otherwise, it only returns the study
identifiers.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
If \code{detailed=TRUE}, the function returns a data frame
listing the study id (\code{study_ids}), the number of trees
associated with this study (\code{n_trees}), the tree ids (at
most 5) associated with the studies (\code{tree_ids}), the
tree id that is a candidate for the synthetic tree if any
(\code{candidate}), the year of publication of the study
(\code{study_year}), the title of the publication for the
study (\code{title}), and the DOI (Digital Object Identifier)
for the study (\code{study_doi}).
If \code{detailed=FALSE}, the function returns a data frame
with a single column containing the study identifiers.
}
\description{
Return the identifiers of studies that match given properties
}
\examples{
\dontrun{
## To match a study for which the identifier is already known
one_study <- studies_find_studies(property="ot:studyId", value="pg_719")
list_trees(one_study)
## To find studies pertaining to Mammals
mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
value="mammalia")
## To extract the tree identifiers for each of the studies
list_trees(mammals)
## ... or for a given study
list_trees(mammals, "ot_308")
## Just the identifiers without other information about the studies
mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
value="mammalia", detailed=FALSE)
}
}
\seealso{
\code{\link{studies_properties}}, which lists the properties
against which the studies can be
searched. \code{\link{list_trees}}, which returns a list of all
tree ids associated with a study.
}
rotl/man/strip_ott_ids.Rd 0000644 0001775 0000144 00000001653 13055075704 015515 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tol.R
\name{strip_ott_ids}
\alias{strip_ott_ids}
\title{Strip OTT ids from tip labels}
\usage{
strip_ott_ids(tip_labels, remove_underscores = FALSE)
}
\arguments{
\item{tip_labels}{a character vector containing tip labels (most
likely the \code{tip.label} element from a tree returned by
\code{\link{tol_induced_subtree}})}
\item{remove_underscores}{logical (defaults to FALSE). If set to
TRUE underscores in tip labels are converted to spaces}
}
\value{
A character vector containing the contents of
\code{tip_labels} with any OTT ids removed.
}
\description{
Strip OTT ids from tip labels
}
\examples{
\dontrun{
genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio")
tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710))
tr$tip.label \%in\% genera
tr$tip.label <- strip_ott_ids(tr$tip.label)
tr$tip.label \%in\% genera}
}
rotl/man/taxonomy_mrca.Rd 0000644 0001775 0000144 00000003316 13055075704 015505 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/taxonomy.R
\name{taxonomy_mrca}
\alias{taxonomy_mrca}
\alias{tax_rank.taxon_mrca}
\alias{tax_name.taxon_mrca}
\alias{ott_id.taxon_mrca}
\alias{unique_name.taxon_mrca}
\alias{tax_sources.taxon_mrca}
\alias{flags.taxon_mrca}
\alias{is_suppressed.taxon_mrca}
\title{Taxonomic MRCA}
\usage{
taxonomy_mrca(ott_ids = NULL, ...)
\method{tax_rank}{taxon_mrca}(tax, ...)
\method{tax_name}{taxon_mrca}(tax, ...)
\method{ott_id}{taxon_mrca}(tax, ...)
\method{unique_name}{taxon_mrca}(tax, ...)
\method{tax_sources}{taxon_mrca}(tax, ...)
\method{flags}{taxon_mrca}(tax, ...)
\method{is_suppressed}{taxon_mrca}(tax, ...)
}
\arguments{
\item{ott_ids}{a vector of ott ids for the taxa whose MRCA is to
be found (numeric).}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
\item{tax}{an object generated by the \code{taxonomy_mrca}
function}
}
\value{
\itemize{
\item{\code{taxonomy_mrca}} { returns a list about the
taxonomic information relating to the MRCA for the ott_ids
provided. }
\item{\code{tax_rank}} { returns a character vector of the
taxonomic rank for the MRCA. }
\item{\code{tax_name}} { returns a character vector of the
Open Tree Taxonomy name for the MRCA. }
\item{\code{ott_id}} { returns a numeric vector of the ott id
for the MRCA. }
}
}
\description{
Taxonomic Most Recent Common Ancestor (MRCA)
}
\details{
Given a set of OTT ids, get the taxon that is the most recent common
ancestor (the MRCA) of all the identified taxa.
}
\examples{
\dontrun{
req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
tax_rank(req)
tax_name(req)
ott_id(req)
}
}
rotl/man/is_in_tree.Rd 0000644 0001775 0000144 00000002631 13056074002 014733 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/is_in_tree.R
\name{is_in_tree}
\alias{is_in_tree}
\title{Check that OTT ids occur in the Synthetic Tree}
\usage{
is_in_tree(ott_ids, ...)
}
\arguments{
\item{ott_ids}{a vector of Open Tree Taxonomy identifiers}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
A named logical vector. \code{TRUE} indicates that the OTT
id is in the synthetic tree, and \code{FALSE} that it is not.
}
\description{
Some valid taxonomic names do not occur in the Synthetic
Tree. This convenience function allows you to check whether a
given Open Tree Taxonomy identifier (OTT id) is in the tree. A taxonomic
name may not occur in the synthetic tree because (1) it is an
extinct or invalid taxon, or (2) it is part of a group that is not
monophyletic in the tree.
}
\examples{
\dontrun{
plant_families <- c("Asteraceae", "Solanaceae", "Poaceae", "Amaranthaceae",
"Zamiaceae", "Araceae", "Juncaceae")
matched_names <- tnrs_match_names(plant_families)
## This fails because some ott ids are not in the tree
## plant_tree <- tol_induced_subtree(ott_id(matched_names))
## So let's check which ones are actually in the tree first:
in_tree <- is_in_tree(ott_id(matched_names))
## This now works:
plant_tree <- tol_induced_subtree(ott_id(matched_names)[in_tree])
}
}
rotl/man/get_study_tree.Rd 0000644 0001775 0000144 00000004343 13055075704 015654 0 ustar deepayan users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/studies.R
\name{get_study_tree}
\alias{get_study_tree}
\title{Study Tree}
\usage{
get_study_tree(study_id = NULL, tree_id = NULL,
object_format = c("phylo"), tip_label = c("original_label", "ott_id",
"ott_taxon_name"), file_format, file, deduplicate = TRUE, ...)
}
\arguments{
\item{study_id}{the identifier of a study (character)}
\item{tree_id}{the identifier of a tree within the study}
\item{object_format}{the class of the object to be returned
(default and currently only possible value \code{phylo} from
the \code{\link[ape]{ape}} package).}
\item{tip_label}{the format of the tip
labels. \dQuote{\code{original_label}} (default) returns the
original labels as provided in the study,
\dQuote{\code{ott_id}} labels are replaced by their ott IDs,
\dQuote{\code{ott_taxon_name}} labels are replaced by their
Open Tree Taxonomy taxon name.}
\item{file_format}{the format of the file to be generated
(\code{newick} default, \code{nexus}, or \code{json}).}
\item{file}{the file name where the output of the function will be
saved.}
\item{deduplicate}{logical (default \code{TRUE}). If the tree
returned by the study contains duplicated taxon names, should they
be made unique? It is normally illegal for NEXUS/Newick tree
strings to contain duplicated tip names. This is a workaround to
circumvent this requirement. If \code{TRUE}, duplicated tip labels
will be appended \code{_1}, \code{_2}, etc.}
\item{...}{additional arguments to customize the API request (see
\code{\link{rotl}} package documentation).}
}
\value{
if \code{file_format} is missing, an object of class
\code{phylo}, otherwise a logical indicating whether the file
was successfully created.
}
\description{
Returns a specific tree from within a study
}
\examples{
\dontrun{
tree <- get_study_tree(study_id="pg_1144", tree_id="tree2324")
## comparison of the first few tip labels depending on the options used
head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="original_label")$tip.label)
head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_id")$tip.label)
head(get_study_tree(study_id="pg_1144", tree_id="tree2324", tip_label="ott_taxon_name")$tip.label)
}
}
rotl/LICENSE 0000644 0001775 0000144 00000000117 13056156657 012574 0 ustar deepayan users YEAR: 2017
COPYRIGHT HOLDER: Francois Michonneau, Joseph W. Brown, David Winter