wikitaxa/ 0000755 0001777 0001777 00000000000 13216753634 013437 5 ustar herbrandt herbrandt wikitaxa/inst/ 0000755 0001777 0001777 00000000000 13216602015 014376 5 ustar herbrandt herbrandt wikitaxa/inst/doc/ 0000755 0001777 0001777 00000000000 13216602015 015143 5 ustar herbrandt herbrandt wikitaxa/inst/doc/wikitaxa_vignette.Rmd 0000644 0001777 0001777 00000005474 13071751010 021346 0 ustar herbrandt herbrandt --- title: "Introduction to the wikitaxa package" author: "Scott Chamberlain" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Introduction to the wikitaxa package} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo=FALSE} knitr::opts_chunk$set( comment = "#>", collapse = TRUE, warning = FALSE, message = FALSE ) ``` `wikitaxa` - Taxonomy data from Wikipedia The goal of `wikitaxa` is to allow search and taxonomic data retrieval from across many Wikimedia sites, including: Wikipedia, Wikicommons, and Wikispecies. There are lower level and higher level parts to the package API: ### Low level API The low level API is meant for power users and gives you more control, but requires more knowledge. * `wt_wiki_page()` * `wt_wiki_page_parse()` * `wt_wiki_url_build()` * `wt_wiki_url_parse()` * `wt_wikispecies_parse()` * `wt_wikicommons_parse()` * `wt_wikipedia_parse()` ### High level API The high level API is meant to be easier and faster to use. 
* `wt_data()` * `wt_data_id()` * `wt_wikispecies()` * `wt_wikicommons()` * `wt_wikipedia()` Search functions: * `wt_wikicommons_search()` * `wt_wikispecies_search()` * `wt_wikipedia_search()` ## Installation CRAN version ```{r eval=FALSE} install.packages("wikitaxa") ``` Dev version ```{r eval=FALSE} devtools::install_github("ropensci/wikitaxa") ``` ```{r} library("wikitaxa") ``` ## wiki data ```{r eval=FALSE} wt_data("Poa annua") ``` Get a Wikidata ID ```{r} wt_data_id("Mimulus foliatus") ``` ## wikipedia lower level ```{r} pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica") res <- wt_wiki_page_parse(pg) res$iwlinks ``` higher level ```{r} res <- wt_wikipedia("Malus domestica") res$common_names res$classification ``` choose a wikipedia language ```{r eval=FALSE} # French wt_wikipedia(name = "Malus domestica", wiki = "fr") # Slovak wt_wikipedia(name = "Malus domestica", wiki = "sk") # Vietnamese wt_wikipedia(name = "Malus domestica", wiki = "vi") ``` search ```{r} wt_wikipedia_search(query = "Pinus") ``` search supports languages ```{r eval=FALSE} wt_wikipedia_search(query = "Pinus", wiki = "fr") ``` ## wikicommons lower level ```{r} pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus") res <- wt_wikicommons_parse(pg) res$common_names[1:3] ``` higher level ```{r} res <- wt_wikicommons("Abelmoschus") res$classification res$common_names ``` search ```{r} wt_wikicommons_search(query = "Pinus") ``` ## wikispecies lower level ```{r} pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica") res <- wt_wikispecies_parse(pg, types = "common_names") res$common_names[1:3] ``` higher level ```{r} res <- wt_wikispecies("Malus domestica") res$classification res$common_names ``` search ```{r} wt_wikispecies_search(query = "Pinus") ``` wikitaxa/inst/doc/wikitaxa_vignette.html 0000644 0001777 0001777 00000125671 13216602015 021573 0 ustar herbrandt herbrandt
wikitaxa - Taxonomy data from Wikipedia
The goal of wikitaxa is to allow search and retrieval of taxonomic data across many Wikimedia sites, including Wikipedia, Wikimedia Commons, and Wikispecies.
The package API has a lower-level part and a higher-level part:
The low-level API is meant for power users: it gives you more control, but requires more knowledge.
wt_wiki_page()
wt_wiki_page_parse()
wt_wiki_url_build()
wt_wiki_url_parse()
wt_wikispecies_parse()
wt_wikicommons_parse()
wt_wikipedia_parse()
The high-level API is meant to be easier and faster to use.
wt_data()
wt_data_id()
wt_wikispecies()
wt_wikicommons()
wt_wikipedia()
Search functions:
wt_wikicommons_search()
wt_wikispecies_search()
wt_wikipedia_search()
CRAN version
Dev version
Get a Wikidata ID
lower level
pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
res <- wt_wiki_page_parse(pg)
res$iwlinks
#> [1] "https://en.wiktionary.org/wiki/apple"
#> [2] "https://commons.wikimedia.org/wiki/Special:Search/Apple"
#> [3] "https://en.wikiquote.org/wiki/Apples"
#> [4] "https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Apple"
#> [5] "https://en.wikibooks.org/wiki/Apples"
#> [6] "https://species.wikimedia.org/wiki/Malus_domestica"
#> [7] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"
higher level
res <- wt_wikipedia("Malus domestica")
res$common_names
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Apple en
res$classification
#> # A tibble: 3 x 2
#> rank name
#> <chr> <chr>
#> 1 plainlinks
#> 2 species M. pumila
#> 3 binomial Malus pumila
Choose a Wikipedia language
# French
wt_wikipedia(name = "Malus domestica", wiki = "fr")
# Slovak
wt_wikipedia(name = "Malus domestica", wiki = "sk")
# Vietnamese
wt_wikipedia(name = "Malus domestica", wiki = "vi")
search
wt_wikipedia_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 2912
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount
#> * <int> <chr> <int> <int> <int>
#> 1 0 Pine 39389 21808 2460
#> 2 0 List of Pinus species 448990 14070 984
#> 3 0 Pinus longaeva 649634 12794 1424
#> 4 0 Pinus ponderosa 532941 29851 2644
#> 5 0 Pinus mugo 438946 10733 808
#> 6 0 Bristlecone pine 215931 16321 1679
#> 7 0 Pinus nigra 438963 11476 1352
#> 8 0 Pinus thunbergii 1522846 4679 438
#> 9 0 Pinus contorta 507717 22621 2321
#> 10 0 Pinus sabiniana 427209 13352 1262
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>
Search also supports other languages via the wiki argument, for example:
wt_wikipedia_search(query = "Pinus", wiki = "fr")
lower level
pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
res <- wt_wikicommons_parse(pg)
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "okra"
#>
#> [[1]]$language
#> [1] "en"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "مسكي"
#>
#> [[2]]$language
#> [1] "ar"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "Abelmoş"
#>
#> [[3]]$language
#> [1] "az"
higher level
res <- wt_wikicommons("Abelmoschus")
res$classification
#> # A tibble: 15 x 2
#> rank name
#> <chr> <chr>
#> 1 Domain Eukaryota
#> 2 unranked Archaeplastida
#> 3 Regnum Plantae
#> 4 Cladus angiosperms
#> 5 Cladus eudicots
#> 6 Cladus core eudicots
#> 7 Cladus superrosids
#> 8 Cladus rosids
#> 9 Cladus eurosids II
#> 10 Ordo Malvales
#> 11 Familia Malvaceae
#> 12 Subfamilia Malvoideae
#> 13 Tribus Hibisceae
#> 14 Genus Abelmoschus
#> 15 Authority Medik. (1787)
res$common_names
#> # A tibble: 19 x 2
#> name language
#> <chr> <chr>
#> 1 okra en
#> 2 مسكي ar
#> 3 Abelmoş az
#> 4 Ibiškovec cs
#> 5 Bisameibisch de
#> 6 Okrat fi
#> 7 Abelmosco gl
#> 8 Abelmošus hr
#> 9 Ybiškė lt
#> 10 "അബെ\u0d7dമോസ്കസ്" ml
#> 11 Абельмош mrj
#> 12 Abelmoskusslekta nn
#> 13 Piżmian pl
#> 14 Абельмош ru
#> 15 موري sd
#> 16 Okrasläktet sv
#> 17 Абельмош udm
#> 18 Chi Vông vang vi
#> 19 黄葵属 zh
search
wt_wikicommons_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 261
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount
#> * <int> <chr> <int> <int> <int>
#> 1 0 Pinus 82071 4154 320
#> 2 0 Pinus × schwerinii 11923249 634 67
#> 3 0 Pinus nigra 64703 7775 501
#> 4 0 Spinus pinus 703299 1560 242
#> 5 0 Pinus cooperi 8853401 564 64
#> 6 0 Pinus distribution maps of North America 29464212 25971 92
#> 7 0 Pinus herrerae 29975479 206 28
#> 8 0 Pinus tabuliformis 235899 1739 138
#> 9 0 Pinus maximinoi 20376092 485 60
#> 10 0 Pinus pseudostrobus 9972866 756 83
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>
lower level
pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
res <- wt_wikispecies_parse(pg, types = "common_names")
res$common_names[1:3]
#> [[1]]
#> [[1]]$name
#> [1] "Ябълка"
#>
#> [[1]]$language
#> [1] "български"
#>
#>
#> [[2]]
#> [[2]]$name
#> [1] "Poma, pomera"
#>
#> [[2]]$language
#> [1] "català"
#>
#>
#> [[3]]
#> [[3]]$name
#> [1] "Apfel"
#>
#> [[3]]$language
#> [1] "Deutsch"
higher level
res <- wt_wikispecies("Malus domestica")
res$classification
#> # A tibble: 8 x 2
#> rank name
#> <chr> <chr>
#> 1 Superregnum Eukaryota
#> 2 Regnum Plantae
#> 3 Cladus Angiosperms
#> 4 Cladus Eudicots
#> 5 Cladus Core eudicots
#> 6 Cladus Rosids
#> 7 Cladus Eurosids I
#> 8 Ordo Rosales
res$common_names
#> # A tibble: 19 x 2
#> name language
#> <chr> <chr>
#> 1 Ябълка български
#> 2 Poma, pomera català
#> 3 Apfel Deutsch
#> 4 Aed-õunapuu eesti
#> 5 Μηλιά Ελληνικά
#> 6 Apple English
#> 7 Manzano español
#> 8 Pomme français
#> 9 Melâr furlan
#> 10 사과나무 한국어
#> 11 ‘Āpala Hawaiʻi
#> 12 Melo italiano
#> 13 Aapel Nordfriisk
#> 14 Maçã, Macieira português
#> 15 Яблоня домашняя русский
#> 16 Tarhaomenapuu suomi
#> 17 Elma Türkçe
#> 18 Яблуня домашня українська
#> 19 Pomaro vèneto
search
wt_wikispecies_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#>
#> $continue
#> $continue$sroffset
#> [1] 10
#>
#> $continue$continue
#> [1] "-||"
#>
#>
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 400
#>
#>
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount
#> * <int> <chr> <int> <int> <int>
#> 1 0 Pinus 17362 1570 282
#> 2 0 Pinus nigra subsp. nigra 327138 1412 127
#> 3 0 Pinus subg. Pinus 300923 318 27
#> 4 0 Pinus clausa 45047 1520 210
#> 5 0 Pinus sect. Pinus 300935 623 68
#> 6 0 Pinus resinosa 45082 1195 165
#> 7 0 Pinus gordoniana 260795 594 61
#> 8 0 Pinus subsect. Pinus 300938 718 94
#> 9 0 Pinus thunbergii 73542 999 140
#> 10 0 Pinus sabiniana 45084 644 80
#> # ... with 2 more variables: snippet <chr>, timestamp <chr>